-rw-r--r-- | configure.ac | 8
-rw-r--r-- | src/snappy/Makefile.am | 30
-rw-r--r-- | src/snappy/c_src/erl_nif_compat.h (renamed from src/snappy/erl_nif_compat.h) | 0
-rw-r--r-- | src/snappy/c_src/snappy/AUTHORS (renamed from src/snappy/google-snappy/AUTHORS) | 0
-rw-r--r-- | src/snappy/c_src/snappy/COPYING (renamed from src/snappy/google-snappy/COPYING) | 0
-rw-r--r-- | src/snappy/c_src/snappy/NEWS | 46
-rw-r--r-- | src/snappy/c_src/snappy/README | 135
-rw-r--r-- | src/snappy/c_src/snappy/config.h.in (renamed from src/snappy/google-snappy/config.h.in) | 3
-rw-r--r-- | src/snappy/c_src/snappy/snappy-internal.h (renamed from src/snappy/google-snappy/snappy-internal.h) | 4
-rw-r--r-- | src/snappy/c_src/snappy/snappy-sinksource.cc (renamed from src/snappy/google-snappy/snappy-sinksource.cc) | 1
-rw-r--r-- | src/snappy/c_src/snappy/snappy-sinksource.h (renamed from src/snappy/google-snappy/snappy-sinksource.h) | 1
-rw-r--r-- | src/snappy/c_src/snappy/snappy-stubs-internal.cc (renamed from src/snappy/google-snappy/snappy-stubs-internal.cc) | 0
-rw-r--r-- | src/snappy/c_src/snappy/snappy-stubs-internal.h (renamed from src/snappy/google-snappy/snappy-stubs-internal.h) | 188
-rw-r--r-- | src/snappy/c_src/snappy/snappy-stubs-public.h.in (renamed from src/snappy/google-snappy/snappy-stubs-public.h.in) | 0
-rw-r--r-- | src/snappy/c_src/snappy/snappy.cc (renamed from src/snappy/google-snappy/snappy.cc) | 278
-rw-r--r-- | src/snappy/c_src/snappy/snappy.h (renamed from src/snappy/google-snappy/snappy.h) | 28
-rw-r--r-- | src/snappy/c_src/snappy_nif.cc (renamed from src/snappy/snappy_nif.cc) | 11
-rw-r--r-- | src/snappy/src/snappy.app.in (renamed from src/snappy/snappy.app.in) | 2
-rw-r--r-- | src/snappy/src/snappy.erl (renamed from src/snappy/snappy.erl) | 0
-rw-r--r-- | src/snappy/test/snappy_tests.erl | 74
20 files changed, 471 insertions, 338 deletions
diff --git a/configure.ac b/configure.ac index 57a4268a4..16edb7e6e 100644 --- a/configure.ac +++ b/configure.ac @@ -22,7 +22,7 @@ AC_CONFIG_AUX_DIR([build-aux]) AC_CONFIG_MACRO_DIR([m4]) AC_CONFIG_HEADERS([config.h]) -AC_CONFIG_HEADERS([src/snappy/google-snappy/config.h]) +AC_CONFIG_HEADERS([src/snappy/c_src/snappy/config.h]) AM_INIT_AUTOMAKE([1.6.3 foreign tar-ustar]) @@ -39,8 +39,8 @@ PKG_PROG_PKG_CONFIG dnl Config for google snappy m4_define([snappy_major], [1]) -m4_define([snappy_minor], [0]) -m4_define([snappy_patchlevel], [5]) +m4_define([snappy_minor], [1]) +m4_define([snappy_patchlevel], [1]) AC_PROG_CXX AC_LANG([C++]) @@ -745,7 +745,7 @@ AC_CONFIG_FILES([src/etap/Makefile]) AC_CONFIG_FILES([src/ibrowse/Makefile]) AC_CONFIG_FILES([src/mochiweb/Makefile]) AC_CONFIG_FILES([src/snappy/Makefile]) -AC_CONFIG_FILES([src/snappy/google-snappy/snappy-stubs-public.h]) +AC_CONFIG_FILES([src/snappy/c_src/snappy/snappy-stubs-public.h]) AC_CONFIG_FILES([src/ejson/Makefile]) AC_CONFIG_FILES([test/Makefile]) AC_CONFIG_FILES([test/bench/Makefile]) diff --git a/src/snappy/Makefile.am b/src/snappy/Makefile.am index cd0e1924a..980825b0b 100644 --- a/src/snappy/Makefile.am +++ b/src/snappy/Makefile.am @@ -10,22 +10,22 @@ ## License for the specific language governing permissions and limitations under ## the License. -snappyebindir = $(localerlanglibdir)/snappy-1.1.0/ebin -snappyprivdir = $(localerlanglibdir)/snappy-1.1.0/priv +snappyebindir = $(localerlanglibdir)/snappy-1.1.1/ebin +snappyprivdir = $(localerlanglibdir)/snappy-1.1.1/priv snappy_cxx_srcs = \ snappy_nif.cc \ - google-snappy/snappy.cc \ - google-snappy/snappy-sinksource.cc \ - google-snappy/snappy-stubs-internal.cc + c_src/snappy/snappy.cc \ + c_src/snappy/snappy-sinksource.cc \ + c_src/snappy/snappy-stubs-internal.cc snappy_cxx_hdrs = \ - erl_nif_compat.h \ - google-snappy/snappy.h \ - google-snappy/snappy-internal.h \ - google-snappy/snappy-sinksource.h \ - google-snappy/snappy-stubs-internal.h \ - google-snappy/snappy-stubs-public.h.in + c_src/erl_nif_compat.h \ + c_src/snappy/snappy.h \ + c_src/snappy/snappy-internal.h \ + c_src/snappy/snappy-sinksource.h \ + c_src/snappy/snappy-stubs-internal.h \ + c_src/snappy/snappy-stubs-public.h.in snappy_file_collection = \ snappy.app.in \ @@ -36,14 +36,14 @@ snappyebin_make_generated_file_list = \ snappy.beam EXTRA_DIST = \ - google-snappy/AUTHORS \ - google-snappy/COPYING \ - $(snappy_cxx_hdrs) \ + c_src/snappy/AUTHORS \ + c_src/snappy/COPYING \ + $(snappy_cxx_hdrs) \ $(snappy_file_collection) CLEANFILES = \ $(snappyebin_make_generated_file_list) \ - priv/snappy_nif.so + priv/snappy_nif.so snappyebin_DATA = \ $(snappyebin_make_generated_file_list) diff --git a/src/snappy/erl_nif_compat.h b/src/snappy/c_src/erl_nif_compat.h index b8eb9b02f..b8eb9b02f 100644 --- a/src/snappy/erl_nif_compat.h +++ b/src/snappy/c_src/erl_nif_compat.h diff --git a/src/snappy/google-snappy/AUTHORS b/src/snappy/c_src/snappy/AUTHORS index 4858b377c..4858b377c 100644 --- a/src/snappy/google-snappy/AUTHORS +++ b/src/snappy/c_src/snappy/AUTHORS diff --git a/src/snappy/google-snappy/COPYING b/src/snappy/c_src/snappy/COPYING index 8d6bd9fed..8d6bd9fed 100644 --- a/src/snappy/google-snappy/COPYING +++ b/src/snappy/c_src/snappy/COPYING diff --git a/src/snappy/c_src/snappy/NEWS b/src/snappy/c_src/snappy/NEWS new file mode 100644 index 000000000..d51478783 --- /dev/null +++ b/src/snappy/c_src/snappy/NEWS @@ -0,0 +1,46 @@ +Snappy v1.0.3, June 2nd 2011: + + * Speeded up the decompressor somewhat; about 3-6% for Core 2, + 6-13% for 
Core i7, and 5-12% for Opteron (all in 64-bit mode). + + * Added compressed format documentation. This text is new, + but an earlier version from Zeev Tarantov was used as reference. + + * Only link snappy_unittest against -lz and other autodetected + libraries, not libsnappy.so (which doesn't need any such dependency). + + * Fixed some display issues in the microbenchmarks, one of which would + frequently make the test crash on GNU/Hurd. + + +Snappy v1.0.2, April 29th 2011: + + * Relicense to a BSD-type license. + + * Added C bindings, contributed by Martin Gieseking. + + * More Win32 fixes, in particular for MSVC. + + * Replace geo.protodata with a newer version. + + * Fix timing inaccuracies in the unit test when comparing Snappy + to other algorithms. + + +Snappy v1.0.1, March 25th 2011: + +This is a maintenance release, mostly containing minor fixes. +There is no new functionality. The most important fixes include: + + * The COPYING file and all licensing headers now correctly state that + Snappy is licensed under the Apache 2.0 license. + + * snappy_unittest should now compile natively under Windows, + as well as on embedded systems with no mmap(). + + * Various autotools nits have been fixed. + + +Snappy v1.0, March 17th 2011: + + * Initial version. diff --git a/src/snappy/c_src/snappy/README b/src/snappy/c_src/snappy/README new file mode 100644 index 000000000..df8f0e178 --- /dev/null +++ b/src/snappy/c_src/snappy/README @@ -0,0 +1,135 @@ +Snappy, a fast compressor/decompressor. + + +Introduction +============ + +Snappy is a compression/decompression library. It does not aim for maximum +compression, or compatibility with any other compression library; instead, +it aims for very high speeds and reasonable compression. For instance, +compared to the fastest mode of zlib, Snappy is an order of magnitude faster +for most inputs, but the resulting compressed files are anywhere from 20% to +100% bigger. (For more information, see "Performance", below.) + +Snappy has the following properties: + + * Fast: Compression speeds at 250 MB/sec and beyond, with no assembler code. + See "Performance" below. + * Stable: Over the last few years, Snappy has compressed and decompressed + petabytes of data in Google's production environment. The Snappy bitstream + format is stable and will not change between versions. + * Robust: The Snappy decompressor is designed not to crash in the face of + corrupted or malicious input. + * Free and open source software: Snappy is licensed under a BSD-type license. + For more information, see the included COPYING file. + +Snappy has previously been called "Zippy" in some Google presentations +and the like. + + +Performance +=========== + +Snappy is intended to be fast. On a single core of a Core i7 processor +in 64-bit mode, it compresses at about 250 MB/sec or more and decompresses at +about 500 MB/sec or more. (These numbers are for the slowest inputs in our +benchmark suite; others are much faster.) In our tests, Snappy usually +is faster than algorithms in the same class (e.g. LZO, LZF, FastLZ, QuickLZ, +etc.) while achieving comparable compression ratios. + +Typical compression ratios (based on the benchmark suite) are about 1.5-1.7x +for plain text, about 2-4x for HTML, and of course 1.0x for JPEGs, PNGs and +other already-compressed data. Similar numbers for zlib in its fastest mode +are 2.6-2.8x, 3-7x and 1.0x, respectively. More sophisticated algorithms are +capable of achieving yet higher compression rates, although usually at the +expense of speed. 
Of course, compression ratio will vary significantly with +the input. + +Although Snappy should be fairly portable, it is primarily optimized +for 64-bit x86-compatible processors, and may run slower in other environments. +In particular: + + - Snappy uses 64-bit operations in several places to process more data at + once than would otherwise be possible. + - Snappy assumes unaligned 32- and 64-bit loads and stores are cheap. + On some platforms, these must be emulated with single-byte loads + and stores, which is much slower. + - Snappy assumes little-endian throughout, and needs to byte-swap data in + several places if running on a big-endian platform. + +Experience has shown that even heavily tuned code can be improved. +Performance optimizations, whether for 64-bit x86 or other platforms, +are of course most welcome; see "Contact", below. + + +Usage +===== + +Note that Snappy, both the implementation and the main interface, +is written in C++. However, several third-party bindings to other languages +are available; see the Google Code page at http://code.google.com/p/snappy/ +for more information. Also, if you want to use Snappy from C code, you can +use the included C bindings in snappy-c.h. + +To use Snappy from your own C++ program, include the file "snappy.h" from +your calling file, and link against the compiled library. + +There are many ways to call Snappy, but the simplest possible is + + snappy::Compress(input, &output); + +and similarly + + snappy::Uncompress(input, &output); + +where "input" and "output" are both instances of std::string. + +There are other interfaces that are more flexible in various ways, including +support for custom (non-array) input sources. See the header file for more +information. + + +Tests and benchmarks +==================== + +When you compile Snappy, snappy_unittest is compiled in addition to the +library itself. You do not need it to use the compressor from your own library, +but it contains several useful components for Snappy development. + +First of all, it contains unit tests, verifying correctness on your machine in +various scenarios. If you want to change or optimize Snappy, please run the +tests to verify you have not broken anything. Note that if you have the +Google Test library installed, unit test behavior (especially failures) will be +significantly more user-friendly. You can find Google Test at + + http://code.google.com/p/googletest/ + +You probably also want the gflags library for handling of command-line flags; +you can find it at + + http://code.google.com/p/google-gflags/ + +In addition to the unit tests, snappy contains microbenchmarks used to +tune compression and decompression performance. These are automatically run +before the unit tests, but you can disable them using the flag +--run_microbenchmarks=false if you have gflags installed (otherwise you will +need to edit the source). + +Finally, snappy can benchmark Snappy against a few other compression libraries +(zlib, LZO, LZF, FastLZ and QuickLZ), if they were detected at configure time. +To benchmark using a given file, give the compression algorithm you want to test +Snappy against (e.g. --zlib) and then a list of one or more file names on the +command line. The testdata/ directory contains the files used by the +microbenchmark, which should provide a reasonably balanced starting point for +benchmarking. (Note that baddata[1-3].snappy are not intended as benchmarks; they +are used to verify correctness in the presence of corrupted data in the unit +test.) 
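[Editor's note: as a concrete companion to the Usage section of the bundled README above, here is a minimal, self-contained sketch of the plain C++ API. It is not part of this change set, and it uses the raw-pointer overloads declared in snappy.h rather than the std::string shorthand the README shows.]

    // Minimal sketch (not part of this diff): exercising the C++ API the README
    // describes, via the raw-pointer overloads declared in snappy.h.
    #include <cassert>
    #include <iostream>
    #include <string>
    #include "snappy.h"

    int main() {
      const std::string input(1000, 'a');   // highly repetitive, compresses well
      std::string compressed, restored;

      snappy::Compress(input.data(), input.size(), &compressed);
      assert(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));

      size_t uncompressed_len = 0;
      snappy::GetUncompressedLength(compressed.data(), compressed.size(),
                                    &uncompressed_len);
      assert(uncompressed_len == input.size());

      snappy::Uncompress(compressed.data(), compressed.size(), &restored);
      assert(restored == input);
      std::cout << input.size() << " -> " << compressed.size() << " bytes\n";
      return 0;
    }

Compile and link against the built library (for example, g++ example.cc -lsnappy).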
+ + +Contact +======= + +Snappy is distributed through Google Code. For the latest version, a bug tracker, +and other information, see + + http://code.google.com/p/snappy/ diff --git a/src/snappy/google-snappy/config.h.in b/src/snappy/c_src/snappy/config.h.in index 8f912f652..28f57c28f 100644 --- a/src/snappy/google-snappy/config.h.in +++ b/src/snappy/c_src/snappy/config.h.in @@ -72,9 +72,6 @@ /* Define to 1 if you have the <sys/stat.h> header file. */ #undef HAVE_SYS_STAT_H -/* Define to 1 if you have the <sys/time.h> header file. */ -#undef HAVE_SYS_TIME_H - /* Define to 1 if you have the <sys/types.h> header file. */ #undef HAVE_SYS_TYPES_H diff --git a/src/snappy/google-snappy/snappy-internal.h b/src/snappy/c_src/snappy/snappy-internal.h index c99d33130..a32eda59f 100644 --- a/src/snappy/google-snappy/snappy-internal.h +++ b/src/snappy/c_src/snappy/snappy-internal.h @@ -85,7 +85,7 @@ char* CompressFragment(const char* input, static inline int FindMatchLength(const char* s1, const char* s2, const char* s2_limit) { - assert(s2_limit >= s2); + DCHECK_GE(s2_limit, s2); int matched = 0; // Find out how long the match is. We loop over the data 64 bits at a @@ -122,7 +122,7 @@ static inline int FindMatchLength(const char* s1, const char* s2, const char* s2_limit) { // Implementation based on the x86-64 version, above. - assert(s2_limit >= s2); + DCHECK_GE(s2_limit, s2); int matched = 0; while (s2 <= s2_limit - 4 && diff --git a/src/snappy/google-snappy/snappy-sinksource.cc b/src/snappy/c_src/snappy/snappy-sinksource.cc index 5844552cb..1017895f9 100644 --- a/src/snappy/google-snappy/snappy-sinksource.cc +++ b/src/snappy/c_src/snappy/snappy-sinksource.cc @@ -68,4 +68,5 @@ char* UncheckedByteArraySink::GetAppendBuffer(size_t len, char* scratch) { return dest_; } + } diff --git a/src/snappy/google-snappy/snappy-sinksource.h b/src/snappy/c_src/snappy/snappy-sinksource.h index faabfa1e6..430baeabb 100644 --- a/src/snappy/google-snappy/snappy-sinksource.h +++ b/src/snappy/c_src/snappy/snappy-sinksource.h @@ -60,7 +60,6 @@ class Sink { // The default implementation always returns the scratch buffer. virtual char* GetAppendBuffer(size_t length, char* scratch); - private: // No copying Sink(const Sink&); diff --git a/src/snappy/google-snappy/snappy-stubs-internal.cc b/src/snappy/c_src/snappy/snappy-stubs-internal.cc index 6ed334371..6ed334371 100644 --- a/src/snappy/google-snappy/snappy-stubs-internal.cc +++ b/src/snappy/c_src/snappy/snappy-stubs-internal.cc diff --git a/src/snappy/google-snappy/snappy-stubs-internal.h b/src/snappy/c_src/snappy/snappy-stubs-internal.h index 12393b628..46ee23542 100644 --- a/src/snappy/google-snappy/snappy-stubs-internal.h +++ b/src/snappy/c_src/snappy/snappy-stubs-internal.h @@ -35,13 +35,14 @@ #include "config.h" #endif +#include <iostream> #include <string> #include <assert.h> #include <stdlib.h> #include <string.h> -#ifdef HAVE_SYS_MMAN_H +#ifdef HAVE_SYS_MMAN #include <sys/mman.h> #endif @@ -85,71 +86,108 @@ using namespace std; // version (anyone who wants to regenerate it can just do the call // themselves within main()). 
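[Editor's note: the snappy-internal.h hunk above swaps assert for DCHECK_GE in FindMatchLength but keeps its contract. The following byte-at-a-time reference is a sketch only, not the shipped code, which compares a word at a time on 64-bit targets.]

    // Reference sketch of FindMatchLength's contract: count how many bytes,
    // starting at s1 and s2 respectively, are identical, never reading at or
    // past s2_limit. The real implementation in snappy-internal.h is
    // word-at-a-time; this slow version only illustrates the semantics.
    #include <cassert>

    static inline int FindMatchLengthSlow(const char* s1,
                                          const char* s2,
                                          const char* s2_limit) {
      assert(s2_limit >= s2);
      int matched = 0;
      while (s2 < s2_limit && *s1 == *s2) {
        ++s1;
        ++s2;
        ++matched;
      }
      return matched;
    }

    int main() {
      const char a[] = "abcdefgh";
      const char b[] = "abcdXfgh";
      assert(FindMatchLengthSlow(a, b, b + 8) == 4);   // stops at the mismatch
      return 0;
    }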
#define DEFINE_bool(flag_name, default_value, description) \ - bool FLAGS_ ## flag_name = default_value + bool FLAGS_ ## flag_name = default_value; #define DECLARE_bool(flag_name) \ - extern bool FLAGS_ ## flag_name + extern bool FLAGS_ ## flag_name; +#define REGISTER_MODULE_INITIALIZER(name, code) namespace snappy { static const uint32 kuint32max = static_cast<uint32>(0xFFFFFFFF); static const int64 kint64max = static_cast<int64>(0x7FFFFFFFFFFFFFFFLL); -// Potentially unaligned loads and stores. +// Logging. -// x86 and PowerPC can simply do these loads and stores native. +#define LOG(level) LogMessage() +#define VLOG(level) true ? (void)0 : \ + snappy::LogMessageVoidify() & snappy::LogMessage() -#if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) +class LogMessage { + public: + LogMessage() { } + ~LogMessage() { + cerr << endl; + } -#define UNALIGNED_LOAD16(_p) (*reinterpret_cast<const uint16 *>(_p)) -#define UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32 *>(_p)) -#define UNALIGNED_LOAD64(_p) (*reinterpret_cast<const uint64 *>(_p)) + LogMessage& operator<<(const std::string& msg) { + cerr << msg; + return *this; + } + LogMessage& operator<<(int x) { + cerr << x; + return *this; + } +}; -#define UNALIGNED_STORE16(_p, _val) (*reinterpret_cast<uint16 *>(_p) = (_val)) -#define UNALIGNED_STORE32(_p, _val) (*reinterpret_cast<uint32 *>(_p) = (_val)) -#define UNALIGNED_STORE64(_p, _val) (*reinterpret_cast<uint64 *>(_p) = (_val)) +// Asserts, both versions activated in debug mode only, +// and ones that are always active. -// ARMv7 and newer support native unaligned accesses, but only of 16-bit -// and 32-bit values (not 64-bit); older versions either raise a fatal signal, -// do an unaligned read and rotate the words around a bit, or do the reads very -// slowly (trip through kernel mode). There's no simple #define that says just -// “ARMv7 or higher”, so we have to filter away all ARMv5 and ARMv6 -// sub-architectures. -// -// This is a mess, but there's not much we can do about it. - -#elif defined(__arm__) && \ - !defined(__ARM_ARCH_4__) && \ - !defined(__ARM_ARCH_4T__) && \ - !defined(__ARM_ARCH_5__) && \ - !defined(__ARM_ARCH_5T__) && \ - !defined(__ARM_ARCH_5TE__) && \ - !defined(__ARM_ARCH_5TEJ__) && \ - !defined(__ARM_ARCH_6__) && \ - !defined(__ARM_ARCH_6J__) && \ - !defined(__ARM_ARCH_6K__) && \ - !defined(__ARM_ARCH_6Z__) && \ - !defined(__ARM_ARCH_6ZK__) && \ - !defined(__ARM_ARCH_6T2__) +#define CRASH_UNLESS(condition) \ + PREDICT_TRUE(condition) ? (void)0 : \ + snappy::LogMessageVoidify() & snappy::LogMessageCrash() + +class LogMessageCrash : public LogMessage { + public: + LogMessageCrash() { } + ~LogMessageCrash() { + cerr << endl; + abort(); + } +}; + +// This class is used to explicitly ignore values in the conditional +// logging macros. This avoids compiler warnings like "value computed +// is not used" and "statement has no effect". 
+ +class LogMessageVoidify { + public: + LogMessageVoidify() { } + // This has to be an operator with a precedence lower than << but + // higher than ?: + void operator&(const LogMessage&) { } +}; + +#define CHECK(cond) CRASH_UNLESS(cond) +#define CHECK_LE(a, b) CRASH_UNLESS((a) <= (b)) +#define CHECK_GE(a, b) CRASH_UNLESS((a) >= (b)) +#define CHECK_EQ(a, b) CRASH_UNLESS((a) == (b)) +#define CHECK_NE(a, b) CRASH_UNLESS((a) != (b)) +#define CHECK_LT(a, b) CRASH_UNLESS((a) < (b)) +#define CHECK_GT(a, b) CRASH_UNLESS((a) > (b)) + +#ifdef NDEBUG + +#define DCHECK(cond) CRASH_UNLESS(true) +#define DCHECK_LE(a, b) CRASH_UNLESS(true) +#define DCHECK_GE(a, b) CRASH_UNLESS(true) +#define DCHECK_EQ(a, b) CRASH_UNLESS(true) +#define DCHECK_NE(a, b) CRASH_UNLESS(true) +#define DCHECK_LT(a, b) CRASH_UNLESS(true) +#define DCHECK_GT(a, b) CRASH_UNLESS(true) + +#else + +#define DCHECK(cond) CHECK(cond) +#define DCHECK_LE(a, b) CHECK_LE(a, b) +#define DCHECK_GE(a, b) CHECK_GE(a, b) +#define DCHECK_EQ(a, b) CHECK_EQ(a, b) +#define DCHECK_NE(a, b) CHECK_NE(a, b) +#define DCHECK_LT(a, b) CHECK_LT(a, b) +#define DCHECK_GT(a, b) CHECK_GT(a, b) + +#endif + +// Potentially unaligned loads and stores. + +#if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) #define UNALIGNED_LOAD16(_p) (*reinterpret_cast<const uint16 *>(_p)) #define UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32 *>(_p)) +#define UNALIGNED_LOAD64(_p) (*reinterpret_cast<const uint64 *>(_p)) #define UNALIGNED_STORE16(_p, _val) (*reinterpret_cast<uint16 *>(_p) = (_val)) #define UNALIGNED_STORE32(_p, _val) (*reinterpret_cast<uint32 *>(_p) = (_val)) - -// TODO(user): NEON supports unaligned 64-bit loads and stores. -// See if that would be more efficient on platforms supporting it, -// at least for copies. - -inline uint64 UNALIGNED_LOAD64(const void *p) { - uint64 t; - memcpy(&t, p, sizeof t); - return t; -} - -inline void UNALIGNED_STORE64(void *p, uint64 v) { - memcpy(p, &v, sizeof v); -} +#define UNALIGNED_STORE64(_p, _val) (*reinterpret_cast<uint64 *>(_p) = (_val)) #else @@ -188,31 +226,9 @@ inline void UNALIGNED_STORE64(void *p, uint64 v) { #endif -// This can be more efficient than UNALIGNED_LOAD64 + UNALIGNED_STORE64 -// on some platforms, in particular ARM. -inline void UnalignedCopy64(const void *src, void *dst) { - if (sizeof(void *) == 8) { - UNALIGNED_STORE64(dst, UNALIGNED_LOAD64(src)); - } else { - const char *src_char = reinterpret_cast<const char *>(src); - char *dst_char = reinterpret_cast<char *>(dst); - - UNALIGNED_STORE32(dst_char, UNALIGNED_LOAD32(src_char)); - UNALIGNED_STORE32(dst_char + 4, UNALIGNED_LOAD32(src_char + 4)); - } -} - // The following guarantees declaration of the byte swap functions. #ifdef WORDS_BIGENDIAN -#ifdef HAVE_SYS_BYTEORDER_H -#include <sys/byteorder.h> -#endif - -#ifdef HAVE_SYS_ENDIAN_H -#include <sys/endian.h> -#endif - #ifdef _MSC_VER #include <stdlib.h> #define bswap_16(x) _byteswap_ushort(x) @@ -226,38 +242,8 @@ inline void UnalignedCopy64(const void *src, void *dst) { #define bswap_32(x) OSSwapInt32(x) #define bswap_64(x) OSSwapInt64(x) -#elif defined(HAVE_BYTESWAP_H) -#include <byteswap.h> - -#elif defined(bswap32) -// FreeBSD defines bswap{16,32,64} in <sys/endian.h> (already #included). -#define bswap_16(x) bswap16(x) -#define bswap_32(x) bswap32(x) -#define bswap_64(x) bswap64(x) - -#elif defined(BSWAP_64) -// Solaris 10 defines BSWAP_{16,32,64} in <sys/byteorder.h> (already #included). 
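[Editor's note: the CRASH_UNLESS / LogMessageVoidify machinery added above relies on operator precedence so that the streamed message is a single void expression, evaluated only when the condition fails. The following stripped-down illustration uses simplified names and merges LogMessage and LogMessageCrash into one type; it is not the actual header.]

    // Illustration of the CHECK idiom added in snappy-stubs-internal.h above.
    #include <cstdlib>
    #include <iostream>

    struct Msg {
      // The real LogMessageCrash prints a newline and aborts in its destructor.
      ~Msg() { std::cerr << std::endl; std::abort(); }
      Msg& operator<<(const char* s) { std::cerr << s; return *this; }
      Msg& operator<<(int x)         { std::cerr << x; return *this; }
    };

    struct Voidify {
      // operator& binds tighter than ?: but looser than <<, so the whole
      // streamed expression collapses to void inside the conditional.
      void operator&(const Msg&) {}
    };

    #define MY_CHECK(cond) \
      (cond) ? (void)0 : Voidify() & Msg() << "Check failed: " #cond

    int main() {
      int htsize = 256;
      // Passes: the Msg temporary on the right is never even constructed.
      MY_CHECK((htsize & (htsize - 1)) == 0) << ": must be a power of two";
      // Fails: streams the message, then aborts when the temporary is destroyed.
      MY_CHECK(htsize <= 64) << " (htsize=" << htsize << ")";
      return 0;
    }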
-#define bswap_16(x) BSWAP_16(x) -#define bswap_32(x) BSWAP_32(x) -#define bswap_64(x) BSWAP_64(x) - #else - -inline uint16 bswap_16(uint16 x) { - return (x << 8) | (x >> 8); -} - -inline uint32 bswap_32(uint32 x) { - x = ((x & 0xff00ff00UL) >> 8) | ((x & 0x00ff00ffUL) << 8); - return (x >> 16) | (x << 16); -} - -inline uint64 bswap_64(uint64 x) { - x = ((x & 0xff00ff00ff00ff00ULL) >> 8) | ((x & 0x00ff00ff00ff00ffULL) << 8); - x = ((x & 0xffff0000ffff0000ULL) >> 16) | ((x & 0x0000ffff0000ffffULL) << 16); - return (x >> 32) | (x << 32); -} - +#include <byteswap.h> #endif #endif // WORDS_BIGENDIAN diff --git a/src/snappy/google-snappy/snappy-stubs-public.h.in b/src/snappy/c_src/snappy/snappy-stubs-public.h.in index f0babcbe5..f0babcbe5 100644 --- a/src/snappy/google-snappy/snappy-stubs-public.h.in +++ b/src/snappy/c_src/snappy/snappy-stubs-public.h.in diff --git a/src/snappy/google-snappy/snappy.cc b/src/snappy/c_src/snappy/snappy.cc index 1230321f6..a591aba59 100644 --- a/src/snappy/google-snappy/snappy.cc +++ b/src/snappy/c_src/snappy/snappy.cc @@ -95,7 +95,7 @@ enum { // Note that this does not match the semantics of either memcpy() // or memmove(). static inline void IncrementalCopy(const char* src, char* op, int len) { - assert(len > 0); + DCHECK_GT(len, 0); do { *op++ = *src++; } while (--len > 0); @@ -140,12 +140,12 @@ const int kMaxIncrementCopyOverflow = 10; static inline void IncrementalCopyFastPath(const char* src, char* op, int len) { while (op - src < 8) { - UnalignedCopy64(src, op); + UNALIGNED_STORE64(op, UNALIGNED_LOAD64(src)); len -= op - src; op += op - src; } while (len > 0) { - UnalignedCopy64(src, op); + UNALIGNED_STORE64(op, UNALIGNED_LOAD64(src)); src += 8; op += 8; len -= 8; @@ -172,8 +172,8 @@ static inline char* EmitLiteral(char* op, // - The output will always have 32 spare bytes (see // MaxCompressedLength). if (allow_fast_path && len <= 16) { - UnalignedCopy64(literal, op); - UnalignedCopy64(literal + 8, op + 8); + UNALIGNED_STORE64(op, UNALIGNED_LOAD64(literal)); + UNALIGNED_STORE64(op + 8, UNALIGNED_LOAD64(literal + 8)); return op + len; } } else { @@ -194,25 +194,25 @@ static inline char* EmitLiteral(char* op, return op + len; } -static inline char* EmitCopyLessThan64(char* op, size_t offset, int len) { - assert(len <= 64); - assert(len >= 4); - assert(offset < 65536); +static inline char* EmitCopyLessThan64(char* op, int offset, int len) { + DCHECK_LE(len, 64); + DCHECK_GE(len, 4); + DCHECK_LT(offset, 65536); if ((len < 12) && (offset < 2048)) { - size_t len_minus_4 = len - 4; + int len_minus_4 = len - 4; assert(len_minus_4 < 8); // Must fit in 3 bits - *op++ = COPY_1_BYTE_OFFSET + ((len_minus_4) << 2) + ((offset >> 8) << 5); + *op++ = COPY_1_BYTE_OFFSET | ((len_minus_4) << 2) | ((offset >> 8) << 5); *op++ = offset & 0xff; } else { - *op++ = COPY_2_BYTE_OFFSET + ((len-1) << 2); + *op++ = COPY_2_BYTE_OFFSET | ((len-1) << 2); LittleEndian::Store16(op, offset); op += 2; } return op; } -static inline char* EmitCopy(char* op, size_t offset, int len) { +static inline char* EmitCopy(char* op, int offset, int len) { // Emit 64 byte copies but make sure to keep at least four bytes reserved while (len >= 68) { op = EmitCopyLessThan64(op, offset, 64); @@ -249,10 +249,12 @@ uint16* WorkingMemory::GetHashTable(size_t input_size, int* table_size) { // compression, and if the input is short, we won't need that // many hash table entries anyway. 
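[Editor's note: EmitCopyLessThan64 in the snappy.cc hunk above packs a back-reference into either a two-byte or a three-byte tag. The standalone sketch below shows the two-byte case; the enum values are assumed to match the LITERAL/COPY_* constants defined elsewhere in snappy.cc, and the decode checks are purely illustrative.]

    // Sketch of the short copy-tag packing used by EmitCopyLessThan64 above.
    #include <cassert>
    #include <cstdio>

    enum { LITERAL = 0, COPY_1_BYTE_OFFSET = 1, COPY_2_BYTE_OFFSET = 2 };

    // Encode a copy of `len` bytes (4..11) from `offset` (< 2048) as two bytes:
    //   tag byte: low 2 bits = COPY_1_BYTE_OFFSET, bits 2..4 = len-4,
    //             bits 5..7 = offset bits 8..10
    //   next byte: offset bits 0..7
    static void EmitShortCopy(unsigned char* op, int offset, int len) {
      assert(len >= 4 && len < 12 && offset < 2048);
      op[0] = COPY_1_BYTE_OFFSET | ((len - 4) << 2) | ((offset >> 8) << 5);
      op[1] = offset & 0xff;
    }

    int main() {
      unsigned char tag[2];
      EmitShortCopy(tag, /*offset=*/300, /*len=*/7);
      std::printf("tag bytes: %02x %02x\n", tag[0], tag[1]);  // prints: 2d 2c
      // Decoding reverses the packing:
      assert((tag[0] & 0x3) == COPY_1_BYTE_OFFSET);
      assert(((tag[0] >> 2) & 0x7) + 4 == 7);                 // length
      assert((((tag[0] >> 5) << 8) | tag[1]) == 300);         // offset
      return 0;
    }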
assert(kMaxHashTableSize >= 256); - size_t htsize = 256; + int htsize = 256; while (htsize < kMaxHashTableSize && htsize < input_size) { htsize <<= 1; } + CHECK_EQ(0, htsize & (htsize - 1)) << ": must be power of two"; + CHECK_LE(htsize, kMaxHashTableSize) << ": hash table too large"; uint16* table; if (htsize <= ARRAYSIZE(small_table_)) { @@ -270,49 +272,16 @@ uint16* WorkingMemory::GetHashTable(size_t input_size, int* table_size) { } } // end namespace internal -// For 0 <= offset <= 4, GetUint32AtOffset(GetEightBytesAt(p), offset) will +// For 0 <= offset <= 4, GetUint32AtOffset(UNALIGNED_LOAD64(p), offset) will // equal UNALIGNED_LOAD32(p + offset). Motivation: On x86-64 hardware we have // empirically found that overlapping loads such as // UNALIGNED_LOAD32(p) ... UNALIGNED_LOAD32(p+1) ... UNALIGNED_LOAD32(p+2) // are slower than UNALIGNED_LOAD64(p) followed by shifts and casts to uint32. -// -// We have different versions for 64- and 32-bit; ideally we would avoid the -// two functions and just inline the UNALIGNED_LOAD64 call into -// GetUint32AtOffset, but GCC (at least not as of 4.6) is seemingly not clever -// enough to avoid loading the value multiple times then. For 64-bit, the load -// is done when GetEightBytesAt() is called, whereas for 32-bit, the load is -// done at GetUint32AtOffset() time. - -#ifdef ARCH_K8 - -typedef uint64 EightBytesReference; - -static inline EightBytesReference GetEightBytesAt(const char* ptr) { - return UNALIGNED_LOAD64(ptr); -} - static inline uint32 GetUint32AtOffset(uint64 v, int offset) { - assert(offset >= 0); - assert(offset <= 4); + DCHECK(0 <= offset && offset <= 4) << offset; return v >> (LittleEndian::IsLittleEndian() ? 8 * offset : 32 - 8 * offset); } -#else - -typedef const char* EightBytesReference; - -static inline EightBytesReference GetEightBytesAt(const char* ptr) { - return ptr; -} - -static inline uint32 GetUint32AtOffset(const char* v, int offset) { - assert(offset >= 0); - assert(offset <= 4); - return UNALIGNED_LOAD32(v + offset); -} - -#endif - // Flat array compression that does not emit the "uncompressed length" // prefix. Compresses "input" string to the "*op" buffer. // @@ -325,29 +294,29 @@ static inline uint32 GetUint32AtOffset(const char* v, int offset) { // Returns an "end" pointer into "op" buffer. // "end - op" is the compressed size of "input". namespace internal { -char* CompressFragment(const char* input, - size_t input_size, +char* CompressFragment(const char* const input, + const size_t input_size, char* op, uint16* table, const int table_size) { // "ip" is the input pointer, and "op" is the output pointer. const char* ip = input; - assert(input_size <= kBlockSize); - assert((table_size & (table_size - 1)) == 0); // table must be power of two + CHECK_LE(input_size, kBlockSize); + CHECK_EQ(table_size & (table_size - 1), 0) << ": table must be power of two"; const int shift = 32 - Bits::Log2Floor(table_size); - assert(static_cast<int>(kuint32max >> shift) == table_size - 1); + DCHECK_EQ(kuint32max >> shift, table_size - 1); const char* ip_end = input + input_size; const char* base_ip = ip; // Bytes in [next_emit, ip) will be emitted as literal bytes. Or // [next_emit, ip_end) after the main loop. 
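[Editor's note: the GetUint32AtOffset comment above states that, for 0 <= offset <= 4, GetUint32AtOffset(UNALIGNED_LOAD64(p), offset) equals UNALIGNED_LOAD32(p + offset). The sketch below makes the shift trick concrete; it assumes a little-endian machine and uses memcpy instead of the UNALIGNED_* macros.]

    // Little-endian sketch of the overlapping-load trick described above:
    // load eight bytes once, then peel off the four-byte window at any
    // offset 0..4 with a shift, instead of issuing four overlapping loads.
    #include <cassert>
    #include <cstdint>
    #include <cstring>

    static uint64_t Load64(const char* p) { uint64_t v; std::memcpy(&v, p, 8); return v; }
    static uint32_t Load32(const char* p) { uint32_t v; std::memcpy(&v, p, 4); return v; }

    static uint32_t GetUint32AtOffsetLE(uint64_t v, int offset) {
      assert(offset >= 0 && offset <= 4);
      return static_cast<uint32_t>(v >> (8 * offset));   // little-endian branch only
    }

    int main() {
      const char buf[] = "abcdefgh";
      const uint64_t v = Load64(buf);
      for (int off = 0; off <= 4; ++off) {
        assert(GetUint32AtOffsetLE(v, off) == Load32(buf + off));
      }
      return 0;
    }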
const char* next_emit = ip; - const size_t kInputMarginBytes = 15; + const int kInputMarginBytes = 15; if (PREDICT_TRUE(input_size >= kInputMarginBytes)) { const char* ip_limit = input + input_size - kInputMarginBytes; for (uint32 next_hash = Hash(++ip, shift); ; ) { - assert(next_emit < ip); + DCHECK_LT(next_emit, ip); // The body of this loop calls EmitLiteral once and then EmitCopy one or // more times. (The exception is that when we're close to exhausting // the input we goto emit_remainder.) @@ -380,7 +349,7 @@ char* CompressFragment(const char* input, do { ip = next_ip; uint32 hash = next_hash; - assert(hash == Hash(ip, shift)); + DCHECK_EQ(hash, Hash(ip, shift)); uint32 bytes_between_hash_lookups = skip++ >> 5; next_ip = ip + bytes_between_hash_lookups; if (PREDICT_FALSE(next_ip > ip_limit)) { @@ -388,8 +357,8 @@ char* CompressFragment(const char* input, } next_hash = Hash(next_ip, shift); candidate = base_ip + table[hash]; - assert(candidate >= base_ip); - assert(candidate < ip); + DCHECK_GE(candidate, base_ip); + DCHECK_LT(candidate, ip); table[hash] = ip - base_ip; } while (PREDICT_TRUE(UNALIGNED_LOAD32(ip) != @@ -398,7 +367,7 @@ char* CompressFragment(const char* input, // Step 2: A 4-byte match has been found. We'll later see if more // than 4 bytes match. But, prior to the match, input // bytes [next_emit, ip) are unmatched. Emit them as "literal bytes." - assert(next_emit + 16 <= ip_end); + DCHECK_LE(next_emit + 16, ip_end); op = EmitLiteral(op, next_emit, ip - next_emit, true); // Step 3: Call EmitCopy, and then see if another EmitCopy could @@ -409,7 +378,7 @@ char* CompressFragment(const char* input, // though we don't yet know how big the literal will be. We handle that // by proceeding to the next iteration of the main loop. We also can exit // this loop via goto if we get close to exhausting the input. - EightBytesReference input_bytes; + uint64 input_bytes = 0; uint32 candidate_bytes = 0; do { @@ -418,8 +387,8 @@ char* CompressFragment(const char* input, const char* base = ip; int matched = 4 + FindMatchLength(candidate + 4, ip + 4, ip_end); ip += matched; - size_t offset = base - candidate; - assert(0 == memcmp(base, candidate, matched)); + int offset = base - candidate; + DCHECK_EQ(0, memcmp(base, candidate, matched)); op = EmitCopy(op, offset, matched); // We could immediately start working at ip now, but to improve // compression we first update table[Hash(ip - 1, ...)]. @@ -428,7 +397,7 @@ char* CompressFragment(const char* input, if (PREDICT_FALSE(ip >= ip_limit)) { goto emit_remainder; } - input_bytes = GetEightBytesAt(insert_tail); + input_bytes = UNALIGNED_LOAD64(insert_tail); uint32 prev_hash = HashBytes(GetUint32AtOffset(input_bytes, 0), shift); table[prev_hash] = ip - base_ip - 1; uint32 cur_hash = HashBytes(GetUint32AtOffset(input_bytes, 1), shift); @@ -466,26 +435,12 @@ char* CompressFragment(const char* input, // bool CheckLength() const; // // // Called repeatedly during decompression -// bool Append(const char* ip, size_t length); -// bool AppendFromSelf(uint32 offset, size_t length); -// -// // The difference between TryFastAppend and Append is that TryFastAppend -// // is allowed to read up to <available> bytes from the input buffer, -// // whereas Append is allowed to read <length>. -// // -// // Also, TryFastAppend is allowed to return false, declining the append, -// // without it being a fatal error -- just "return false" would be -// // a perfectly legal implementation of TryFastAppend. 
The intention -// // is for TryFastAppend to allow a fast path in the common case of -// // a small append. -// // -// // NOTE(user): TryFastAppend must always return decline (return false) -// // if <length> is 61 or more, as in this case the literal length is not -// // decoded fully. In practice, this should not be a big problem, -// // as it is unlikely that one would implement a fast path accepting -// // this much data. -// bool TryFastAppend(const char* ip, size_t available, size_t length); +// bool Append(const char* ip, uint32 length, bool allow_fast_path); +// bool AppendFromSelf(uint32 offset, uint32 length); // }; +// +// "allow_fast_path" is a parameter that says if there is at least 16 +// readable bytes in "ip". It is currently only used by SnappyArrayWriter. // ----------------------------------------------------------------------- // Lookup table for decompression code. Generated by ComputeTable() below. @@ -552,9 +507,9 @@ static uint16 MakeEntry(unsigned int extra, unsigned int len, unsigned int copy_offset) { // Check that all of the fields fit within the allocated space - assert(extra == (extra & 0x7)); // At most 3 bits - assert(copy_offset == (copy_offset & 0x7)); // At most 3 bits - assert(len == (len & 0x7f)); // At most 7 bits + DCHECK_EQ(extra, extra & 0x7); // At most 3 bits + DCHECK_EQ(copy_offset, copy_offset & 0x7); // At most 3 bits + DCHECK_EQ(len, len & 0x7f); // At most 7 bits return len | (copy_offset << 8) | (extra << 11); } @@ -612,15 +567,9 @@ static void ComputeTable() { } // Check that each entry was initialized exactly once. - if (assigned != 256) { - fprintf(stderr, "ComputeTable: assigned only %d of 256\n", assigned); - abort(); - } + CHECK_EQ(assigned, 256); for (int i = 0; i < 256; i++) { - if (dst[i] == 0xffff) { - fprintf(stderr, "ComputeTable: did not assign byte %d\n", i); - abort(); - } + CHECK_NE(dst[i], 0xffff); } if (FLAGS_snappy_dump_decompression_table) { @@ -635,13 +584,10 @@ static void ComputeTable() { // Check that computed table matched recorded table for (int i = 0; i < 256; i++) { - if (dst[i] != char_table[i]) { - fprintf(stderr, "ComputeTable: byte %d: computed (%x), expect (%x)\n", - i, static_cast<int>(dst[i]), static_cast<int>(char_table[i])); - abort(); - } + CHECK_EQ(dst[i], char_table[i]); } } +REGISTER_MODULE_INITIALIZER(snappy, ComputeTable()); #endif /* !NDEBUG */ // Helper class for decompression @@ -684,7 +630,7 @@ class SnappyDecompressor { // On succcess, stores the length in *result and returns true. // On failure, returns false. bool ReadUncompressedLength(uint32* result) { - assert(ip_ == NULL); // Must not have read anything yet + DCHECK(ip_ == NULL); // Must not have read anything yet // Length is encoded in 1..5 bytes *result = 0; uint32 shift = 0; @@ -709,41 +655,25 @@ class SnappyDecompressor { template <class Writer> void DecompressAllTags(Writer* writer) { const char* ip = ip_; - - // We could have put this refill fragment only at the beginning of the loop. - // However, duplicating it at the end of each branch gives the compiler more - // scope to optimize the <ip_limit_ - ip> expression based on the local - // context, which overall increases speed. 
- #define MAYBE_REFILL() \ - if (ip_limit_ - ip < 5) { \ - ip_ = ip; \ - if (!RefillTag()) return; \ - ip = ip_; \ - } - - MAYBE_REFILL(); for ( ;; ) { + if (ip_limit_ - ip < 5) { + ip_ = ip; + if (!RefillTag()) return; + ip = ip_; + } + const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip++)); + const uint32 entry = char_table[c]; + const uint32 trailer = LittleEndian::Load32(ip) & wordmask[entry >> 11]; + ip += entry >> 11; + const uint32 length = entry & 0xff; if ((c & 0x3) == LITERAL) { - size_t literal_length = (c >> 2) + 1u; - if (writer->TryFastAppend(ip, ip_limit_ - ip, literal_length)) { - assert(literal_length < 61); - ip += literal_length; - MAYBE_REFILL(); - continue; - } - if (PREDICT_FALSE(literal_length >= 61)) { - // Long literal. - const size_t literal_length_length = literal_length - 60; - literal_length = - (LittleEndian::Load32(ip) & wordmask[literal_length_length]) + 1; - ip += literal_length_length; - } - - size_t avail = ip_limit_ - ip; + uint32 literal_length = length + trailer; + uint32 avail = ip_limit_ - ip; while (avail < literal_length) { - if (!writer->Append(ip, avail)) return; + bool allow_fast_path = (avail >= 16); + if (!writer->Append(ip, avail, allow_fast_path)) return; literal_length -= avail; reader_->Skip(peeked_); size_t n; @@ -753,17 +683,12 @@ class SnappyDecompressor { if (avail == 0) return; // Premature end of input ip_limit_ = ip + avail; } - if (!writer->Append(ip, literal_length)) { + bool allow_fast_path = (avail >= 16); + if (!writer->Append(ip, literal_length, allow_fast_path)) { return; } ip += literal_length; - MAYBE_REFILL(); } else { - const uint32 entry = char_table[c]; - const uint32 trailer = LittleEndian::Load32(ip) & wordmask[entry >> 11]; - const uint32 length = entry & 0xff; - ip += entry >> 11; - // copy_offset/256 is encoded in bits 8..10. By just fetching // those bits, we get copy_offset (since the bit-field starts at // bit 8). 
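[Editor's note: MakeEntry above packs three fields into each 16-bit char_table entry (length in the low 8 bits, copy_offset/256 in bits 8..10, the number of extra tag bytes from bit 11 up), and DecompressAllTags unpacks them with entry & 0xff, entry >> 11 and the wordmask lookup. A compact round-trip sketch, not part of the diff:]

    // Round-trip sketch of the char_table entry layout used above:
    //   entry = len | (copy_offset << 8) | (extra << 11)
    #include <cassert>
    #include <cstdint>

    static uint16_t MakeEntry(unsigned extra, unsigned len, unsigned copy_offset) {
      assert(extra == (extra & 0x7));              // at most 3 bits
      assert(copy_offset == (copy_offset & 0x7));  // at most 3 bits
      assert(len == (len & 0x7f));                 // at most 7 bits
      return len | (copy_offset << 8) | (extra << 11);
    }

    int main() {
      const uint16_t entry = MakeEntry(/*extra=*/1, /*len=*/4, /*copy_offset=*/2);
      assert((entry & 0xff) == 4);        // length, as read in DecompressAllTags
      assert(((entry >> 8) & 0x7) == 2);  // copy_offset / 256
      assert((entry >> 11) == 1);         // extra tag bytes to consume
      return 0;
    }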
@@ -771,11 +696,8 @@ class SnappyDecompressor { if (!writer->AppendFromSelf(copy_offset + trailer, length)) { return; } - MAYBE_REFILL(); } } - -#undef MAYBE_REFILL } }; @@ -795,11 +717,11 @@ bool SnappyDecompressor::RefillTag() { } // Read the tag character - assert(ip < ip_limit_); + DCHECK_LT(ip, ip_limit_); const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip)); const uint32 entry = char_table[c]; const uint32 needed = (entry >> 11) + 1; // +1 byte for 'c' - assert(needed <= sizeof(scratch_)); + DCHECK_LE(needed, sizeof(scratch_)); // Read more bytes from reader if needed uint32 nbuf = ip_limit_ - ip; @@ -820,7 +742,7 @@ bool SnappyDecompressor::RefillTag() { nbuf += to_add; reader_->Skip(to_add); } - assert(nbuf == needed); + DCHECK_EQ(nbuf, needed); ip_ = scratch_; ip_limit_ = scratch_ + needed; } else if (nbuf < 5) { @@ -846,15 +768,6 @@ static bool InternalUncompress(Source* r, SnappyDecompressor decompressor(r); uint32 uncompressed_len = 0; if (!decompressor.ReadUncompressedLength(&uncompressed_len)) return false; - return InternalUncompressAllTags( - &decompressor, writer, uncompressed_len, max_len); -} - -template <typename Writer> -static bool InternalUncompressAllTags(SnappyDecompressor* decompressor, - Writer* writer, - uint32 uncompressed_len, - uint32 max_len) { // Protect against possible DoS attack if (static_cast<uint64>(uncompressed_len) > max_len) { return false; @@ -863,8 +776,8 @@ static bool InternalUncompressAllTags(SnappyDecompressor* decompressor, writer->SetExpectedLength(uncompressed_len); // Process the entire input - decompressor->DecompressAllTags(writer); - return (decompressor->eof() && writer->CheckLength()); + decompressor.DecompressAllTags(writer); + return (decompressor.eof() && writer->CheckLength()); } bool GetUncompressedLength(Source* source, uint32* result) { @@ -874,7 +787,7 @@ bool GetUncompressedLength(Source* source, uint32* result) { size_t Compress(Source* reader, Sink* writer) { size_t written = 0; - size_t N = reader->Available(); + int N = reader->Available(); char ulength[Varint::kMax32]; char* p = Varint::Encode32(ulength, N); writer->Append(ulength, p-ulength); @@ -888,11 +801,11 @@ size_t Compress(Source* reader, Sink* writer) { // Get next block to compress (without copying if possible) size_t fragment_size; const char* fragment = reader->Peek(&fragment_size); - assert(fragment_size != 0); // premature end of input - const size_t num_to_read = min(N, kBlockSize); + DCHECK_NE(fragment_size, 0) << ": premature end of input"; + const int num_to_read = min(N, kBlockSize); size_t bytes_read = fragment_size; - size_t pending_advance = 0; + int pending_advance = 0; if (bytes_read >= num_to_read) { // Buffer returned by reader is large enough pending_advance = num_to_read; @@ -915,11 +828,11 @@ size_t Compress(Source* reader, Sink* writer) { bytes_read += n; reader->Skip(n); } - assert(bytes_read == num_to_read); + DCHECK_EQ(bytes_read, num_to_read); fragment = scratch; fragment_size = num_to_read; } - assert(fragment_size == num_to_read); + DCHECK_EQ(fragment_size, num_to_read); // Get encoding table for compression int table_size; @@ -980,42 +893,34 @@ class SnappyArrayWriter { return op_ == op_limit_; } - inline bool Append(const char* ip, size_t len) { + inline bool Append(const char* ip, uint32 len, bool allow_fast_path) { char* op = op_; - const size_t space_left = op_limit_ - op; - if (space_left < len) { - return false; + const int space_left = op_limit_ - op; + if (allow_fast_path && len <= 16 && space_left >= 16) { + 
// Fast path, used for the majority (about 90%) of dynamic invocations. + UNALIGNED_STORE64(op, UNALIGNED_LOAD64(ip)); + UNALIGNED_STORE64(op + 8, UNALIGNED_LOAD64(ip + 8)); + } else { + if (space_left < len) { + return false; + } + memcpy(op, ip, len); } - memcpy(op, ip, len); op_ = op + len; return true; } - inline bool TryFastAppend(const char* ip, size_t available, size_t len) { - char* op = op_; - const size_t space_left = op_limit_ - op; - if (len <= 16 && available >= 16 && space_left >= 16) { - // Fast path, used for the majority (about 95%) of invocations. - UnalignedCopy64(ip, op); - UnalignedCopy64(ip + 8, op + 8); - op_ = op + len; - return true; - } else { - return false; - } - } - - inline bool AppendFromSelf(size_t offset, size_t len) { + inline bool AppendFromSelf(uint32 offset, uint32 len) { char* op = op_; - const size_t space_left = op_limit_ - op; + const int space_left = op_limit_ - op; if (op - base_ <= offset - 1u) { // -1u catches offset==0 return false; } if (len <= 16 && offset >= 8 && space_left >= 16) { // Fast path, used for the majority (70-80%) of dynamic invocations. - UnalignedCopy64(op - offset, op); - UnalignedCopy64(op - offset + 8, op + 8); + UNALIGNED_STORE64(op, UNALIGNED_LOAD64(op - offset)); + UNALIGNED_STORE64(op + 8, UNALIGNED_LOAD64(op - offset + 8)); } else { if (space_left >= len + kMaxIncrementCopyOverflow) { IncrementalCopyFastPath(op - offset, op, len); @@ -1071,14 +976,11 @@ class SnappyDecompressionValidator { inline bool CheckLength() const { return expected_ == produced_; } - inline bool Append(const char* ip, size_t len) { + inline bool Append(const char* ip, uint32 len, bool allow_fast_path) { produced_ += len; return produced_ <= expected_; } - inline bool TryFastAppend(const char* ip, size_t available, size_t length) { - return false; - } - inline bool AppendFromSelf(size_t offset, size_t len) { + inline bool AppendFromSelf(uint32 offset, uint32 len) { if (produced_ <= offset - 1u) return false; // -1u catches offset==0 produced_ += len; return produced_ <= expected_; diff --git a/src/snappy/google-snappy/snappy.h b/src/snappy/c_src/snappy/snappy.h index 03ef6ce5b..8d6ef2294 100644 --- a/src/snappy/google-snappy/snappy.h +++ b/src/snappy/c_src/snappy/snappy.h @@ -56,13 +56,6 @@ namespace snappy { // number of bytes written. size_t Compress(Source* source, Sink* sink); - // Find the uncompressed length of the given stream, as given by the header. - // Note that the true length could deviate from this; the stream could e.g. - // be truncated. - // - // Also note that this leaves "*source" in a state that is unsuitable for - // further operations, such as RawUncompress(). You will need to rewind - // or recreate the source yourself before attempting any further calls. bool GetUncompressedLength(Source* source, uint32* result); // ------------------------------------------------------------------------ @@ -142,20 +135,19 @@ namespace snappy { bool IsValidCompressedBuffer(const char* compressed, size_t compressed_length); - // The size of a compression block. Note that many parts of the compression - // code assumes that kBlockSize <= 65536; in particular, the hash table - // can only store 16-bit offsets, and EmitCopy() also assumes the offset - // is 65535 bytes or less. Note also that if you change this, it will - // affect the framing format (see framing_format.txt). 
+ // *** DO NOT CHANGE THE VALUE OF kBlockSize *** // - // Note that there might be older data around that is compressed with larger - // block sizes, so the decompression code should not rely on the - // non-existence of long backreferences. - static const int kBlockLog = 16; - static const size_t kBlockSize = 1 << kBlockLog; + // New Compression code chops up the input into blocks of at most + // the following size. This ensures that back-references in the + // output never cross kBlockSize block boundaries. This can be + // helpful in implementing blocked decompression. However the + // decompression code should not rely on this guarantee since older + // compression code may not obey it. + static const int kBlockLog = 15; + static const int kBlockSize = 1 << kBlockLog; static const int kMaxHashTableBits = 14; - static const size_t kMaxHashTableSize = 1 << kMaxHashTableBits; + static const int kMaxHashTableSize = 1 << kMaxHashTableBits; } // end namespace snappy diff --git a/src/snappy/snappy_nif.cc b/src/snappy/c_src/snappy_nif.cc index 30b9c66c0..93c18595b 100644 --- a/src/snappy/snappy_nif.cc +++ b/src/snappy/c_src/snappy_nif.cc @@ -15,11 +15,12 @@ * the License. **/ +#include <iostream> #include <cstring> #include "erl_nif_compat.h" -#include "google-snappy/snappy.h" -#include "google-snappy/snappy-sinksource.h" +#include "snappy/snappy.h" +#include "snappy/snappy-sinksource.h" #ifdef OTP_R13B03 #error OTP R13B03 not supported. Upgrade to R13B04 or later. @@ -40,7 +41,7 @@ class SnappyNifSink : public snappy::Sink public: SnappyNifSink(ErlNifEnv* e); ~SnappyNifSink(); - + void Append(const char* data, size_t n); char* GetAppendBuffer(size_t len, char* scratch); ErlNifBinary& getBin(); @@ -79,7 +80,7 @@ char* SnappyNifSink::GetAppendBuffer(size_t len, char* scratch) { size_t sz; - + if((length + len) > bin.size) { sz = (len * 4) < 8192 ? 8192 : (len * 4); @@ -118,7 +119,7 @@ static inline ERL_NIF_TERM make_ok(ErlNifEnv* env, ERL_NIF_TERM mesg) { ERL_NIF_TERM ok = make_atom(env, "ok"); - return enif_make_tuple2(env, ok, mesg); + return enif_make_tuple2(env, ok, mesg); } diff --git a/src/snappy/snappy.app.in b/src/snappy/src/snappy.app.in index ecf5b008a..965a1905e 100644 --- a/src/snappy/snappy.app.in +++ b/src/snappy/src/snappy.app.in @@ -1,7 +1,7 @@ {application, snappy, [ {description, "snappy compressor/decompressor Erlang NIF wrapper"}, - {vsn, "1.1.0"}, + {vsn, "1.1.1"}, {registered, []}, {applications, [ kernel, diff --git a/src/snappy/snappy.erl b/src/snappy/src/snappy.erl index 7d3d36a8d..7d3d36a8d 100644 --- a/src/snappy/snappy.erl +++ b/src/snappy/src/snappy.erl diff --git a/src/snappy/test/snappy_tests.erl b/src/snappy/test/snappy_tests.erl new file mode 100644 index 000000000..ac39c58be --- /dev/null +++ b/src/snappy/test/snappy_tests.erl @@ -0,0 +1,74 @@ +%% Copyright 2011, Filipe David Manana <fdmanana@apache.org> +%% Web: http://github.com/fdmanana/snappy-erlang-nif +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); you may not +%% use this file except in compliance with the License. You may obtain a copy of +%% the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +%% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +%% License for the specific language governing permissions and limitations under +%% the License. + +-module(snappy_tests). 
+-include_lib("eunit/include/eunit.hrl"). + + +compression_test_() -> + {timeout, 60, [fun compression/0]}. + +decompression_test_() -> + {timeout, 60, [fun decompression/0]}. + + +compression() -> + DataIoList = lists:duplicate(11, <<"words that go unspoken, deeds that go undone">>), + Data = iolist_to_binary(DataIoList), + Result = snappy:compress(Data), + ?assertMatch({ok, _}, Result), + {ok, Compressed} = Result, + + ?assertEqual(true, byte_size(Compressed) < byte_size(Data)), + + ?assertEqual(true, snappy:is_valid(Compressed)), + ?assertEqual(false, snappy:is_valid(Data)), + ?assertEqual(false, snappy:is_valid(<<"foobar123">>)), + ?assertEqual({ok, byte_size(Data)}, snappy:uncompressed_length(Compressed)), + + Result2 = snappy:compress(DataIoList), + ?assertMatch({ok, _}, Result2), + {ok, Compressed2} = Result2, + + ?assertEqual(byte_size(Compressed2), byte_size(Compressed)), + ?assertEqual(true, snappy:is_valid(Compressed2)), + ?assertEqual({ok, byte_size(Data)}, snappy:uncompressed_length(Compressed2)), + ok. + + +decompression() -> + DataIoList = lists:duplicate(11, <<"words that go unspoken, deeds that go undone">>), + Data = iolist_to_binary(DataIoList), + Result = snappy:compress(Data), + ?assertMatch({ok, _}, Result), + {ok, Compressed} = Result, + ?assertEqual({ok, Data}, snappy:decompress(Compressed)), + + Result2 = snappy:compress(DataIoList), + ?assertMatch({ok, _}, Result2), + {ok, Compressed2} = Result2, + ?assertEqual({ok, Data}, snappy:decompress(Compressed2)), + + BigData = <<"mVPZzfDzKNeZrh1QdkMEgh2U0Bv2i3+bLJaCqgNibXuMuwfjrqTuxPGupxjI", + "xEbuYR+u/KZvSDhoxnkpPbgJo7oiQv2ibDrrGZx7RDs3Nn7Ww51B7+zUL4tr", + "G+16TlJilJT47Z4cQn8EpWex2bMRFAoJ6AMJAodLGbiD78yUyIorRKVcCa+k", + "udzjsqYAoXzW/z8JCB6rbGGSbnLyqztR//ch5sRwSvYARlV+IamzBkDXFZxj", + "5TAwAl2ZcbCeMX0qgXX4EonVZxc=">>, + Result3 = snappy:compress(BigData), + ?assertMatch({ok, _}, Result3), + {ok, Compressed3} = Result3, + ?assertEqual({ok, BigData}, snappy:decompress(Compressed3)), + ok. + |