summaryrefslogtreecommitdiff
path: root/storage/tokudb/PerconaFT/ft/serialize/compress.cc
diff options
context:
space:
mode:
Diffstat (limited to 'storage/tokudb/PerconaFT/ft/serialize/compress.cc')
-rw-r--r--storage/tokudb/PerconaFT/ft/serialize/compress.cc257
1 files changed, 257 insertions, 0 deletions
diff --git a/storage/tokudb/PerconaFT/ft/serialize/compress.cc b/storage/tokudb/PerconaFT/ft/serialize/compress.cc
new file mode 100644
index 00000000000..1719b6b7cb5
--- /dev/null
+++ b/storage/tokudb/PerconaFT/ft/serialize/compress.cc
@@ -0,0 +1,257 @@
+/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
+// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
+#ident "$Id$"
+/*======
+This file is part of PerconaFT.
+
+
+Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
+
+ PerconaFT is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License, version 2,
+ as published by the Free Software Foundation.
+
+ PerconaFT is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
+
+----------------------------------------
+
+ PerconaFT is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License, version 3,
+ as published by the Free Software Foundation.
+
+ PerconaFT is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
+======= */
+
+#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+
+#include <toku_portability.h>
+#include <util/scoped_malloc.h>
+
+#include <zlib.h>
+#include <lzma.h>
+#include <snappy.h>
+
+#include "compress.h"
+#include "memory.h"
+#include "quicklz.h"
+#include "toku_assert.h"
+
+static inline enum toku_compression_method
+normalize_compression_method(enum toku_compression_method method)
+// Effect: Map the alias methods onto the concrete algorithm that implements
+//  them: "default" and "fast" become QuickLZ, "small" becomes LZMA.
+//  Any other value is handed back unchanged.
+{
+    const bool wants_quicklz = method == TOKU_DEFAULT_COMPRESSION_METHOD ||
+                               method == TOKU_FAST_COMPRESSION_METHOD;
+    if (wants_quicklz) {
+        return TOKU_QUICKLZ_METHOD;
+    }
+    const bool wants_lzma = method == TOKU_SMALL_COMPRESSION_METHOD;
+    return wants_lzma ? TOKU_LZMA_METHOD : method;
+}
+
+size_t toku_compress_bound (enum toku_compression_method a, size_t size)
+// See compress.h for the specification of this function.
+// Each bound covers the payload plus the header byte(s) that toku_compress() writes for that method.
+{
+    const enum toku_compression_method method = normalize_compression_method(a);
+
+    // Stored uncompressed: one header byte, then the input verbatim.
+    if (method == TOKU_NO_COMPRESSION) return size + 1;
+    // One extra for the rfc1950-style header byte (bits 0-3 are the method, bits 4-7 the compression level).
+    if (method == TOKU_LZMA_METHOD) return 1+lzma_stream_buffer_bound(size);
+    // The quicklz manual says 400 bytes is enough; one more for the rfc1950-style header byte.
+    if (method == TOKU_QUICKLZ_METHOD) return size+400 + 1;
+    // zlib's own bound; toku_compress() adds no byte of its own for this method.
+    if (method == TOKU_ZLIB_METHOD) return compressBound (size);
+    // One extra for the rfc1950-style header byte and one to store windowBits.
+    if (method == TOKU_ZLIB_WITHOUT_CHECKSUM_METHOD) return 2+deflateBound(nullptr, size);
+    // One extra for the header byte.
+    if (method == TOKU_SNAPPY_METHOD) return (1 + snappy::MaxCompressedLength(size));
+
+    // Only reached for a value that is not one of the methods above.
+    assert(0);
+    return 0;
+}
+
+void toku_compress (enum toku_compression_method a,
+                    // the following types and naming conventions come from zlib.h
+                    Bytef *dest, uLongf *destLen,
+                    const Bytef *source, uLong sourceLen)
+// See compress.h for the specification of this function.
+// Effect: Compress source[0..sourceLen) into dest with method a (friendly names are resolved first).
+//  On entry *destLen is the capacity of dest; on return it is the number of bytes produced.
+//  The low 4 bits of dest[0] identify the method.  Capacity and library failures abort via assertions.
+{
+    static const int zlib_compression_level = 5;
+    static const int zlib_without_checksum_windowbits = -15;
+    a = normalize_compression_method(a);
+    assert(sourceLen < (1LL << 32));
+    switch (a) {
+    case TOKU_NO_COMPRESSION:
+        assert(sourceLen + 1 <= *destLen); // header byte + verbatim copy must fit before we write
+        dest[0] = TOKU_NO_COMPRESSION;
+        memcpy(dest + 1, source, sourceLen);
+        *destLen = sourceLen + 1;
+        return;
+    case TOKU_ZLIB_METHOD: {
+        int r = compress2(dest, destLen, source, sourceLen, zlib_compression_level);
+        assert(r == Z_OK);
+        assert((dest[0]&0xF) == TOKU_ZLIB_METHOD); // zlib's own first byte doubles as our header byte
+        return;
+    }
+    case TOKU_QUICKLZ_METHOD: {
+        if (sourceLen==0) {
+            // quicklz requires at least one byte, so we handle this ourselves
+            assert(1 <= *destLen);
+            *destLen = 1;
+        } else {
+            toku::scoped_calloc qsc_buf(sizeof(qlz_state_compress));
+            qlz_state_compress *qsc = reinterpret_cast<qlz_state_compress *>(qsc_buf.get());
+            size_t actual_destlen = qlz_compress(source, (char*)(dest+1), sourceLen, qsc);
+            assert(actual_destlen + 1 <= *destLen);
+            *destLen = actual_destlen + 1; // add one for the rfc1950-style header byte.
+        }
+        dest[0] = TOKU_QUICKLZ_METHOD + (QLZ_COMPRESSION_LEVEL << 4); // fill in that first byte
+        return;
+    }
+    case TOKU_LZMA_METHOD: {
+        const int lzma_compression_level = 2;
+        if (sourceLen==0) {
+            // lzma version 4.999 requires at least one byte, so we'll do it ourselves.
+            assert(1<=*destLen);
+            *destLen = 1;
+        } else {
+            size_t out_pos = 1; // encode after the header byte
+            lzma_ret r = lzma_easy_buffer_encode(lzma_compression_level, LZMA_CHECK_NONE, NULL,
+                                                 source, sourceLen, dest, &out_pos, *destLen);
+            if (r != LZMA_OK) {
+                fprintf(stderr, "lzma_easy_buffer_encode() returned %d\n", (int) r);
+            }
+            assert(r==LZMA_OK);
+            assert(out_pos <= *destLen); // filling dest exactly is legal, so "<=" rather than "<"
+            *destLen = out_pos;
+        }
+        dest[0] = TOKU_LZMA_METHOD + (lzma_compression_level << 4);
+        return;
+    }
+    case TOKU_ZLIB_WITHOUT_CHECKSUM_METHOD: {
+        lazy_assert(*destLen >= 2); // room for header + windowBits bytes; keeps "*destLen - 2" from wrapping
+        z_stream strm;
+        strm.zalloc = Z_NULL;
+        strm.zfree = Z_NULL;
+        strm.opaque = Z_NULL;
+        strm.next_in = const_cast<Bytef *>(source);
+        strm.avail_in = sourceLen;
+        int r = deflateInit2(&strm, zlib_compression_level, Z_DEFLATED,
+                             zlib_without_checksum_windowbits, 8, Z_DEFAULT_STRATEGY);
+        lazy_assert(r == Z_OK);
+        strm.next_out = dest + 2;
+        strm.avail_out = *destLen - 2;
+        r = deflate(&strm, Z_FINISH);
+        lazy_assert(r == Z_STREAM_END);
+        r = deflateEnd(&strm);
+        lazy_assert(r == Z_OK);
+        *destLen = strm.total_out + 2;
+        dest[0] = TOKU_ZLIB_WITHOUT_CHECKSUM_METHOD + (zlib_compression_level << 4);
+        dest[1] = static_cast<Bytef>(zlib_without_checksum_windowbits); // negative value, kept as its 8-bit two's-complement pattern
+        return;
+    }
+    case TOKU_SNAPPY_METHOD: {
+        snappy::RawCompress((char*)source, sourceLen, (char*)dest + 1, destLen);
+        *destLen += 1;
+        dest[0] = TOKU_SNAPPY_METHOD;
+        return;
+    }
+    default:
+        break;
+    }
+    // default fall through to error.
+    assert(0);
+}
+
+void toku_decompress (Bytef *dest, uLongf destLen,
+                      const Bytef *source, uLongf sourceLen)
+// See compress.h for the specification of this function.
+// Effect: Expand source[0..sourceLen) into dest[0..destLen); the low 4 bits of source[0] select the decoder.
+// Requires: sourceLen >= 1.  An unknown method, a decoder error or a failed length check aborts via an assertion.
+{
+    assert(sourceLen>=1); // need at least one byte for the RFC header.
+    switch (source[0] & 0xF) {
+    case TOKU_NO_COMPRESSION:
+        assert(sourceLen - 1 <= destLen); // never copy more than dest can hold
+        memcpy(dest, source + 1, sourceLen - 1);
+        return;
+    case TOKU_ZLIB_METHOD: {
+        uLongf actual_destlen = destLen;
+        int r = uncompress(dest, &actual_destlen, source, sourceLen);
+        assert(r == Z_OK);
+        assert(actual_destlen == destLen);
+        return;
+    }
+    case TOKU_QUICKLZ_METHOD:
+        if (sourceLen>1) {
+            toku::scoped_calloc state_buf(sizeof(qlz_state_decompress));
+            qlz_state_decompress *qsd = reinterpret_cast<qlz_state_decompress *>(state_buf.get());
+            uLongf actual_destlen = qlz_decompress((char*)source+1, dest, qsd);
+            assert(actual_destlen == destLen);
+        } else {
+            assert(destLen==0); // length 1 means there is no data, so do nothing.
+        }
+        return;
+    case TOKU_LZMA_METHOD: {
+        if (sourceLen>1) {
+            uint64_t memlimit = UINT64_MAX; // UINT64_MAX disables the memory-usage check
+            size_t out_pos = 0;
+            size_t in_pos = 1; // skip our header byte
+            lzma_ret r = lzma_stream_buffer_decode(&memlimit, 0 /* flags */, NULL /* allocator */,
+                                                   source, &in_pos, sourceLen,
+                                                   dest, &out_pos, destLen);
+            assert(r==LZMA_OK);
+            assert(out_pos == destLen);
+        } else {
+            assert(destLen==0); // length 1 means there is no data, so do nothing.
+        }
+        return;
+    }
+    case TOKU_ZLIB_WITHOUT_CHECKSUM_METHOD: {
+        lazy_assert(sourceLen >= 2); // header byte + windowBits byte; also keeps "sourceLen - 2" from wrapping
+        z_stream strm;
+        strm.next_in = const_cast<Bytef *>(source + 2);
+        strm.avail_in = sourceLen - 2;
+        strm.zalloc = Z_NULL;
+        strm.zfree = Z_NULL;
+        strm.opaque = Z_NULL;
+        // windowBits is stored negative (-15 becomes byte 241); plain char may be unsigned, so read it back as signed.
+        const int windowBits = static_cast<signed char>(source[1]);
+        int r = inflateInit2(&strm, windowBits);
+        lazy_assert(r == Z_OK);
+        strm.next_out = dest;
+        strm.avail_out = destLen;
+        r = inflate(&strm, Z_FINISH);
+        lazy_assert(r == Z_STREAM_END);
+        r = inflateEnd(&strm);
+        lazy_assert(r == Z_OK);
+        return;
+    }
+    case TOKU_SNAPPY_METHOD: {
+        bool r = snappy::RawUncompress((char*)source + 1, sourceLen - 1, (char*)dest);
+        assert(r);
+        return;
+    }
+    }
+    assert(0); // default fall through to error.
+}