summaryrefslogtreecommitdiff
path: root/Utilities/cmzstd/lib/compress/zstd_compress_internal.h
diff options
context:
space:
mode:
Diffstat (limited to 'Utilities/cmzstd/lib/compress/zstd_compress_internal.h')
-rw-r--r--Utilities/cmzstd/lib/compress/zstd_compress_internal.h395
1 files changed, 330 insertions, 65 deletions
diff --git a/Utilities/cmzstd/lib/compress/zstd_compress_internal.h b/Utilities/cmzstd/lib/compress/zstd_compress_internal.h
index 29bca59859..db73f6ce21 100644
--- a/Utilities/cmzstd/lib/compress/zstd_compress_internal.h
+++ b/Utilities/cmzstd/lib/compress/zstd_compress_internal.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -18,7 +18,8 @@
/*-*************************************
* Dependencies
***************************************/
-#include "zstd_internal.h"
+#include "../common/zstd_internal.h"
+#include "zstd_cwksp.h"
#ifdef ZSTD_MULTITHREAD
# include "zstdmt_compress.h"
#endif
@@ -33,13 +34,13 @@ extern "C" {
***************************************/
#define kSearchStrength 8
#define HASH_READ_SIZE 8
-#define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index 1 now means "unsorted".
+#define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted".
It could be confused for a real successor at index "1", if sorted as larger than its predecessor.
It's not a big deal though : candidate will just be sorted again.
- Additionnally, candidate position 1 will be lost.
+ Additionally, candidate position 1 will be lost.
But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
- The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be misdhandled after table re-use with a different strategy
- Constant required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
+ The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy.
+ This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
/*-*************************************
@@ -55,6 +56,14 @@ typedef struct ZSTD_prefixDict_s {
} ZSTD_prefixDict;
typedef struct {
+ void* dictBuffer;
+ void const* dict;
+ size_t dictSize;
+ ZSTD_dictContentType_e dictContentType;
+ ZSTD_CDict* cdict;
+} ZSTD_localDict;
+
+typedef struct {
U32 CTable[HUF_CTABLE_SIZE_U32(255)];
HUF_repeat repeatMode;
} ZSTD_hufCTables_t;
@@ -107,6 +116,7 @@ typedef struct {
U32 offCodeSumBasePrice; /* to compare to log2(offreq) */
ZSTD_OptPrice_e priceType; /* prices can be determined dynamically, or follow a pre-defined cost structure */
const ZSTD_entropyCTables_t* symbolCosts; /* pre-calculated dictionary statistics */
+ ZSTD_literalCompressionMode_e literalCompressionMode;
} optState_t;
typedef struct {
@@ -119,21 +129,26 @@ typedef struct {
BYTE const* base; /* All regular indexes relative to this position */
BYTE const* dictBase; /* extDict indexes relative to this position */
U32 dictLimit; /* below that point, need extDict */
- U32 lowLimit; /* below that point, no more data */
+ U32 lowLimit; /* below that point, no more valid data */
} ZSTD_window_t;
typedef struct ZSTD_matchState_t ZSTD_matchState_t;
struct ZSTD_matchState_t {
ZSTD_window_t window; /* State for window round buffer management */
- U32 loadedDictEnd; /* index of end of dictionary */
+ U32 loadedDictEnd; /* index of end of dictionary, within context's referential.
+ * When loadedDictEnd != 0, a dictionary is in use, and still valid.
+ * This relies on a mechanism to set loadedDictEnd=0 when dictionary is no longer within distance.
+ * Such mechanism is provided within ZSTD_window_enforceMaxDist() and ZSTD_checkDictValidity().
+ * When dict referential is copied into active context (i.e. not attached),
+ * loadedDictEnd == dictSize, since referential starts from zero.
+ */
U32 nextToUpdate; /* index from which to continue table update */
- U32 nextToUpdate3; /* index from which to continue table update */
- U32 hashLog3; /* dispatch table : larger == faster, more memory */
+ U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */
U32* hashTable;
U32* hashTable3;
U32* chainTable;
optState_t opt; /* optimal parser state */
- const ZSTD_matchState_t * dictMatchState;
+ const ZSTD_matchState_t* dictMatchState;
ZSTD_compressionParameters cParams;
};
@@ -151,6 +166,7 @@ typedef struct {
typedef struct {
ZSTD_window_t window; /* State for the window round buffer management */
ldmEntry_t* hashTable;
+ U32 loadedDictEnd;
BYTE* bucketOffsets; /* Next position in bucket to insert entry */
U64 hashPower; /* Used to compute the rolling hash.
* Depends on ldmParams.minMatchLength */
@@ -178,6 +194,13 @@ typedef struct {
size_t capacity; /* The capacity starting from `seq` pointer */
} rawSeqStore_t;
+typedef struct {
+ int collectSequences;
+ ZSTD_Sequence* seqStart;
+ size_t seqIndex;
+ size_t maxSequences;
+} SeqCollector;
+
struct ZSTD_CCtx_params_s {
ZSTD_format_e format;
ZSTD_compressionParameters cParams;
@@ -186,8 +209,15 @@ struct ZSTD_CCtx_params_s {
int compressionLevel;
int forceWindow; /* force back-references to respect limit of
* 1<<wLog, even for dictionary */
+ size_t targetCBlockSize; /* Tries to fit compressed block size to be around targetCBlockSize.
+ * No target when targetCBlockSize == 0.
+ * There is no guarantee on compressed block size */
+ int srcSizeHint; /* User's best guess of source size.
+ * Hint is not valid when srcSizeHint == 0.
+ * There is no guarantee that hint is close to actual source size */
ZSTD_dictAttachPref_e attachDictPref;
+ ZSTD_literalCompressionMode_e literalCompressionMode;
/* Multithreading: used to pass parameters to mtctx */
int nbWorkers;
@@ -210,9 +240,7 @@ struct ZSTD_CCtx_s {
ZSTD_CCtx_params appliedParams;
U32 dictID;
- int workSpaceOversizedDuration;
- void* workSpace;
- size_t workSpaceSize;
+ ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */
size_t blockSize;
unsigned long long pledgedSrcSizePlusOne; /* this way, 0 (default) == unknown */
unsigned long long consumedSrcSize;
@@ -220,6 +248,9 @@ struct ZSTD_CCtx_s {
XXH64_state_t xxhState;
ZSTD_customMem customMem;
size_t staticSize;
+ SeqCollector seqCollector;
+ int isFirstBlock;
+ int initialized;
seqStore_t seqStore; /* sequences storage ptrs */
ldmState_t ldmState; /* long distance matching state */
@@ -243,7 +274,7 @@ struct ZSTD_CCtx_s {
U32 frameEnded;
/* Dictionary */
- ZSTD_CDict* cdictLocal;
+ ZSTD_localDict localDict;
const ZSTD_CDict* cdict;
ZSTD_prefixDict prefixDict; /* single-usage dictionary */
@@ -295,26 +326,145 @@ MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase];
}
+typedef struct repcodes_s {
+ U32 rep[3];
+} repcodes_t;
+
+MEM_STATIC repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)
+{
+ repcodes_t newReps;
+ if (offset >= ZSTD_REP_NUM) { /* full offset */
+ newReps.rep[2] = rep[1];
+ newReps.rep[1] = rep[0];
+ newReps.rep[0] = offset - ZSTD_REP_MOVE;
+ } else { /* repcode */
+ U32 const repCode = offset + ll0;
+ if (repCode > 0) { /* note : if repCode==0, no change */
+ U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
+ newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2];
+ newReps.rep[1] = rep[0];
+ newReps.rep[0] = currentOffset;
+ } else { /* repCode == 0 */
+ memcpy(&newReps, rep, sizeof(newReps));
+ }
+ }
+ return newReps;
+}
+
+/* ZSTD_cParam_withinBounds:
+ * @return 1 if value is within cParam bounds,
+ * 0 otherwise */
+MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
+{
+ ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
+ if (ZSTD_isError(bounds.error)) return 0;
+ if (value < bounds.lowerBound) return 0;
+ if (value > bounds.upperBound) return 0;
+ return 1;
+}
+
+/* ZSTD_noCompressBlock() :
+ * Writes uncompressed block to dst buffer from given src.
+ * Returns the size of the block */
+MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
+{
+ U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3);
+ RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity,
+ dstSize_tooSmall, "dst buf too small for uncompressed block");
+ MEM_writeLE24(dst, cBlockHeader24);
+ memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
+ return ZSTD_blockHeaderSize + srcSize;
+}
+
+MEM_STATIC size_t ZSTD_rleCompressBlock (void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock)
+{
+ BYTE* const op = (BYTE*)dst;
+ U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3);
+ RETURN_ERROR_IF(dstCapacity < 4, dstSize_tooSmall, "");
+ MEM_writeLE24(op, cBlockHeader);
+ op[3] = src;
+ return 4;
+}
+
+
+/* ZSTD_minGain() :
+ * minimum compression required
+ * to generate a compress block or a compressed literals section.
+ * note : use same formula for both situations */
+MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
+{
+ U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6;
+ ZSTD_STATIC_ASSERT(ZSTD_btultra == 8);
+ assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
+ return (srcSize >> minlog) + 2;
+}
+
+MEM_STATIC int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams)
+{
+ switch (cctxParams->literalCompressionMode) {
+ case ZSTD_lcm_huffman:
+ return 0;
+ case ZSTD_lcm_uncompressed:
+ return 1;
+ default:
+ assert(0 /* impossible: pre-validated */);
+ /* fall-through */
+ case ZSTD_lcm_auto:
+ return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0);
+ }
+}
+
+/*! ZSTD_safecopyLiterals() :
+ * memcpy() function that won't read beyond more than WILDCOPY_OVERLENGTH bytes past ilimit_w.
+ * Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single
+ * large copies.
+ */
+static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) {
+ assert(iend > ilimit_w);
+ if (ip <= ilimit_w) {
+ ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap);
+ op += ilimit_w - ip;
+ ip = ilimit_w;
+ }
+ while (ip < iend) *op++ = *ip++;
+}
+
/*! ZSTD_storeSeq() :
- * Store a sequence (literal length, literals, offset code and match length code) into seqStore_t.
- * `offsetCode` : distance to match + 3 (values 1-3 are repCodes).
+ * Store a sequence (litlen, litPtr, offCode and mlBase) into seqStore_t.
+ * `offCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes).
* `mlBase` : matchLength - MINMATCH
+ * Allowed to overread literals up to litLimit.
*/
-MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t mlBase)
+HINT_INLINE UNUSED_ATTR
+void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t mlBase)
{
+ BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
+ BYTE const* const litEnd = literals + litLength;
#if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
static const BYTE* g_start = NULL;
if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */
{ U32 const pos = (U32)((const BYTE*)literals - g_start);
DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",
- pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offsetCode);
+ pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offCode);
}
#endif
assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
/* copy Literals */
assert(seqStorePtr->maxNbLit <= 128 KB);
assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
- ZSTD_wildcopy(seqStorePtr->lit, literals, litLength);
+ assert(literals + litLength <= litLimit);
+ if (litEnd <= litLimit_w) {
+ /* Common case we can use wildcopy.
+ * First copy 16 bytes, because literals are likely short.
+ */
+ assert(WILDCOPY_OVERLENGTH >= 16);
+ ZSTD_copy16(seqStorePtr->lit, literals);
+ if (litLength > 16) {
+ ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap);
+ }
+ } else {
+ ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w);
+ }
seqStorePtr->lit += litLength;
/* literal Length */
@@ -326,7 +476,7 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const v
seqStorePtr->sequences[0].litLength = (U16)litLength;
/* match offset */
- seqStorePtr->sequences[0].offset = offsetCode + 1;
+ seqStorePtr->sequences[0].offset = offCode + 1;
/* match Length */
if (mlBase>0xFFFF) {
@@ -349,8 +499,7 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
if (MEM_64bits()) {
# if defined(_MSC_VER) && defined(_WIN64)
unsigned long r = 0;
- _BitScanForward64( &r, (U64)val );
- return (unsigned)(r>>3);
+ return _BitScanForward64( &r, (U64)val ) ? (unsigned)(r >> 3) : 0;
# elif defined(__GNUC__) && (__GNUC__ >= 4)
return (__builtin_ctzll((U64)val) >> 3);
# else
@@ -367,8 +516,7 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
} else { /* 32 bits */
# if defined(_MSC_VER)
unsigned long r=0;
- _BitScanForward( &r, (U32)val );
- return (unsigned)(r>>3);
+ return _BitScanForward( &r, (U32)val ) ? (unsigned)(r >> 3) : 0;
# elif defined(__GNUC__) && (__GNUC__ >= 3)
return (__builtin_ctz((U32)val) >> 3);
# else
@@ -383,8 +531,7 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
if (MEM_64bits()) {
# if defined(_MSC_VER) && defined(_WIN64)
unsigned long r = 0;
- _BitScanReverse64( &r, val );
- return (unsigned)(r>>3);
+ return _BitScanReverse64( &r, val ) ? (unsigned)(r >> 3) : 0;
# elif defined(__GNUC__) && (__GNUC__ >= 4)
return (__builtin_clzll(val) >> 3);
# else
@@ -398,8 +545,7 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
} else { /* 32 bits */
# if defined(_MSC_VER)
unsigned long r = 0;
- _BitScanReverse( &r, (unsigned long)val );
- return (unsigned)(r>>3);
+ return _BitScanReverse( &r, (unsigned long)val ) ? (unsigned)(r >> 3) : 0;
# elif defined(__GNUC__) && (__GNUC__ >= 3)
return (__builtin_clz((U32)val) >> 3);
# else
@@ -554,6 +700,9 @@ MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64
/*-*************************************
* Round buffer management
***************************************/
+#if (ZSTD_WINDOWLOG_MAX_64 > 31)
+# error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX"
+#endif
/* Max current allowed */
#define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX))
/* Maximum chunk size before overflow correction needs to be called again */
@@ -643,7 +792,10 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
*/
U32 const cycleMask = (1U << cycleLog) - 1;
U32 const current = (U32)((BYTE const*)src - window->base);
- U32 const newCurrent = (current & cycleMask) + maxDist;
+ U32 const currentCycle0 = current & cycleMask;
+ /* Exclude zero so that newCurrent - maxDist >= 1. */
+ U32 const currentCycle1 = currentCycle0 == 0 ? (1U << cycleLog) : currentCycle0;
+ U32 const newCurrent = currentCycle1 + maxDist;
U32 const correction = current - newCurrent;
assert((maxDist & cycleMask) == 0);
assert(current > newCurrent);
@@ -652,8 +804,17 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
window->base += correction;
window->dictBase += correction;
- window->lowLimit -= correction;
- window->dictLimit -= correction;
+ if (window->lowLimit <= correction) window->lowLimit = 1;
+ else window->lowLimit -= correction;
+ if (window->dictLimit <= correction) window->dictLimit = 1;
+ else window->dictLimit -= correction;
+
+ /* Ensure we can still reference the full window. */
+ assert(newCurrent >= maxDist);
+ assert(newCurrent - maxDist >= 1);
+ /* Ensure that lowLimit and dictLimit didn't underflow. */
+ assert(window->lowLimit <= newCurrent);
+ assert(window->dictLimit <= newCurrent);
DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction,
window->lowLimit);
@@ -665,31 +826,49 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
* Updates lowLimit so that:
* (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
*
- * This allows a simple check that index >= lowLimit to see if index is valid.
- * This must be called before a block compression call, with srcEnd as the block
- * source end.
+ * It ensures index is valid as long as index >= lowLimit.
+ * This must be called before a block compression call.
+ *
+ * loadedDictEnd is only defined if a dictionary is in use for current compression.
+ * As the name implies, loadedDictEnd represents the index at end of dictionary.
+ * The value lies within context's referential, it can be directly compared to blockEndIdx.
*
- * If loadedDictEndPtr is not NULL, we set it to zero once we update lowLimit.
- * This is because dictionaries are allowed to be referenced as long as the last
- * byte of the dictionary is in the window, but once they are out of range,
- * they cannot be referenced. If loadedDictEndPtr is NULL, we use
- * loadedDictEnd == 0.
+ * If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0.
+ * If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit.
+ * This is because dictionaries are allowed to be referenced fully
+ * as long as the last byte of the dictionary is in the window.
+ * Once input has progressed beyond window size, dictionary cannot be referenced anymore.
*
- * In normal dict mode, the dict is between lowLimit and dictLimit. In
- * dictMatchState mode, lowLimit and dictLimit are the same, and the dictionary
- * is below them. forceWindow and dictMatchState are therefore incompatible.
+ * In normal dict mode, the dictionary lies between lowLimit and dictLimit.
+ * In dictMatchState mode, lowLimit and dictLimit are the same,
+ * and the dictionary is below them.
+ * forceWindow and dictMatchState are therefore incompatible.
*/
MEM_STATIC void
ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
- void const* srcEnd,
- U32 maxDist,
- U32* loadedDictEndPtr,
+ const void* blockEnd,
+ U32 maxDist,
+ U32* loadedDictEndPtr,
const ZSTD_matchState_t** dictMatchStatePtr)
{
- U32 const blockEndIdx = (U32)((BYTE const*)srcEnd - window->base);
- U32 loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
- DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u",
- (unsigned)blockEndIdx, (unsigned)maxDist);
+ U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
+ U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
+ DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
+ (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
+
+ /* - When there is no dictionary : loadedDictEnd == 0.
+ In which case, the test (blockEndIdx > maxDist) is merely to avoid
+ overflowing next operation `newLowLimit = blockEndIdx - maxDist`.
+ - When there is a standard dictionary :
+ Index referential is copied from the dictionary,
+ which means it starts from 0.
+ In which case, loadedDictEnd == dictSize,
+ and it makes sense to compare `blockEndIdx > maxDist + dictSize`
+ since `blockEndIdx` also starts from zero.
+ - When there is an attached dictionary :
+ loadedDictEnd is expressed within the referential of the context,
+ so it can be directly compared against blockEndIdx.
+ */
if (blockEndIdx > maxDist + loadedDictEnd) {
U32 const newLowLimit = blockEndIdx - maxDist;
if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
@@ -698,11 +877,54 @@ ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
(unsigned)window->dictLimit, (unsigned)window->lowLimit);
window->dictLimit = window->lowLimit;
}
- if (loadedDictEndPtr)
+ /* On reaching window size, dictionaries are invalidated */
+ if (loadedDictEndPtr) *loadedDictEndPtr = 0;
+ if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
+ }
+}
+
+/* Similar to ZSTD_window_enforceMaxDist(),
+ * but only invalidates dictionary
+ * when input progresses beyond window size.
+ * assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non NULL)
+ * loadedDictEnd uses same referential as window->base
+ * maxDist is the window size */
+MEM_STATIC void
+ZSTD_checkDictValidity(const ZSTD_window_t* window,
+ const void* blockEnd,
+ U32 maxDist,
+ U32* loadedDictEndPtr,
+ const ZSTD_matchState_t** dictMatchStatePtr)
+{
+ assert(loadedDictEndPtr != NULL);
+ assert(dictMatchStatePtr != NULL);
+ { U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
+ U32 const loadedDictEnd = *loadedDictEndPtr;
+ DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
+ (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
+ assert(blockEndIdx >= loadedDictEnd);
+
+ if (blockEndIdx > loadedDictEnd + maxDist) {
+ /* On reaching window size, dictionaries are invalidated.
+ * For simplification, if window size is reached anywhere within next block,
+ * the dictionary is invalidated for the full block.
+ */
+ DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)");
*loadedDictEndPtr = 0;
- if (dictMatchStatePtr)
*dictMatchStatePtr = NULL;
- }
+ } else {
+ if (*loadedDictEndPtr != 0) {
+ DEBUGLOG(6, "dictionary considered valid for current block");
+ } } }
+}
+
+MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
+ memset(window, 0, sizeof(*window));
+ window->base = (BYTE const*)"";
+ window->dictBase = (BYTE const*)"";
+ window->dictLimit = 1; /* start from 1, so that 1st position is valid */
+ window->lowLimit = 1; /* it ensures first and later CCtx usages compress the same */
+ window->nextSrc = window->base + 1; /* see issue #1241 */
}
/**
@@ -718,6 +940,10 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
BYTE const* const ip = (BYTE const*)src;
U32 contiguous = 1;
DEBUGLOG(5, "ZSTD_window_update");
+ if (srcSize == 0)
+ return contiguous;
+ assert(window->base != NULL);
+ assert(window->dictBase != NULL);
/* Check if blocks follow each other */
if (src != window->nextSrc) {
/* not contiguous */
@@ -728,7 +954,7 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
window->dictLimit = (U32)distanceFromBase;
window->dictBase = window->base;
window->base = ip - distanceFromBase;
- // ms->nextToUpdate = window->dictLimit;
+ /* ms->nextToUpdate = window->dictLimit; */
if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit; /* too small extDict */
contiguous = 0;
}
@@ -744,6 +970,33 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
return contiguous;
}
+/**
+ * Returns the lowest allowed match index. It may either be in the ext-dict or the prefix.
+ */
+MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 current, unsigned windowLog)
+{
+ U32 const maxDistance = 1U << windowLog;
+ U32 const lowestValid = ms->window.lowLimit;
+ U32 const withinWindow = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid;
+ U32 const isDictionary = (ms->loadedDictEnd != 0);
+ U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
+ return matchLowest;
+}
+
+/**
+ * Returns the lowest allowed match index in the prefix.
+ */
+MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 current, unsigned windowLog)
+{
+ U32 const maxDistance = 1U << windowLog;
+ U32 const lowestValid = ms->window.dictLimit;
+ U32 const withinWindow = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid;
+ U32 const isDictionary = (ms->loadedDictEnd != 0);
+ U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
+ return matchLowest;
+}
+
+
/* debug functions */
#if (DEBUGLEVEL>=2)
@@ -781,6 +1034,21 @@ MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
}
#endif
+/* ===============================================================
+ * Shared internal declarations
+ * These prototypes may be called from sources not in lib/compress
+ * =============================================================== */
+
+/* ZSTD_loadCEntropy() :
+ * dict : must point at beginning of a valid zstd dictionary.
+ * return : size of dictionary header (size of magic number + dict ID + entropy tables)
+ * assumptions : magic number supposed already checked
+ * and dictSize >= 8 */
+size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
+ short* offcodeNCount, unsigned* offcodeMaxValue,
+ const void* const dict, size_t dictSize);
+
+void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs);
/* ==============================================================
* Private declarations
@@ -790,6 +1058,7 @@ MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
/* ZSTD_getCParamsFromCCtxParams() :
* cParams are built depending on compressionLevel, src size hints,
* LDM and manually set compression parameters.
+ * Note: srcSizeHint == 0 means 0!
*/
ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize);
@@ -802,17 +1071,10 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
const void* dict, size_t dictSize,
const ZSTD_CDict* cdict,
- ZSTD_CCtx_params params, unsigned long long pledgedSrcSize);
+ const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize);
void ZSTD_resetSeqStore(seqStore_t* ssPtr);
-/*! ZSTD_compressStream_generic() :
- * Private use only. To be called from zstdmt_compress.c in single-thread mode. */
-size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
- ZSTD_outBuffer* output,
- ZSTD_inBuffer* input,
- ZSTD_EndDirective const flushMode);
-
/*! ZSTD_getCParamsFromCDict() :
* as the name implies */
ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict);
@@ -824,7 +1086,7 @@ size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
ZSTD_dictContentType_e dictContentType,
ZSTD_dictTableLoadMethod_e dtlm,
const ZSTD_CDict* cdict,
- ZSTD_CCtx_params params,
+ const ZSTD_CCtx_params* params,
unsigned long long pledgedSrcSize);
/* ZSTD_compress_advanced_internal() :
@@ -833,13 +1095,13 @@ size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
const void* dict,size_t dictSize,
- ZSTD_CCtx_params params);
+ const ZSTD_CCtx_params* params);
/* ZSTD_writeLastEmptyBlock() :
* output an empty Block with end-of-frame mark to complete a frame
* @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
- * or an error code if `dstCapcity` is too small (<ZSTD_blockHeaderSize)
+ * or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize)
*/
size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);
@@ -856,5 +1118,8 @@ size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);
*/
size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);
+/** ZSTD_cycleLog() :
+ * condition for correct operation : hashLog > 1 */
+U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat);
#endif /* ZSTD_COMPRESS_H */