diff options
Diffstat (limited to 'src/third_party/zstandard-1.4.3/zstd/contrib/experimental_dict_builders/randomDictBuilder/random.c')
-rw-r--r-- | src/third_party/zstandard-1.4.3/zstd/contrib/experimental_dict_builders/randomDictBuilder/random.c | 163 |
1 files changed, 163 insertions, 0 deletions
diff --git a/src/third_party/zstandard-1.4.3/zstd/contrib/experimental_dict_builders/randomDictBuilder/random.c b/src/third_party/zstandard-1.4.3/zstd/contrib/experimental_dict_builders/randomDictBuilder/random.c new file mode 100644 index 00000000000..5276bea96a5 --- /dev/null +++ b/src/third_party/zstandard-1.4.3/zstd/contrib/experimental_dict_builders/randomDictBuilder/random.c @@ -0,0 +1,163 @@ +/*-************************************* +* Dependencies +***************************************/ +#include <stdio.h> /* fprintf */ +#include <stdlib.h> /* malloc, free, qsort */ +#include <string.h> /* memset */ +#include <time.h> /* clock */ +#include "random.h" +#include "util.h" /* UTIL_getFileSize, UTIL_getTotalFileSize */ +#ifndef ZDICT_STATIC_LINKING_ONLY +#define ZDICT_STATIC_LINKING_ONLY +#endif +#include "zdict.h" + +/*-************************************* +* Console display +***************************************/ +#define DISPLAY(...) fprintf(stderr, __VA_ARGS__) +#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); } + +#define LOCALDISPLAYUPDATE(displayLevel, l, ...) \ + if (displayLevel >= l) { \ + if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \ + g_time = clock(); \ + DISPLAY(__VA_ARGS__); \ + } \ + } +#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(displayLevel, l, __VA_ARGS__) +static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100; +static clock_t g_time = 0; + + + +/* ******************************************************** +* Random Dictionary Builder +**********************************************************/ +/** + * Returns the sum of the sample sizes. + */ +static size_t RANDOM_sum(const size_t *samplesSizes, unsigned nbSamples) { + size_t sum = 0; + unsigned i; + for (i = 0; i < nbSamples; ++i) { + sum += samplesSizes[i]; + } + return sum; +} + + +/** + * A segment is an inclusive range in the source. + */ +typedef struct { + U32 begin; + U32 end; +} RANDOM_segment_t; + + +/** + * Selects a random segment from totalSamplesSize - k + 1 possible segments + */ +static RANDOM_segment_t RANDOM_selectSegment(const size_t totalSamplesSize, + ZDICT_random_params_t parameters) { + const U32 k = parameters.k; + RANDOM_segment_t segment; + unsigned index; + + /* Randomly generate a number from 0 to sampleSizes - k */ + index = rand()%(totalSamplesSize - k + 1); + + /* inclusive */ + segment.begin = index; + segment.end = index + k - 1; + + return segment; +} + + +/** + * Check the validity of the parameters. + * Returns non-zero if the parameters are valid and 0 otherwise. + */ +static int RANDOM_checkParameters(ZDICT_random_params_t parameters, + size_t maxDictSize) { + /* k is a required parameter */ + if (parameters.k == 0) { + return 0; + } + /* k <= maxDictSize */ + if (parameters.k > maxDictSize) { + return 0; + } + return 1; +} + + +/** + * Given the prepared context build the dictionary. + */ +static size_t RANDOM_buildDictionary(const size_t totalSamplesSize, const BYTE *samples, + void *dictBuffer, size_t dictBufferCapacity, + ZDICT_random_params_t parameters) { + BYTE *const dict = (BYTE *)dictBuffer; + size_t tail = dictBufferCapacity; + const int displayLevel = parameters.zParams.notificationLevel; + while (tail > 0) { + + /* Select a segment */ + RANDOM_segment_t segment = RANDOM_selectSegment(totalSamplesSize, parameters); + + size_t segmentSize; + segmentSize = MIN(segment.end - segment.begin + 1, tail); + + tail -= segmentSize; + memcpy(dict + tail, samples + segment.begin, segmentSize); + DISPLAYUPDATE( + 2, "\r%u%% ", + (U32)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity)); + } + + return tail; +} + + + + +ZDICTLIB_API size_t ZDICT_trainFromBuffer_random( + void *dictBuffer, size_t dictBufferCapacity, + const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples, + ZDICT_random_params_t parameters) { + const int displayLevel = parameters.zParams.notificationLevel; + BYTE* const dict = (BYTE*)dictBuffer; + /* Checks */ + if (!RANDOM_checkParameters(parameters, dictBufferCapacity)) { + DISPLAYLEVEL(1, "k is incorrect\n"); + return ERROR(GENERIC); + } + if (nbSamples == 0) { + DISPLAYLEVEL(1, "Random must have at least one input file\n"); + return ERROR(GENERIC); + } + if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { + DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n", + ZDICT_DICTSIZE_MIN); + return ERROR(dstSize_tooSmall); + } + const size_t totalSamplesSize = RANDOM_sum(samplesSizes, nbSamples); + const BYTE *const samples = (const BYTE *)samplesBuffer; + + DISPLAYLEVEL(2, "Building dictionary\n"); + { + const size_t tail = RANDOM_buildDictionary(totalSamplesSize, samples, + dictBuffer, dictBufferCapacity, parameters); + const size_t dictSize = ZDICT_finalizeDictionary( + dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail, + samplesBuffer, samplesSizes, nbSamples, parameters.zParams); + if (!ZSTD_isError(dictSize)) { + DISPLAYLEVEL(2, "Constructed dictionary of size %u\n", + (U32)dictSize); + } + return dictSize; + } +} |