summaryrefslogtreecommitdiff
path: root/src/third_party/zstandard-1.4.3/zstd/contrib/experimental_dict_builders/randomDictBuilder/random.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/third_party/zstandard-1.4.3/zstd/contrib/experimental_dict_builders/randomDictBuilder/random.c')
-rw-r--r--src/third_party/zstandard-1.4.3/zstd/contrib/experimental_dict_builders/randomDictBuilder/random.c163
1 files changed, 163 insertions, 0 deletions
diff --git a/src/third_party/zstandard-1.4.3/zstd/contrib/experimental_dict_builders/randomDictBuilder/random.c b/src/third_party/zstandard-1.4.3/zstd/contrib/experimental_dict_builders/randomDictBuilder/random.c
new file mode 100644
index 00000000000..5276bea96a5
--- /dev/null
+++ b/src/third_party/zstandard-1.4.3/zstd/contrib/experimental_dict_builders/randomDictBuilder/random.c
@@ -0,0 +1,163 @@
+/*-*************************************
+* Dependencies
+***************************************/
+#include <stdio.h> /* fprintf */
+#include <stdlib.h> /* malloc, free, qsort */
+#include <string.h> /* memset */
+#include <time.h> /* clock */
+#include "random.h"
+#include "util.h" /* UTIL_getFileSize, UTIL_getTotalFileSize */
+#ifndef ZDICT_STATIC_LINKING_ONLY
+#define ZDICT_STATIC_LINKING_ONLY
+#endif
+#include "zdict.h"
+
+/*-*************************************
+* Console display
+***************************************/
+#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
+#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
+
+#define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
+ if (displayLevel >= l) { \
+ if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \
+ g_time = clock(); \
+ DISPLAY(__VA_ARGS__); \
+ } \
+ }
+#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(displayLevel, l, __VA_ARGS__)
+static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
+static clock_t g_time = 0;
+
+
+
+/* ********************************************************
+* Random Dictionary Builder
+**********************************************************/
+/**
+ * Returns the sum of the sample sizes.
+ */
+static size_t RANDOM_sum(const size_t *samplesSizes, unsigned nbSamples) {
+ size_t sum = 0;
+ unsigned i;
+ for (i = 0; i < nbSamples; ++i) {
+ sum += samplesSizes[i];
+ }
+ return sum;
+}
+
+
+/**
+ * A segment is an inclusive range in the source.
+ */
+typedef struct {
+ U32 begin;
+ U32 end;
+} RANDOM_segment_t;
+
+
+/**
+ * Selects a random segment from totalSamplesSize - k + 1 possible segments
+ */
+static RANDOM_segment_t RANDOM_selectSegment(const size_t totalSamplesSize,
+ ZDICT_random_params_t parameters) {
+ const U32 k = parameters.k;
+ RANDOM_segment_t segment;
+ unsigned index;
+
+ /* Randomly generate a number from 0 to sampleSizes - k */
+ index = rand()%(totalSamplesSize - k + 1);
+
+ /* inclusive */
+ segment.begin = index;
+ segment.end = index + k - 1;
+
+ return segment;
+}
+
+
+/**
+ * Check the validity of the parameters.
+ * Returns non-zero if the parameters are valid and 0 otherwise.
+ */
+static int RANDOM_checkParameters(ZDICT_random_params_t parameters,
+ size_t maxDictSize) {
+ /* k is a required parameter */
+ if (parameters.k == 0) {
+ return 0;
+ }
+ /* k <= maxDictSize */
+ if (parameters.k > maxDictSize) {
+ return 0;
+ }
+ return 1;
+}
+
+
+/**
+ * Given the prepared context build the dictionary.
+ */
+static size_t RANDOM_buildDictionary(const size_t totalSamplesSize, const BYTE *samples,
+ void *dictBuffer, size_t dictBufferCapacity,
+ ZDICT_random_params_t parameters) {
+ BYTE *const dict = (BYTE *)dictBuffer;
+ size_t tail = dictBufferCapacity;
+ const int displayLevel = parameters.zParams.notificationLevel;
+ while (tail > 0) {
+
+ /* Select a segment */
+ RANDOM_segment_t segment = RANDOM_selectSegment(totalSamplesSize, parameters);
+
+ size_t segmentSize;
+ segmentSize = MIN(segment.end - segment.begin + 1, tail);
+
+ tail -= segmentSize;
+ memcpy(dict + tail, samples + segment.begin, segmentSize);
+ DISPLAYUPDATE(
+ 2, "\r%u%% ",
+ (U32)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity));
+ }
+
+ return tail;
+}
+
+
+
+
+ZDICTLIB_API size_t ZDICT_trainFromBuffer_random(
+ void *dictBuffer, size_t dictBufferCapacity,
+ const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
+ ZDICT_random_params_t parameters) {
+ const int displayLevel = parameters.zParams.notificationLevel;
+ BYTE* const dict = (BYTE*)dictBuffer;
+ /* Checks */
+ if (!RANDOM_checkParameters(parameters, dictBufferCapacity)) {
+ DISPLAYLEVEL(1, "k is incorrect\n");
+ return ERROR(GENERIC);
+ }
+ if (nbSamples == 0) {
+ DISPLAYLEVEL(1, "Random must have at least one input file\n");
+ return ERROR(GENERIC);
+ }
+ if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
+ DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
+ ZDICT_DICTSIZE_MIN);
+ return ERROR(dstSize_tooSmall);
+ }
+ const size_t totalSamplesSize = RANDOM_sum(samplesSizes, nbSamples);
+ const BYTE *const samples = (const BYTE *)samplesBuffer;
+
+ DISPLAYLEVEL(2, "Building dictionary\n");
+ {
+ const size_t tail = RANDOM_buildDictionary(totalSamplesSize, samples,
+ dictBuffer, dictBufferCapacity, parameters);
+ const size_t dictSize = ZDICT_finalizeDictionary(
+ dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
+ samplesBuffer, samplesSizes, nbSamples, parameters.zParams);
+ if (!ZSTD_isError(dictSize)) {
+ DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",
+ (U32)dictSize);
+ }
+ return dictSize;
+ }
+}