Diffstat (limited to 'src/mongo/db/sorter/file_iterator.h')
-rw-r--r--  src/mongo/db/sorter/file_iterator.h  197
1 file changed, 0 insertions(+), 197 deletions(-)
diff --git a/src/mongo/db/sorter/file_iterator.h b/src/mongo/db/sorter/file_iterator.h
deleted file mode 100644
index 3e740584353..00000000000
--- a/src/mongo/db/sorter/file_iterator.h
+++ /dev/null
@@ -1,197 +0,0 @@
-/**
- * Copyright (C) 2021-present MongoDB, Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the Server Side Public License, version 1,
- * as published by MongoDB, Inc.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * Server Side Public License for more details.
- *
- * You should have received a copy of the Server Side Public License
- * along with this program. If not, see
- * <http://www.mongodb.com/licensing/server-side-public-license>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the Server Side Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#pragma once
-
-#include "mongo/db/sorter/sorted_data_iterator.h"
-
-#include "mongo/db/sorter/compression.h"
-#include "mongo/db/sorter/file.h"
-#include "mongo/db/sorter/util.h"
-
-namespace mongo::sorter {
-/**
- * Iterates over a sorted range within a file.
- */
-template <typename Key, typename Value>
-class FileIterator : public SortedDataIterator<Key, Value> {
-public:
- using Base = SortedDataIterator<Key, Value>;
- using Data = typename Base::Data;
- using Settings = typename Base::Settings;
-
- FileIterator(File* file,
- std::streamoff fileStartOffset,
- std::streamoff fileEndOffset,
- const uint32_t checksum,
- const Settings& settings,
- const boost::optional<std::string>& dbName)
- : _settings(settings),
- _file(file),
- _fileStartOffset(fileStartOffset),
- _fileCurrentOffset(fileStartOffset),
- _fileEndOffset(fileEndOffset),
- _originalChecksum(checksum),
- _dbName(dbName) {}
-
- ~FileIterator() {
- // If the iterator reads back every data object, we can detect corruption by comparing the
- // checksum computed while reading (_afterReadChecksum) with the original checksum recorded
- // when the data was written to disk. Some iterators do not read back the entire range, in
- // which case _afterReadChecksum never covers all of the written data, so we only fassert
- // when all data written to disk has been read back and the checksums do not match.
- if (!more() && _bufferReader->atEof() && (_originalChecksum != _afterReadChecksum)) {
- fassert(31182,
- Status(ErrorCodes::Error::ChecksumMismatch,
- "Data read from disk does not match what was written to disk. Possible "
- "corruption of data."));
- }
- }
-
- bool more() const {
- return !_bufferReader || !_bufferReader->atEof() || _fileCurrentOffset < _fileEndOffset;
- }
-
- Data next() {
- if (!_bufferReader || _bufferReader->atEof()) {
- _fillBuffer();
- }
-
- const char* startOfNewData = static_cast<const char*>(_bufferReader->pos());
-
- // Note: calling read() on the _bufferReader in the deserialize functions advances the
- // buffer position. Key precedes Value in the buffer, but C++ gives no guarantee about the
- // evaluation order of function arguments, so we cannot deserialize Key and Value directly
- // into the Data constructor.
- auto first = Key::deserializeForSorter(*_bufferReader, _settings.first);
- auto second = Value::deserializeForSorter(*_bufferReader, _settings.second);
-
- // The difference of _bufferReader's position before and after reading the data
- // will provide the length of the data that was just read.
- const char* endOfNewData = static_cast<const char*>(_bufferReader->pos());
-
- _afterReadChecksum =
- addDataToChecksum(startOfNewData, endOfNewData - startOfNewData, _afterReadChecksum);
-
- return {std::move(first), std::move(second)};
- }
-
- SorterRange getRange() const override {
- return {_fileStartOffset, _fileEndOffset, _originalChecksum};
- }
-
-private:
- /**
- * Fills the buffer by reading from disk.
- */
- void _fillBuffer() {
- int32_t rawSize;
- _read(&rawSize, sizeof(rawSize));
-
- // Negative size means compressed.
- const bool compressed = rawSize < 0;
- int32_t blockSize = std::abs(rawSize);
-
- _buffer.reset(new char[blockSize]);
- _read(_buffer.get(), blockSize);
-
- if (auto encryptionHooks = getEncryptionHooksIfEnabled()) {
- std::unique_ptr<char[]> out(new char[blockSize]);
- size_t outLen;
- Status status =
- encryptionHooks->unprotectTmpData(reinterpret_cast<const uint8_t*>(_buffer.get()),
- blockSize,
- reinterpret_cast<uint8_t*>(out.get()),
- blockSize,
- &outLen,
- _dbName);
- uassert(28841,
- str::stream() << "Failed to unprotect data: " << status.toString(),
- status.isOK());
- blockSize = outLen;
- _buffer.swap(out);
- }
-
- if (!compressed) {
- _bufferReader.reset(new BufReader(_buffer.get(), blockSize));
- return;
- }
-
- dassert(isValidCompressedBuffer(_buffer.get(), blockSize));
-
- size_t uncompressedSize;
- uassert(17061,
- "Failed to get uncompressed size",
- getUncompressedSize(_buffer.get(), blockSize, &uncompressedSize));
-
- std::unique_ptr<char[]> decompressionBuffer(new char[uncompressedSize]);
- uassert(17062,
- "Failed to decompress",
- decompress(_buffer.get(), blockSize, decompressionBuffer.get()));
-
- // Hold on to decompressed data and throw out compressed data at block exit.
- _buffer.swap(decompressionBuffer);
- _bufferReader.reset(new BufReader(_buffer.get(), uncompressedSize));
- }
-
- /**
- * Reads data from disk.
- */
- void _read(void* out, size_t size) {
- invariant(_fileCurrentOffset < _fileEndOffset,
- str::stream() << "Current file offset (" << _fileCurrentOffset
- << ") is greater than or equal to the end offset (" << _fileEndOffset << ")");
-
- _file->read(_fileCurrentOffset, size, out);
- _fileCurrentOffset += size;
- }
-
- const Settings _settings;
-
- std::unique_ptr<char[]> _buffer;
- std::unique_ptr<BufReader> _bufferReader;
-
- File* _file; // File containing the sorted data range.
- std::streamoff _fileStartOffset; // File offset at which the sorted data range starts.
- std::streamoff _fileCurrentOffset; // File offset from which we are currently reading.
- std::streamoff _fileEndOffset; // File offset at which the sorted data range ends.
-
- // Checksum value retrieved from SortedFileWriter that was calculated as data was spilled
- // to disk. This is not modified, and is only used for comparison against _afterReadChecksum
- // when the FileIterator is exhausted to ensure no data corruption.
- const uint32_t _originalChecksum;
-
- // Checksum value that is updated with each read of a data object from disk. We can compare
- // this value with _originalChecksum to check for data corruption if and only if the
- // FileIterator is exhausted.
- uint32_t _afterReadChecksum = 0;
-
- boost::optional<std::string> _dbName;
-};
-} // namespace mongo::sorter
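
A minimal sketch of how a FileIterator over a spilled range might be drained, assuming the File,
offsets, checksum, Settings, and dbName come from the SortedFileWriter that produced the range,
and that Data is a std::pair-like Key/Value type:

    // Hypothetical caller sketch; 'file', 'startOffset', 'endOffset', 'checksum',
    // 'settings', and 'dbName' are placeholders supplied by the spilling code.
    FileIterator<Key, Value> it(&file, startOffset, endOffset, checksum, settings, dbName);
    while (it.more()) {
        auto [key, value] = it.next();  // deserializes the next Key/Value pair from the spill file
        // ... merge or consume the pair ...
    }
    // Destroying 'it' after every pair has been read re-verifies the checksum and fasserts
    // (31182) on a mismatch; a partially drained iterator skips the check.

The on-disk framing read by _fillBuffer() is a signed 32-bit length prefix followed by the block
payload: a negative prefix marks a compressed block of abs(prefix) bytes, and when encryption
hooks are enabled the stored bytes are decrypted before any decompression.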