From ffb328eba6755fedccda53473d8733ae31fe0013 Mon Sep 17 00:00:00 2001 From: Drew Paroski Date: Sat, 2 Oct 2021 20:48:06 +0000 Subject: SERVER-60409 Make SBE VM's stack more cache-friendly --- src/mongo/db/exec/sbe/vm/vm.cpp | 72 +++++++++++++++----- src/mongo/db/exec/sbe/vm/vm.h | 141 ++++++++++++++++++++++++++++++---------- 2 files changed, 162 insertions(+), 51 deletions(-) diff --git a/src/mongo/db/exec/sbe/vm/vm.cpp b/src/mongo/db/exec/sbe/vm/vm.cpp index ea9695c63b4..5b46789312d 100644 --- a/src/mongo/db/exec/sbe/vm/vm.cpp +++ b/src/mongo/db/exec/sbe/vm/vm.cpp @@ -505,14 +505,45 @@ void CodeFragment::appendJumpNothing(int jumpOffset) { offset += writeToMemory(offset, jumpOffset); } -ByteCode::~ByteCode() { - auto size = _argStackOwned.size(); - invariant(_argStackTags.size() == size); - invariant(_argStackVals.size() == size); - for (size_t i = 0; i < size; ++i) { - if (_argStackOwned[i]) { - value::releaseValue(_argStackTags[i], _argStackVals[i]); +void ByteCode::Stack::growAndResize(size_t newSize) { + auto currentCapacity = capacity(); + if (newSize <= currentCapacity) { + _size = newSize; + return; + } + + auto newCapacity = newSize; + if (newCapacity > kMaxCapacity) { + uasserted(6040901, + str::stream() << "Requested capacity of " << newCapacity + << " elements exceeds the maximum capacity of " << kMaxCapacity); + return; + } + + if (currentCapacity >= kMaxCapacity / 2) { + newCapacity = kMaxCapacity; + } else if (2 * currentCapacity > newCapacity) { + newCapacity = 2 * currentCapacity; + } + + try { + auto numSegments = (_size + ElementsPerSegment - 1) / ElementsPerSegment; + auto numNewSegments = (newCapacity + ElementsPerSegment - 1) / ElementsPerSegment; + newCapacity = numNewSegments * ElementsPerSegment; + + auto newSegments = std::make_unique(numNewSegments); + + if (_segments.get() != nullptr && numSegments > 0) { + memcpy(newSegments.get(), _segments.get(), numSegments * sizeof(StackSegment)); } + + _segments = std::move(newSegments); + _capacity = newCapacity; + _size = newSize; + } catch (std::bad_alloc&) { + uasserted(6040902, + str::stream() << "Unable to allocate requested capacity of " << newCapacity + << " elements"); } } @@ -4923,18 +4954,29 @@ void ByteCode::runInternal(const CodeFragment* code, int64_t position) { } std::tuple ByteCode::run(const CodeFragment* code) { + uassert(6040900, "The evaluation stack must be empty", _argStack.size() == 0); + + ON_BLOCK_EXIT([&] { + auto size = _argStack.size(); + for (size_t i = 0; i < size; ++i) { + auto [owned, tag] = _argStack.ownedAndTag(i); + if (owned) { + value::releaseValue(tag, _argStack.value(i)); + } + } + + _argStack.resize(0); + }); + runInternal(code, 0); - uassert( - 4822801, "The evaluation stack must hold only a single value", _argStackOwned.size() == 1); + uassert(4822801, "The evaluation stack must hold only a single value", _argStack.size() == 1); - auto owned = _argStackOwned[0]; - auto tag = _argStackTags[0]; - auto val = _argStackVals[0]; + auto [owned, tag] = _argStack.ownedAndTag(0); + auto val = _argStack.value(0); - _argStackOwned.clear(); - _argStackTags.clear(); - _argStackVals.clear(); + // Transfer ownership of tag/val to the caller + _argStack.resize(0); return {owned, tag, val}; } diff --git a/src/mongo/db/exec/sbe/vm/vm.h b/src/mongo/db/exec/sbe/vm/vm.h index 2c9420d49f3..73bad25f65f 100644 --- a/src/mongo/db/exec/sbe/vm/vm.h +++ b/src/mongo/db/exec/sbe/vm/vm.h @@ -594,15 +594,91 @@ private: class ByteCode { public: - ~ByteCode(); - std::tuple run(const CodeFragment* code); bool runPredicate(const CodeFragment* code); private: - std::vector _argStackOwned; - std::vector _argStackTags; - std::vector _argStackVals; + // The VM stack is used to pass inputs to instructions and hold the outputs produced by + // instructions. Each element of the VM stack is 3-tuple comprised of a boolean ('owned'), + // a value::TypeTags ('tag'), and a value::Value ('value'). + // + // In order to make the VM stack cache-friendly, for each element we want 'owned', 'tag', + // and 'value' to be located relatively close together, and we also want to avoid wasting + // any bytes due to padding. + // + // To achieve these goals, the VM stack is organized as a vector of "stack segments". Each + // "segment" is large enough to hold 4 elements. The first 8 bytes of a segment holds the + // 'owned' and 'tag' components, and the remaining 32 bytes hold the 'value' components. + static constexpr size_t ElementsPerSegment = 4; + + struct OwnedAndTag { + uint8_t owned; + value::TypeTags tag; + }; + + struct StackSegment { + OwnedAndTag ownedAndTags[ElementsPerSegment]; + value::Value values[ElementsPerSegment]; + }; + + class Stack { + public: + static constexpr size_t kMaxCapacity = + ((std::numeric_limits::max() / 2) / sizeof(StackSegment)) * ElementsPerSegment; + + const auto& ownedAndTag(size_t index) const { + return _segments[index / ElementsPerSegment].ownedAndTags[index % ElementsPerSegment]; + } + auto& ownedAndTag(size_t index) { + return _segments[index / ElementsPerSegment].ownedAndTags[index % ElementsPerSegment]; + } + + const auto& owned(size_t index) const { + return ownedAndTag(index).owned; + } + auto& owned(size_t index) { + return ownedAndTag(index).owned; + } + + const auto& tag(size_t index) const { + return ownedAndTag(index).tag; + } + auto& tag(size_t index) { + return ownedAndTag(index).tag; + } + + const auto& value(size_t index) const { + return _segments[index / ElementsPerSegment].values[index % ElementsPerSegment]; + } + auto& value(size_t index) { + return _segments[index / ElementsPerSegment].values[index % ElementsPerSegment]; + } + + auto size() const { + return _size; + } + + auto capacity() const { + return _capacity; + } + + void resize(size_t newSize) { + if (MONGO_likely(newSize <= capacity())) { + _size = newSize; + return; + } + growAndResize(newSize); + } + + private: + MONGO_COMPILER_NOINLINE void growAndResize(size_t newSize); + + std::unique_ptr _segments; + size_t _size = 0; + size_t _capacity = 0; + }; + + Stack _argStack; void runInternal(const CodeFragment* code, int64_t position); std::tuple runLambdaInternal(const CodeFragment* code, @@ -899,61 +975,54 @@ private: std::tuple dispatchBuiltin(Builtin f, ArityType arity); std::tuple getFromStack(size_t offset) { - auto backOffset = _argStackOwned.size() - 1 - offset; - auto owned = _argStackOwned[backOffset]; - auto tag = _argStackTags[backOffset]; - auto val = _argStackVals[backOffset]; + auto backOffset = _argStack.size() - 1 - offset; - return {owned, tag, val}; + auto [owned, tag] = _argStack.ownedAndTag(backOffset); + auto val = _argStack.value(backOffset); + + return {static_cast(owned), tag, val}; } std::tuple moveFromStack(size_t offset) { - auto backOffset = _argStackOwned.size() - 1 - offset; - auto owned = _argStackOwned[backOffset]; - _argStackOwned[backOffset] = false; - auto tag = _argStackTags[backOffset]; - _argStackTags[backOffset] = value::TypeTags::Nothing; - auto val = _argStackVals[backOffset]; - _argStackVals[backOffset] = 0; + auto backOffset = _argStack.size() - 1 - offset; + auto [owned, tag] = _argStack.ownedAndTag(backOffset); + _argStack.ownedAndTag(backOffset) = {false, value::TypeTags::Nothing}; + auto val = _argStack.value(backOffset); + _argStack.value(backOffset) = 0; return {owned, tag, val}; } void setStack(size_t offset, bool owned, value::TypeTags tag, value::Value val) { - auto backOffset = _argStackOwned.size() - 1 - offset; - _argStackOwned[backOffset] = owned; - _argStackTags[backOffset] = tag; - _argStackVals[backOffset] = val; + auto backOffset = _argStack.size() - 1 - offset; + _argStack.ownedAndTag(backOffset) = {owned, tag}; + _argStack.value(backOffset) = val; } void pushStack(bool owned, value::TypeTags tag, value::Value val) { - _argStackOwned.push_back(owned); - _argStackTags.push_back(tag); - _argStackVals.push_back(val); + _argStack.resize(_argStack.size() + 1); + topStack(owned, tag, val); } void topStack(bool owned, value::TypeTags tag, value::Value val) { - _argStackOwned.back() = owned; - _argStackTags.back() = tag; - _argStackVals.back() = val; + size_t index = _argStack.size() - 1; + _argStack.ownedAndTag(index) = {owned, tag}; + _argStack.value(index) = val; } void popStack() { - _argStackOwned.pop_back(); - _argStackTags.pop_back(); - _argStackVals.pop_back(); + _argStack.resize(_argStack.size() - 1); } void popAndReleaseStack() { - auto owned = _argStackOwned.back(); - auto tag = _argStackTags.back(); - auto val = _argStackVals.back(); - - popStack(); + size_t index = _argStack.size() - 1; + auto [owned, tag] = _argStack.ownedAndTag(index); if (owned) { - value::releaseValue(tag, val); + value::releaseValue(tag, _argStack.value(index)); } + + popStack(); } void swapStack(); -- cgit v1.2.1