summaryrefslogtreecommitdiff
path: root/src/mongo/db/storage
diff options
context:
space:
mode:
authorwolfkdy <kdy71107216@aliyun.com>2017-03-04 16:10:58 +0800
committerGeert Bosch <geert@mongodb.com>2017-07-05 17:44:39 -0400
commit93beb0234eba9dc58ab6070ad472022f96e019e6 (patch)
tree2187cdfef5e3b602fb736d175677e4eb7db562f5 /src/mongo/db/storage
parent2aefd80d1acea065c77bd3bd69abf686a27ae3e0 (diff)
downloadmongo-93beb0234eba9dc58ab6070ad472022f96e019e6.tar.gz
SERVER-22766 wiredtiger engine support update oplogsize online
Diffstat (limited to 'src/mongo/db/storage')
-rw-r--r--src/mongo/db/storage/kv/kv_collection_catalog_entry.cpp6
-rw-r--r--src/mongo/db/storage/kv/kv_collection_catalog_entry.h2
-rw-r--r--src/mongo/db/storage/mmap_v1/catalog/namespace_details_collection_entry.cpp5
-rw-r--r--src/mongo/db/storage/mmap_v1/catalog/namespace_details_collection_entry.h2
-rw-r--r--src/mongo/db/storage/record_store.h8
-rw-r--r--src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp59
-rw-r--r--src/mongo/db/storage/wiredtiger/wiredtiger_record_store.h4
-rw-r--r--src/mongo/db/storage/wiredtiger/wiredtiger_record_store_oplog_stones.h23
-rw-r--r--src/mongo/db/storage/wiredtiger/wiredtiger_record_store_test.cpp68
9 files changed, 93 insertions, 84 deletions
diff --git a/src/mongo/db/storage/kv/kv_collection_catalog_entry.cpp b/src/mongo/db/storage/kv/kv_collection_catalog_entry.cpp
index e96cddbb1a7..c775ada1a23 100644
--- a/src/mongo/db/storage/kv/kv_collection_catalog_entry.cpp
+++ b/src/mongo/db/storage/kv/kv_collection_catalog_entry.cpp
@@ -248,6 +248,12 @@ void KVCollectionCatalogEntry::updateValidator(OperationContext* opCtx,
_catalog->putMetaData(opCtx, ns().toString(), md);
}
+// Stores 'size' as this collection's capped size: reads the collection's current catalog
+// metadata, overwrites options.cappedSize, and writes the metadata back under the collection's
+// namespace string.
+void KVCollectionCatalogEntry::updateCappedSize(OperationContext* opCtx, long long size) {
+    MetaData metadata = _getMetaData(opCtx);
+    metadata.options.cappedSize = size;
+
+    const auto nsKey = ns().toString();
+    _catalog->putMetaData(opCtx, nsKey, metadata);
+}
+
BSONCollectionCatalogEntry::MetaData KVCollectionCatalogEntry::_getMetaData(
OperationContext* opCtx) const {
return _catalog->getMetaData(opCtx, ns().toString());
diff --git a/src/mongo/db/storage/kv/kv_collection_catalog_entry.h b/src/mongo/db/storage/kv/kv_collection_catalog_entry.h
index c999bee1fe5..21f31691739 100644
--- a/src/mongo/db/storage/kv/kv_collection_catalog_entry.h
+++ b/src/mongo/db/storage/kv/kv_collection_catalog_entry.h
@@ -78,6 +78,8 @@ public:
StringData validationLevel,
StringData validationAction) final;
+    // Writes 'size' into options.cappedSize of this collection's catalog metadata.
+    void updateCappedSize(OperationContext* opCtx, long long size) final;
+
RecordStore* getRecordStore() {
return _recordStore.get();
}
diff --git a/src/mongo/db/storage/mmap_v1/catalog/namespace_details_collection_entry.cpp b/src/mongo/db/storage/mmap_v1/catalog/namespace_details_collection_entry.cpp
index 617a6739cfe..b37ca245298 100644
--- a/src/mongo/db/storage/mmap_v1/catalog/namespace_details_collection_entry.cpp
+++ b/src/mongo/db/storage/mmap_v1/catalog/namespace_details_collection_entry.cpp
@@ -442,4 +442,9 @@ void NamespaceDetailsCollectionCatalogEntry::setNamespacesRecordId(OperationCont
_namespacesRecordId = newId;
}
}
+
+void NamespaceDetailsCollectionCatalogEntry::updateCappedSize(OperationContext* opCtx,
+ long long size) {
+ invariant(false);  // Not implemented for this catalog entry: every call fails this invariant.
+}
}
diff --git a/src/mongo/db/storage/mmap_v1/catalog/namespace_details_collection_entry.h b/src/mongo/db/storage/mmap_v1/catalog/namespace_details_collection_entry.h
index f4e4410e317..aff16b22093 100644
--- a/src/mongo/db/storage/mmap_v1/catalog/namespace_details_collection_entry.h
+++ b/src/mongo/db/storage/mmap_v1/catalog/namespace_details_collection_entry.h
@@ -102,6 +102,8 @@ public:
StringData validationLevel,
StringData validationAction) final;
+ void updateCappedSize(OperationContext* opCtx, long long size) final;
+
// not part of interface, but available to my storage engine
int _findIndexNumber(OperationContext* opCtx, StringData indexName) const;
diff --git a/src/mongo/db/storage/record_store.h b/src/mongo/db/storage/record_store.h
index a6c2c0d9a09..8f7dbe7919a 100644
--- a/src/mongo/db/storage/record_store.h
+++ b/src/mongo/db/storage/record_store.h
@@ -623,6 +623,14 @@ public:
long long numRecords,
long long dataSize) = 0;
+ /**
+ * Requests that the maximum size of this capped record store be changed to 'cappedSize'.
+ * Used to support changing the oplog size while the server is online.
+ *
+ * This default implementation changes nothing and returns a CommandNotSupported status.
+ * Record stores that support resizing override it.
+ */
+ virtual Status updateCappedSize(OperationContext* opCtx, long long cappedSize) {
+ return Status(ErrorCodes::CommandNotSupported,
+ "this storage engine does not support updateCappedSize");
+ }
+
protected:
std::string _ns;
};
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp
index 38da23da215..0f28ef6a71f 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp
@@ -155,11 +155,11 @@ WiredTigerRecordStore::OplogStones::OplogStones(OperationContext* opCtx, WiredTi
const unsigned long long kMaxStonesToKeep = 100ULL;
unsigned long long numStones = maxSize / BSONObjMaxInternalSize;
- _numStonesToKeep = std::min(kMaxStonesToKeep, std::max(kMinStonesToKeep, numStones));
- _minBytesPerStone = maxSize / _numStonesToKeep;
+ size_t numStonesToKeep = std::min(kMaxStonesToKeep, std::max(kMinStonesToKeep, numStones));
+ _minBytesPerStone = maxSize / numStonesToKeep;
invariant(_minBytesPerStone > 0);
- _calculateStones(opCtx);
+ _calculateStones(opCtx, numStonesToKeep);
_pokeReclaimThreadIfNeeded(); // Reclaim stones if over the limit.
}
@@ -179,8 +179,14 @@ void WiredTigerRecordStore::OplogStones::kill() {
void WiredTigerRecordStore::OplogStones::awaitHasExcessStonesOrDead() {
// Wait until kill() is called or there are too many oplog stones.
stdx::unique_lock<stdx::mutex> lock(_oplogReclaimMutex);
- while (!_isDead && !hasExcessStones()) {
- MONGO_IDLE_THREAD_BLOCK;
+ while (!_isDead) {
+ {
+ MONGO_IDLE_THREAD_BLOCK;  // NOTE(review): this scope closes before the wait() below - confirm the wait should be outside it.
+ stdx::lock_guard<stdx::mutex> lk(_mutex);  // hasExcessStones_inlock() iterates _stones, so hold _mutex for the check.
+ if (hasExcessStones_inlock()) {
+ break;
+ }
+ }  // _mutex is released here, before blocking on the condition variable.
_oplogReclaimCv.wait(lock);
}
}
@@ -189,7 +195,7 @@ boost::optional<WiredTigerRecordStore::OplogStones::Stone>
WiredTigerRecordStore::OplogStones::peekOldestStoneIfNeeded() const {
stdx::lock_guard<stdx::mutex> lk(_mutex);
- if (!hasExcessStones()) {
+ if (!hasExcessStones_inlock()) {
return {};
}
@@ -221,6 +227,7 @@ void WiredTigerRecordStore::OplogStones::createNewStoneIfNeeded(RecordId lastRec
return;
}
+ LOG(2) << "create new oplogStone, current stones:" << _stones.size();
OplogStones::Stone stone = {_currentRecords.swap(0), _currentBytes.swap(0), lastRecord};
_stones.push_back(stone);
@@ -279,17 +286,8 @@ void WiredTigerRecordStore::OplogStones::setMinBytesPerStone(int64_t size) {
_minBytesPerStone = size;
}
-void WiredTigerRecordStore::OplogStones::setNumStonesToKeep(size_t numStones) {
- invariant(numStones > 0);
-
- stdx::lock_guard<stdx::mutex> lk(_mutex);
-
- // Only allow changing the number of stones to keep if no data has been inserted.
- invariant(_stones.size() == 0 && _currentRecords.load() == 0);
- _numStonesToKeep = numStones;
-}
-
-void WiredTigerRecordStore::OplogStones::_calculateStones(OperationContext* opCtx) {
+void WiredTigerRecordStore::OplogStones::_calculateStones(OperationContext* opCtx,
+ size_t numStonesToKeep) {
long long numRecords = _rs->numRecords(opCtx);
long long dataSize = _rs->dataSize(opCtx);
@@ -304,7 +302,7 @@ void WiredTigerRecordStore::OplogStones::_calculateStones(OperationContext* opCt
// oplog to determine where to put down stones.
if (numRecords <= 0 || dataSize <= 0 ||
uint64_t(numRecords) <
- kMinSampleRatioForRandCursor * kRandomSamplesPerStone * _numStonesToKeep) {
+ kMinSampleRatioForRandCursor * kRandomSamplesPerStone * numStonesToKeep) {
_calculateStonesByScanning(opCtx);
return;
}
@@ -427,11 +425,23 @@ void WiredTigerRecordStore::OplogStones::_calculateStonesBySampling(OperationCon
}
void WiredTigerRecordStore::OplogStones::_pokeReclaimThreadIfNeeded() {
- if (hasExcessStones()) {
+ if (hasExcessStones_inlock()) {
_oplogReclaimCv.notify_one();
}
}
+// Recomputes the minimum number of bytes per stone for a maximum oplog size of 'maxSize', then
+// notifies the reclaim condition variable if the existing stones exceed the record store's
+// capped maximum size.
+void WiredTigerRecordStore::OplogStones::adjust(int64_t maxSize) {
+    // The stone count is one per BSONObjMaxInternalSize of capacity, clamped to [10, 100].
+    const unsigned long long kMinStonesToKeep = 10ULL;
+    const unsigned long long kMaxStonesToKeep = 100ULL;
+
+    stdx::lock_guard<stdx::mutex> lk(_mutex);
+
+    unsigned long long unclampedStones = maxSize / BSONObjMaxInternalSize;
+    unsigned long long atLeastMinStones = std::max(kMinStonesToKeep, unclampedStones);
+    size_t stonesToKeep = std::min(kMaxStonesToKeep, atLeastMinStones);
+
+    _minBytesPerStone = maxSize / stonesToKeep;
+    invariant(_minBytesPerStone > 0);
+
+    // _pokeReclaimThreadIfNeeded() calls hasExcessStones_inlock(), so invoke it while _mutex is
+    // still held.
+    _pokeReclaimThreadIfNeeded();
+}
+
StatusWith<std::string> WiredTigerRecordStore::parseOptionsField(const BSONObj options) {
StringBuilder ss;
BSONForEach(elem, options) {
@@ -2098,4 +2108,15 @@ void WiredTigerRecordStorePrefixedCursor::initCursorToBeginning() {
}
}
+// Sets this record store's capped maximum size to 'cappedSize' and, when oplog stones exist,
+// has them adjust to the new size. A call with the current size is a no-op. Always returns
+// Status::OK(); 'opCtx' is not used.
+Status WiredTigerRecordStore::updateCappedSize(OperationContext* opCtx, long long cappedSize) {
+    const bool sizeChanged = (_cappedMaxSize != cappedSize);
+    if (sizeChanged) {
+        // The new limit is stored before adjust() runs, since adjust() may trigger the
+        // excess-stones check, which consults cappedMaxSize().
+        // NOTE(review): no lock is visibly held for this write - confirm callers serialize
+        // resizes against concurrent readers of the capped size.
+        _cappedMaxSize = cappedSize;
+        if (_oplogStones) {
+            _oplogStones->adjust(cappedSize);
+        }
+    }
+    return Status::OK();
+}
+
} // namespace mongo
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.h b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.h
index 726febce42c..aca92fbea1d 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.h
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.h
@@ -218,6 +218,8 @@ public:
void waitForAllEarlierOplogWritesToBeVisible(OperationContext* opCtx) const override;
+ Status updateCappedSize(OperationContext* opCtx, long long cappedSize) final;
+
bool isOplog() const {
return _isOplog;
}
@@ -317,7 +319,7 @@ private:
const bool _isEphemeral;
// True if the namespace of this record store starts with "local.oplog.", and false otherwise.
const bool _isOplog;
- const int64_t _cappedMaxSize;
+ int64_t _cappedMaxSize;
const int64_t _cappedMaxSizeSlack; // when to start applying backpressure
const int64_t _cappedMaxDocs;
RecordId _cappedFirstRecord;
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store_oplog_stones.h b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store_oplog_stones.h
index f42ade90db8..dfd3dddb902 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store_oplog_stones.h
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store_oplog_stones.h
@@ -57,8 +57,14 @@ public:
void kill();
- bool hasExcessStones() const {
- return _stones.size() > _numStonesToKeep;
+    // Returns true when the bytes held by the stones in '_stones' add up to more than the record
+    // store's capped maximum size. The callers visible in this change hold '_mutex' while calling.
+    bool hasExcessStones_inlock() const {
+        int64_t stoneBytes = 0;
+        for (const auto& stone : _stones) {
+            stoneBytes += stone.bytes;
+        }
+        return stoneBytes > _rs->cappedMaxSize();
     }
void awaitHasExcessStonesOrDead();
@@ -81,6 +87,9 @@ public:
int64_t bytesRemoved,
RecordId firstRemovedId);
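+ // Recomputes the minimum bytes per stone for a new maximum oplog size of 'maxSize' and pokes
+ // the reclaim thread if the existing stones exceed the capped maximum size.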
+ void adjust(int64_t maxSize);
+
// The start point of where to truncate next. Used by the background reclaim thread to
// efficiently truncate records with WiredTiger by skipping over tombstones, etc.
RecordId firstRecord;
@@ -104,13 +113,11 @@ public:
void setMinBytesPerStone(int64_t size);
- void setNumStonesToKeep(size_t numStones);
-
private:
class InsertChange;
class TruncateChange;
- void _calculateStones(OperationContext* opCtx);
+ void _calculateStones(OperationContext* opCtx, size_t size);
void _calculateStonesByScanning(OperationContext* opCtx);
void _calculateStonesBySampling(OperationContext* opCtx,
int64_t estRecordsPerStone,
@@ -129,12 +136,8 @@ private:
// database, and false otherwise.
bool _isDead = false;
- // Maximum number of stones to keep in the deque before the background reclaim thread should
- // truncate the oldest ones. Does not include the stone currently being filled. This value
- // should not be changed after initialization.
- size_t _numStonesToKeep;
// Minimum number of bytes the stone being filled should contain before it gets added to the
- // deque of oplog stones. This value should not be changed after initialization.
+ // deque of oplog stones.
int64_t _minBytesPerStone;
AtomicInt64 _currentRecords; // Number of records in the stone being filled.
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store_test.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store_test.cpp
index 0d15a514950..75b5c11e673 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store_test.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store_test.cpp
@@ -1034,7 +1034,7 @@ TEST(WiredTigerRecordStoreTest, OplogStones_CappedTruncateAfter) {
}
}
-// Verify that oplog stones are reclaimed when the number of stones to keep is exceeded.
+// Verify that oplog stones are reclaimed when cappedMaxSize is exceeded.
TEST(WiredTigerRecordStoreTest, OplogStones_ReclaimStones) {
std::unique_ptr<RecordStoreHarnessHelper> harnessHelper = newRecordStoreHarnessHelper();
@@ -1045,8 +1045,12 @@ TEST(WiredTigerRecordStoreTest, OplogStones_ReclaimStones) {
WiredTigerRecordStore* wtrs = static_cast<WiredTigerRecordStore*>(rs.get());
WiredTigerRecordStore::OplogStones* oplogStones = wtrs->oplogStones();
+ {
+ ServiceContext::UniqueOperationContext opCtx(harnessHelper->newOperationContext());
+ ASSERT_OK(wtrs->updateCappedSize(opCtx.get(), 230U));
+ }
+
oplogStones->setMinBytesPerStone(100);
- oplogStones->setNumStonesToKeep(2U);
{
ServiceContext::UniqueOperationContext opCtx(harnessHelper->newOperationContext());
@@ -1062,7 +1066,7 @@ TEST(WiredTigerRecordStoreTest, OplogStones_ReclaimStones) {
ASSERT_EQ(0, oplogStones->currentBytes());
}
- // Truncate a stone when number of stones to keep is exceeded.
+ // Truncate a stone when cappedMaxSize is exceeded.
{
ServiceContext::UniqueOperationContext opCtx(harnessHelper->newOperationContext());
@@ -1095,71 +1099,27 @@ TEST(WiredTigerRecordStoreTest, OplogStones_ReclaimStones) {
wtrs->reclaimOplog(opCtx.get());
- ASSERT_EQ(3, rs->numRecords(opCtx.get()));
- ASSERT_EQ(320, rs->dataSize(opCtx.get()));
- ASSERT_EQ(2U, oplogStones->numStones());
+ ASSERT_EQ(2, rs->numRecords(opCtx.get()));
+ ASSERT_EQ(190, rs->dataSize(opCtx.get()));
+ ASSERT_EQ(1U, oplogStones->numStones());
ASSERT_EQ(1, oplogStones->currentRecords());
ASSERT_EQ(50, oplogStones->currentBytes());
}
- // No-op if the number of oplog stones is less than or equal to the number of stones to keep.
+ // No-op if dataSize <= cappedMaxSize.
{
ServiceContext::UniqueOperationContext opCtx(harnessHelper->newOperationContext());
wtrs->reclaimOplog(opCtx.get());
- ASSERT_EQ(3, rs->numRecords(opCtx.get()));
- ASSERT_EQ(320, rs->dataSize(opCtx.get()));
- ASSERT_EQ(2U, oplogStones->numStones());
+ ASSERT_EQ(2, rs->numRecords(opCtx.get()));
+ ASSERT_EQ(190, rs->dataSize(opCtx.get()));
+ ASSERT_EQ(1U, oplogStones->numStones());
ASSERT_EQ(1, oplogStones->currentRecords());
ASSERT_EQ(50, oplogStones->currentBytes());
}
}
-// Verify that oplog stones are not reclaimed even if the size of the record store exceeds
-// 'cappedMaxSize'.
-TEST(WiredTigerRecordStoreTest, OplogStones_ExceedCappedMaxSize) {
- std::unique_ptr<RecordStoreHarnessHelper> harnessHelper = newRecordStoreHarnessHelper();
-
- const int64_t cappedMaxSize = 256;
- unique_ptr<RecordStore> rs(
- harnessHelper->newCappedRecordStore("local.oplog.stones", cappedMaxSize, -1));
-
- WiredTigerRecordStore* wtrs = static_cast<WiredTigerRecordStore*>(rs.get());
- WiredTigerRecordStore::OplogStones* oplogStones = wtrs->oplogStones();
-
- oplogStones->setMinBytesPerStone(100);
- oplogStones->setNumStonesToKeep(10U);
-
- {
- ServiceContext::UniqueOperationContext opCtx(harnessHelper->newOperationContext());
-
- ASSERT_EQ(insertBSONWithSize(opCtx.get(), rs.get(), Timestamp(1, 1), 100), RecordId(1, 1));
- ASSERT_EQ(insertBSONWithSize(opCtx.get(), rs.get(), Timestamp(1, 2), 110), RecordId(1, 2));
- ASSERT_EQ(insertBSONWithSize(opCtx.get(), rs.get(), Timestamp(1, 3), 120), RecordId(1, 3));
-
- ASSERT_EQ(3, rs->numRecords(opCtx.get()));
- ASSERT_EQ(330, rs->dataSize(opCtx.get()));
- ASSERT_EQ(3U, oplogStones->numStones());
- ASSERT_EQ(0, oplogStones->currentRecords());
- ASSERT_EQ(0, oplogStones->currentBytes());
- }
-
- // Shouldn't truncate a stone when the number of oplog stones is less than the number of stones
- // to keep, even though the size of the record store exceeds 'cappedMaxSize'.
- {
- ServiceContext::UniqueOperationContext opCtx(harnessHelper->newOperationContext());
-
- wtrs->reclaimOplog(opCtx.get());
-
- ASSERT_EQ(3, rs->numRecords(opCtx.get()));
- ASSERT_EQ(330, rs->dataSize(opCtx.get()));
- ASSERT_EQ(3U, oplogStones->numStones());
- ASSERT_EQ(0, oplogStones->currentRecords());
- ASSERT_EQ(0, oplogStones->currentBytes());
- }
-}
-
// Verify that an oplog stone isn't created if it would cause the logical representation of the
// records to not be in increasing order.
TEST(WiredTigerRecordStoreTest, OplogStones_AscendingOrder) {