diff options
author | Igor Canadi <icanadi@fb.com> | 2015-01-29 15:58:11 -0800 |
---|---|---|
committer | Ramon Fernandez <ramon.fernandez@mongodb.com> | 2015-02-04 13:47:49 -0500 |
commit | 4f88b799c7b6829613907f3d5924071eb4ef9f45 (patch) | |
tree | 232235e518fd99844f9eac1bbbf69f7e3af74264 | |
parent | 332c6160fe7326e15a92aa7397c2e0f0edc7efe3 (diff) | |
download | mongo-4f88b799c7b6829613907f3d5924071eb4ef9f45.tar.gz |
SERVER-17126 Improve rocks engine
* checked_cast<> instead of dynamic_cast<> to speed up release build
* optimization to IndexCursor::pointsToSamePlaceAs()
* don't assume dupKeyError on Index::insert() conflict
* trigger massert() when RocksRecordStore::dataFor() can't find the RecordId. Currently we return nullptr which just causes SIGSEGV.
Signed-off-by: Benety Goh <benety@mongodb.com>
(cherry picked from commit e54a9667115b91aab0b815abe7362f197a79a52d)
-rw-r--r-- | src/mongo/db/storage/rocks/rocks_engine.cpp | 6 | ||||
-rw-r--r-- | src/mongo/db/storage/rocks/rocks_engine.h | 5 | ||||
-rw-r--r-- | src/mongo/db/storage/rocks/rocks_index.cpp | 27 | ||||
-rw-r--r-- | src/mongo/db/storage/rocks/rocks_index_test.cpp | 8 | ||||
-rw-r--r-- | src/mongo/db/storage/rocks/rocks_record_store.cpp | 9 | ||||
-rw-r--r-- | src/mongo/db/storage/rocks/rocks_record_store_test.cpp | 26 | ||||
-rw-r--r-- | src/mongo/db/storage/rocks/rocks_recovery_unit.cpp | 3 |
7 files changed, 28 insertions, 56 deletions
diff --git a/src/mongo/db/storage/rocks/rocks_engine.cpp b/src/mongo/db/storage/rocks/rocks_engine.cpp index 0e2cc8d2495..1781b7d89ee 100644 --- a/src/mongo/db/storage/rocks/rocks_engine.cpp +++ b/src/mongo/db/storage/rocks/rocks_engine.cpp @@ -207,12 +207,6 @@ namespace mongo { // non public api - rocksdb::ReadOptions RocksEngine::readOptionsWithSnapshot( OperationContext* opCtx ) { - rocksdb::ReadOptions options; - options.snapshot = dynamic_cast<RocksRecoveryUnit*>( opCtx->recoveryUnit() )->snapshot(); - return options; - } - bool RocksEngine::_existsColumnFamily(const StringData& ident) { boost::mutex::scoped_lock lk(_identColumnFamilyMapMutex); return _identColumnFamilyMap.find(ident) != _identColumnFamilyMap.end(); diff --git a/src/mongo/db/storage/rocks/rocks_engine.h b/src/mongo/db/storage/rocks/rocks_engine.h index 6c42ca4fac0..8f344eea60a 100644 --- a/src/mongo/db/storage/rocks/rocks_engine.h +++ b/src/mongo/db/storage/rocks/rocks_engine.h @@ -121,11 +121,6 @@ namespace mongo { rocksdb::DB* getDB() { return _db.get(); } const rocksdb::DB* getDB() const { return _db.get(); } - /** - * Returns a ReadOptions object that uses the snapshot contained in opCtx - */ - static rocksdb::ReadOptions readOptionsWithSnapshot( OperationContext* opCtx ); - private: bool _existsColumnFamily(const StringData& ident); Status _createColumnFamily(const rocksdb::ColumnFamilyOptions& options, diff --git a/src/mongo/db/storage/rocks/rocks_index.cpp b/src/mongo/db/storage/rocks/rocks_index.cpp index 1c6f9b506a4..2e7c0187efa 100644 --- a/src/mongo/db/storage/rocks/rocks_index.cpp +++ b/src/mongo/db/storage/rocks/rocks_index.cpp @@ -43,6 +43,7 @@ #include <rocksdb/iterator.h> #include <rocksdb/utilities/write_batch_with_index.h> +#include "mongo/base/checked_cast.h" #include "mongo/bson/bsonobjbuilder.h" #include "mongo/db/concurrency/write_conflict_exception.h" #include "mongo/db/storage/index_entry_comparison.h" @@ -120,22 +121,21 @@ namespace mongo { * All EOF locs should be considered equal. */ bool pointsToSamePlaceAs(const Cursor& genOther) const { - const RocksCursorBase& other = dynamic_cast<const RocksCursorBase&>(genOther); + const RocksCursorBase& other = checked_cast<const RocksCursorBase&>(genOther); if (isEOF() && other.isEOF()) { return true; } else if (isEOF() || other.isEOF()) { return false; } - if (getRecordId() != other.getRecordId()) { + + if (_iterator->key() != other._iterator->key()) { return false; } - loadKeyIfNeeded(); - other.loadKeyIfNeeded(); - - return _key.getSize() == other._key.getSize() && - memcmp(_key.getBuffer(), other._key.getBuffer(), _key.getSize()) == 0; + // even if keys are equal, record IDs might be different (for unique indexes, since + // in non-unique indexes RecordID is already encoded in the key) + return getRecordId() == other.getRecordId(); } bool locate(const BSONObj& key, const RecordId& loc) { @@ -580,17 +580,8 @@ namespace mongo { KeyString encodedKey(key, _order); rocksdb::Slice keySlice(encodedKey.getBuffer(), encodedKey.getSize()); - bool conflict = !ru->transaction()->registerWrite(_getTransactionID(encodedKey)); - if (conflict) { - if (!dupsAllowed) { - // if there is a conflict on a unique key, it means there is a dup key - // even if someone else is deleting at the same time, its ok to fail this - // insert as a dup key as it a race - return Status(ErrorCodes::DuplicateKey, dupKeyError(key)); - } else { - // if dups are allowed, we just throw exception on conflict - throw WriteConflictException(); - } + if (!ru->transaction()->registerWrite(_getTransactionID(encodedKey))) { + throw WriteConflictException(); } std::string currentValue; diff --git a/src/mongo/db/storage/rocks/rocks_index_test.cpp b/src/mongo/db/storage/rocks/rocks_index_test.cpp index 2b4d4a510ab..5b1c10b6aef 100644 --- a/src/mongo/db/storage/rocks/rocks_index_test.cpp +++ b/src/mongo/db/storage/rocks/rocks_index_test.cpp @@ -127,8 +127,8 @@ namespace mongo { ASSERT_OK(sorted->insert(t1.get(), key3, loc3, false)); ASSERT_OK(sorted->insert(t2.get(), key4, loc4, false)); - // this should return duplicate key - ASSERT_NOT_OK(sorted->insert(t2.get(), key3, loc5, false)); + // this should throw + ASSERT_THROWS(sorted->insert(t2.get(), key3, loc5, false), WriteConflictException); w1->commit(); // this should succeed } @@ -152,8 +152,8 @@ namespace mongo { w1->commit(); } - // this should return duplicate key - ASSERT_NOT_OK(sorted->insert(t2.get(), key5, loc3, false)); + // this should throw + ASSERT_THROWS(sorted->insert(t2.get(), key5, loc3, false), WriteConflictException); } } } diff --git a/src/mongo/db/storage/rocks/rocks_record_store.cpp b/src/mongo/db/storage/rocks/rocks_record_store.cpp index 36218732db2..e4dedc742f7 100644 --- a/src/mongo/db/storage/rocks/rocks_record_store.cpp +++ b/src/mongo/db/storage/rocks/rocks_record_store.cpp @@ -44,6 +44,7 @@ #include <rocksdb/slice.h> #include <rocksdb/utilities/write_batch_with_index.h> +#include "mongo/base/checked_cast.h" #include "mongo/bson/bsonobjbuilder.h" #include "mongo/db/concurrency/write_conflict_exception.h" #include "mongo/db/namespace_string.h" @@ -222,8 +223,10 @@ namespace mongo { return static_cast<int64_t>( storageSize ); } - RecordData RocksRecordStore::dataFor( OperationContext* txn, const RecordId& loc) const { - return _getDataFor(_db, _columnFamily.get(), txn, loc); + RecordData RocksRecordStore::dataFor(OperationContext* txn, const RecordId& loc) const { + RecordData rd = _getDataFor(_db, _columnFamily.get(), txn, loc); + massert(28605, "Didn't find RecordId in RocksRecordStore", (rd.data() != nullptr)); + return rd; } void RocksRecordStore::deleteRecord( OperationContext* txn, const RecordId& dl ) { @@ -295,7 +298,7 @@ namespace mongo { // we do this is a sub transaction in case it aborts RocksRecoveryUnit* realRecoveryUnit = - dynamic_cast<RocksRecoveryUnit*>(txn->releaseRecoveryUnit()); + checked_cast<RocksRecoveryUnit*>(txn->releaseRecoveryUnit()); invariant(realRecoveryUnit); txn->setRecoveryUnit(realRecoveryUnit->newRocksRecoveryUnit()); diff --git a/src/mongo/db/storage/rocks/rocks_record_store_test.cpp b/src/mongo/db/storage/rocks/rocks_record_store_test.cpp index 07fbb200858..1a52b18a9b2 100644 --- a/src/mongo/db/storage/rocks/rocks_record_store_test.cpp +++ b/src/mongo/db/storage/rocks/rocks_record_store_test.cpp @@ -136,15 +136,9 @@ namespace mongo { ASSERT_OK( rs->updateRecord( t1.get(), loc1, "b", 2, false, NULL ).getStatus() ); ASSERT_OK( rs->updateRecord( t1.get(), loc2, "B", 2, false, NULL ).getStatus() ); - try { - // this should fail - rs->updateRecord( t2.get(), loc1, "c", 2, false, NULL ); - ASSERT( 0 ); - } - catch ( WriteConflictException& dle ) { - w2.reset( NULL ); - t2.reset( NULL ); - } + // this should throw + ASSERT_THROWS(rs->updateRecord(t2.get(), loc1, "c", 2, false, NULL), + WriteConflictException); w1->commit(); // this should succeed } @@ -190,17 +184,11 @@ namespace mongo { { WriteUnitOfWork w( t2.get() ); - ASSERT_EQUALS( string("a"), rs->dataFor( t2.get(), loc1 ).data() ); - try { - // this should fail as our version of loc1 is too old - rs->updateRecord( t2.get(), loc1, "c", 2, false, NULL ); - ASSERT( 0 ); - } - catch ( WriteConflictException& dle ) { - } - + ASSERT_EQUALS(string("a"), rs->dataFor(t2.get(), loc1).data()); + // this should fail as our version of loc1 is too old + ASSERT_THROWS(rs->updateRecord(t2.get(), loc1, "c", 2, false, NULL), + WriteConflictException); } - } } diff --git a/src/mongo/db/storage/rocks/rocks_recovery_unit.cpp b/src/mongo/db/storage/rocks/rocks_recovery_unit.cpp index d857c1dcf7e..eecf0da7957 100644 --- a/src/mongo/db/storage/rocks/rocks_recovery_unit.cpp +++ b/src/mongo/db/storage/rocks/rocks_recovery_unit.cpp @@ -40,6 +40,7 @@ #include <rocksdb/write_batch.h> #include <rocksdb/utilities/write_batch_with_index.h> +#include "mongo/base/checked_cast.h" #include "mongo/db/concurrency/write_conflict_exception.h" #include "mongo/db/operation_context.h" #include "mongo/db/storage/rocks/rocks_transaction.h" @@ -234,7 +235,7 @@ namespace mongo { } RocksRecoveryUnit* RocksRecoveryUnit::getRocksRecoveryUnit(OperationContext* opCtx) { - return dynamic_cast<RocksRecoveryUnit*>(opCtx->recoveryUnit()); + return checked_cast<RocksRecoveryUnit*>(opCtx->recoveryUnit()); } } |