author    Mark Benvenuto <mark.benvenuto@mongodb.com>  2015-06-20 00:22:50 -0400
committer Mark Benvenuto <mark.benvenuto@mongodb.com>  2015-06-20 10:56:02 -0400
commit    9c2ed42daa8fbbef4a919c21ec564e2db55e8d60 (patch)
tree      3814f79c10d7b490948d8cb7b112ac1dd41ceff1 /src/mongo/db/storage
parent    01965cf52bce6976637ecb8f4a622aeb05ab256a (diff)
download  mongo-9c2ed42daa8fbbef4a919c21ec564e2db55e8d60.tar.gz
SERVER-18579: Clang-Format - reformat code, no comment reflow
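
This was a tree-wide clang-format pass; "no comment reflow" signals that comment wrapping was deliberately left for a follow-up change. The project's .clang-format file is not part of this diff, but the reformatted output implies settings along these lines (a sketch inferred from the new code, not the actual configuration; the option names are standard clang-format keys):

    # Inferred from the reformatted output, not the project's actual .clang-format
    BasedOnStyle: Google
    IndentWidth: 4
    AccessModifierOffset: -4     # 'public:' flush with the class keyword
    NamespaceIndentation: None   # namespace members start at column 0
    ColumnLimit: 100

A pass like this is typically driven by running clang-format -i over every .cpp and .h file in the tree.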
Diffstat (limited to 'src/mongo/db/storage')
-rw-r--r-- src/mongo/db/storage/bson_collection_catalog_entry.cpp | 248
-rw-r--r-- src/mongo/db/storage/bson_collection_catalog_entry.h | 97
-rw-r--r-- src/mongo/db/storage/capped_callback.h | 37
-rw-r--r-- src/mongo/db/storage/devnull/devnull_init.cpp | 55
-rw-r--r-- src/mongo/db/storage/devnull/devnull_kv_engine.cpp | 384
-rw-r--r-- src/mongo/db/storage/devnull/devnull_kv_engine.h | 135
-rw-r--r-- src/mongo/db/storage/in_memory/in_memory_btree_impl.cpp | 726
-rw-r--r-- src/mongo/db/storage/in_memory/in_memory_btree_impl.h | 16
-rw-r--r-- src/mongo/db/storage/in_memory/in_memory_btree_impl_test.cpp | 35
-rw-r--r-- src/mongo/db/storage/in_memory/in_memory_engine.cpp | 108
-rw-r--r-- src/mongo/db/storage/in_memory/in_memory_engine.h | 86
-rw-r--r-- src/mongo/db/storage/in_memory/in_memory_engine_test.cpp | 34
-rw-r--r-- src/mongo/db/storage/in_memory/in_memory_init.cpp | 58
-rw-r--r-- src/mongo/db/storage/in_memory/in_memory_record_store.cpp | 963
-rw-r--r-- src/mongo/db/storage/in_memory/in_memory_record_store.h | 253
-rw-r--r-- src/mongo/db/storage/in_memory/in_memory_record_store_test.cpp | 30
-rw-r--r-- src/mongo/db/storage/in_memory/in_memory_recovery_unit.cpp | 40
-rw-r--r-- src/mongo/db/storage/in_memory/in_memory_recovery_unit.h | 50
-rw-r--r-- src/mongo/db/storage/index_entry_comparison.cpp | 234
-rw-r--r-- src/mongo/db/storage/index_entry_comparison.h | 302
-rw-r--r-- src/mongo/db/storage/key_string.cpp | 1979
-rw-r--r-- src/mongo/db/storage/key_string.h | 500
-rw-r--r-- src/mongo/db/storage/key_string_test.cpp | 223
-rw-r--r-- src/mongo/db/storage/kv/kv_catalog.cpp | 621
-rw-r--r-- src/mongo/db/storage/kv/kv_catalog.h | 164
-rw-r--r-- src/mongo/db/storage/kv/kv_collection_catalog_entry.cpp | 250
-rw-r--r-- src/mongo/db/storage/kv/kv_collection_catalog_entry.h | 79
-rw-r--r-- src/mongo/db/storage/kv/kv_database_catalog_entry.cpp | 498
-rw-r--r-- src/mongo/db/storage/kv/kv_database_catalog_entry.h | 87
-rw-r--r-- src/mongo/db/storage/kv/kv_database_catalog_entry_get_index.cpp | 49
-rw-r--r-- src/mongo/db/storage/kv/kv_database_catalog_entry_get_index_mock.cpp | 13
-rw-r--r-- src/mongo/db/storage/kv/kv_database_catalog_entry_test.cpp | 94
-rw-r--r-- src/mongo/db/storage/kv/kv_engine.h | 193
-rw-r--r-- src/mongo/db/storage/kv/kv_engine_test_harness.cpp | 622
-rw-r--r-- src/mongo/db/storage/kv/kv_engine_test_harness.h | 16
-rw-r--r-- src/mongo/db/storage/kv/kv_storage_engine.cpp | 364
-rw-r--r-- src/mongo/db/storage/kv/kv_storage_engine.h | 106
-rw-r--r-- src/mongo/db/storage/mmap_v1/aligned_builder.cpp | 207
-rw-r--r-- src/mongo/db/storage/mmap_v1/aligned_builder.h | 211
-rw-r--r-- src/mongo/db/storage/mmap_v1/btree/btree_interface.cpp | 545
-rw-r--r-- src/mongo/db/storage/mmap_v1/btree/btree_interface.h | 14
-rw-r--r-- src/mongo/db/storage/mmap_v1/btree/btree_interface_test.cpp | 54
-rw-r--r-- src/mongo/db/storage/mmap_v1/btree/btree_logic.cpp | 4066
-rw-r--r-- src/mongo/db/storage/mmap_v1/btree/btree_logic.h | 823
-rw-r--r-- src/mongo/db/storage/mmap_v1/btree/btree_logic_test.cpp | 3986
-rw-r--r-- src/mongo/db/storage/mmap_v1/btree/btree_ondisk.cpp | 32
-rw-r--r-- src/mongo/db/storage/mmap_v1/btree/btree_ondisk.h | 519
-rw-r--r-- src/mongo/db/storage/mmap_v1/btree/btree_test_help.cpp | 343
-rw-r--r-- src/mongo/db/storage/mmap_v1/btree/btree_test_help.h | 196
-rw-r--r-- src/mongo/db/storage/mmap_v1/btree/key.cpp | 1040
-rw-r--r-- src/mongo/db/storage/mmap_v1/btree/key.h | 213
-rw-r--r-- src/mongo/db/storage/mmap_v1/catalog/hashtab.cpp | 82
-rw-r--r-- src/mongo/db/storage/mmap_v1/catalog/hashtab.h | 158
-rw-r--r-- src/mongo/db/storage/mmap_v1/catalog/index_details.cpp | 9
-rw-r--r-- src/mongo/db/storage/mmap_v1/catalog/index_details.h | 51
-rw-r--r-- src/mongo/db/storage/mmap_v1/catalog/namespace-inl.h | 69
-rw-r--r-- src/mongo/db/storage/mmap_v1/catalog/namespace.cpp | 17
-rw-r--r-- src/mongo/db/storage/mmap_v1/catalog/namespace.h | 116
-rw-r--r-- src/mongo/db/storage/mmap_v1/catalog/namespace_details.cpp | 344
-rw-r--r-- src/mongo/db/storage/mmap_v1/catalog/namespace_details.h | 346
-rw-r--r-- src/mongo/db/storage/mmap_v1/catalog/namespace_details_collection_entry.cpp | 553
-rw-r--r-- src/mongo/db/storage/mmap_v1/catalog/namespace_details_collection_entry.h | 101
-rw-r--r-- src/mongo/db/storage/mmap_v1/catalog/namespace_details_rsv1_metadata.cpp | 258
-rw-r--r-- src/mongo/db/storage/mmap_v1/catalog/namespace_details_rsv1_metadata.h | 91
-rw-r--r-- src/mongo/db/storage/mmap_v1/catalog/namespace_index.cpp | 305
-rw-r--r-- src/mongo/db/storage/mmap_v1/catalog/namespace_index.h | 70
-rw-r--r-- src/mongo/db/storage/mmap_v1/catalog/namespace_test.cpp | 49
-rw-r--r-- src/mongo/db/storage/mmap_v1/compress.cpp | 36
-rw-r--r-- src/mongo/db/storage/mmap_v1/compress.h | 19
-rw-r--r-- src/mongo/db/storage/mmap_v1/data_file.cpp | 323
-rw-r--r-- src/mongo/db/storage/mmap_v1/data_file.h | 273
-rw-r--r-- src/mongo/db/storage/mmap_v1/data_file_sync.cpp | 141
-rw-r--r-- src/mongo/db/storage/mmap_v1/data_file_sync.h | 42
-rw-r--r-- src/mongo/db/storage/mmap_v1/diskloc.h | 271
-rw-r--r-- src/mongo/db/storage/mmap_v1/dur.cpp | 1272
-rw-r--r-- src/mongo/db/storage/mmap_v1/dur.h | 188
-rw-r--r-- src/mongo/db/storage/mmap_v1/dur_commitjob.cpp | 107
-rw-r--r-- src/mongo/db/storage/mmap_v1/dur_commitjob.h | 304
-rw-r--r-- src/mongo/db/storage/mmap_v1/dur_journal.cpp | 1263
-rw-r--r-- src/mongo/db/storage/mmap_v1/dur_journal.h | 85
-rw-r--r-- src/mongo/db/storage/mmap_v1/dur_journal_writer.cpp | 419
-rw-r--r-- src/mongo/db/storage/mmap_v1/dur_journal_writer.h | 264
-rw-r--r-- src/mongo/db/storage/mmap_v1/dur_journalformat.h | 304
-rw-r--r-- src/mongo/db/storage/mmap_v1/dur_journalimpl.h | 136
-rw-r--r-- src/mongo/db/storage/mmap_v1/dur_preplogbuffer.cpp | 255
-rw-r--r-- src/mongo/db/storage/mmap_v1/dur_recover.cpp | 1019
-rw-r--r-- src/mongo/db/storage/mmap_v1/dur_recover.h | 88
-rw-r--r-- src/mongo/db/storage/mmap_v1/dur_recovery_unit.cpp | 432
-rw-r--r-- src/mongo/db/storage/mmap_v1/dur_recovery_unit.h | 236
-rw-r--r-- src/mongo/db/storage/mmap_v1/dur_stats.h | 85
-rw-r--r-- src/mongo/db/storage/mmap_v1/durable_mapped_file.cpp | 408
-rw-r--r-- src/mongo/db/storage/mmap_v1/durable_mapped_file.h | 361
-rw-r--r-- src/mongo/db/storage/mmap_v1/durop.cpp | 233
-rw-r--r-- src/mongo/db/storage/mmap_v1/durop.h | 165
-rw-r--r-- src/mongo/db/storage/mmap_v1/extent.cpp | 124
-rw-r--r-- src/mongo/db/storage/mmap_v1/extent.h | 63
-rw-r--r-- src/mongo/db/storage/mmap_v1/extent_manager.cpp | 96
-rw-r--r-- src/mongo/db/storage/mmap_v1/extent_manager.h | 260
-rw-r--r-- src/mongo/db/storage/mmap_v1/file_allocator.cpp | 644
-rw-r--r-- src/mongo/db/storage/mmap_v1/file_allocator.h | 100
-rw-r--r-- src/mongo/db/storage/mmap_v1/heap_record_store_btree.cpp | 208
-rw-r--r-- src/mongo/db/storage/mmap_v1/heap_record_store_btree.h | 326
-rw-r--r-- src/mongo/db/storage/mmap_v1/journal_latency_test_cmd.cpp | 176
-rw-r--r-- src/mongo/db/storage/mmap_v1/logfile.cpp | 294
-rw-r--r-- src/mongo/db/storage/mmap_v1/logfile.h | 55
-rw-r--r-- src/mongo/db/storage/mmap_v1/mmap.cpp | 363
-rw-r--r-- src/mongo/db/storage/mmap_v1/mmap.h | 392
-rw-r--r-- src/mongo/db/storage/mmap_v1/mmap_posix.cpp | 394
-rw-r--r-- src/mongo/db/storage/mmap_v1/mmap_v1_database_catalog_entry.cpp | 1274
-rw-r--r-- src/mongo/db/storage/mmap_v1/mmap_v1_database_catalog_entry.h | 261
-rw-r--r-- src/mongo/db/storage/mmap_v1/mmap_v1_engine.cpp | 480
-rw-r--r-- src/mongo/db/storage/mmap_v1/mmap_v1_engine.h | 98
-rw-r--r-- src/mongo/db/storage/mmap_v1/mmap_v1_extent_manager.cpp | 980
-rw-r--r-- src/mongo/db/storage/mmap_v1/mmap_v1_extent_manager.h | 322
-rw-r--r-- src/mongo/db/storage/mmap_v1/mmap_v1_init.cpp | 66
-rw-r--r-- src/mongo/db/storage/mmap_v1/mmap_v1_init_test.cpp | 158
-rw-r--r-- src/mongo/db/storage/mmap_v1/mmap_v1_options.cpp | 110
-rw-r--r-- src/mongo/db/storage/mmap_v1/mmap_v1_options.h | 97
-rw-r--r-- src/mongo/db/storage/mmap_v1/mmap_v1_record_store_test.cpp | 77
-rw-r--r-- src/mongo/db/storage/mmap_v1/mmap_windows.cpp | 762
-rw-r--r-- src/mongo/db/storage/mmap_v1/record.h | 211
-rw-r--r-- src/mongo/db/storage/mmap_v1/record_access_tracker.cpp | 497
-rw-r--r-- src/mongo/db/storage/mmap_v1/record_access_tracker.h | 199
-rw-r--r-- src/mongo/db/storage/mmap_v1/record_access_tracker_test.cpp | 198
-rw-r--r-- src/mongo/db/storage/mmap_v1/record_store_v1_base.cpp | 1517
-rw-r--r-- src/mongo/db/storage/mmap_v1/record_store_v1_base.h | 471
-rw-r--r-- src/mongo/db/storage/mmap_v1/record_store_v1_capped.cpp | 1068
-rw-r--r-- src/mongo/db/storage/mmap_v1/record_store_v1_capped.h | 179
-rw-r--r-- src/mongo/db/storage/mmap_v1/record_store_v1_capped_iterator.cpp | 290
-rw-r--r-- src/mongo/db/storage/mmap_v1/record_store_v1_capped_iterator.h | 100
-rw-r--r-- src/mongo/db/storage/mmap_v1/record_store_v1_capped_test.cpp | 1274
-rw-r--r-- src/mongo/db/storage/mmap_v1/record_store_v1_repair_iterator.cpp | 234
-rw-r--r-- src/mongo/db/storage/mmap_v1/record_store_v1_repair_iterator.h | 109
-rw-r--r-- src/mongo/db/storage/mmap_v1/record_store_v1_simple.cpp | 720
-rw-r--r-- src/mongo/db/storage/mmap_v1/record_store_v1_simple.h | 127
-rw-r--r-- src/mongo/db/storage/mmap_v1/record_store_v1_simple_iterator.cpp | 156
-rw-r--r-- src/mongo/db/storage/mmap_v1/record_store_v1_simple_iterator.h | 60
-rw-r--r-- src/mongo/db/storage/mmap_v1/record_store_v1_simple_test.cpp | 786
-rw-r--r-- src/mongo/db/storage/mmap_v1/record_store_v1_test_help.cpp | 1016
-rw-r--r-- src/mongo/db/storage/mmap_v1/record_store_v1_test_help.h | 243
-rw-r--r-- src/mongo/db/storage/mmap_v1/repair_database.cpp | 700
-rw-r--r-- src/mongo/db/storage/oplog_hack.cpp | 58
-rw-r--r-- src/mongo/db/storage/oplog_hack.h | 22
-rw-r--r-- src/mongo/db/storage/paths.cpp | 122
-rw-r--r-- src/mongo/db/storage/paths.h | 99
-rw-r--r-- src/mongo/db/storage/record_data.h | 113
-rw-r--r-- src/mongo/db/storage/record_fetcher.h | 38
-rw-r--r-- src/mongo/db/storage/record_store.h | 935
-rw-r--r-- src/mongo/db/storage/record_store_test_datafor.cpp | 144
-rw-r--r-- src/mongo/db/storage/record_store_test_datasize.cpp | 90
-rw-r--r-- src/mongo/db/storage/record_store_test_deleterecord.cpp | 150
-rw-r--r-- src/mongo/db/storage/record_store_test_docwriter.h | 36
-rw-r--r-- src/mongo/db/storage/record_store_test_harness.cpp | 588
-rw-r--r-- src/mongo/db/storage/record_store_test_harness.h | 32
-rw-r--r-- src/mongo/db/storage/record_store_test_insertrecord.cpp | 218
-rw-r--r-- src/mongo/db/storage/record_store_test_manyiter.cpp | 108
-rw-r--r-- src/mongo/db/storage/record_store_test_recorditer.cpp | 572
-rw-r--r-- src/mongo/db/storage/record_store_test_recordstore.cpp | 46
-rw-r--r-- src/mongo/db/storage/record_store_test_repairiter.cpp | 204
-rw-r--r-- src/mongo/db/storage/record_store_test_storagesize.cpp | 62
-rw-r--r-- src/mongo/db/storage/record_store_test_touch.cpp | 202
-rw-r--r-- src/mongo/db/storage/record_store_test_truncate.cpp | 116
-rw-r--r-- src/mongo/db/storage/record_store_test_updaterecord.cpp | 304
-rw-r--r-- src/mongo/db/storage/record_store_test_updaterecord.h | 77
-rw-r--r-- src/mongo/db/storage/record_store_test_updatewithdamages.cpp | 372
-rw-r--r-- src/mongo/db/storage/record_store_test_validate.cpp | 352
-rw-r--r-- src/mongo/db/storage/record_store_test_validate.h | 122
-rw-r--r-- src/mongo/db/storage/recovery_unit.h | 249
-rw-r--r-- src/mongo/db/storage/recovery_unit_noop.h | 74
-rw-r--r-- src/mongo/db/storage/snapshot.h | 108
-rw-r--r-- src/mongo/db/storage/sorted_data_interface.h | 549
-rw-r--r-- src/mongo/db/storage/sorted_data_interface_test_bulkbuilder.cpp | 324
-rw-r--r-- src/mongo/db/storage/sorted_data_interface_test_cursor.cpp | 198
-rw-r--r-- src/mongo/db/storage/sorted_data_interface_test_cursor_advanceto.cpp | 966
-rw-r--r-- src/mongo/db/storage/sorted_data_interface_test_cursor_end_position.cpp | 728
-rw-r--r-- src/mongo/db/storage/sorted_data_interface_test_cursor_locate.cpp | 945
-rw-r--r-- src/mongo/db/storage/sorted_data_interface_test_cursor_saverestore.cpp | 843
-rw-r--r-- src/mongo/db/storage/sorted_data_interface_test_cursor_seek_exact.cpp | 194
-rw-r--r-- src/mongo/db/storage/sorted_data_interface_test_dupkeycheck.cpp | 188
-rw-r--r-- src/mongo/db/storage/sorted_data_interface_test_fullvalidate.cpp | 62
-rw-r--r-- src/mongo/db/storage/sorted_data_interface_test_harness.cpp | 789
-rw-r--r-- src/mongo/db/storage/sorted_data_interface_test_harness.h | 169
-rw-r--r-- src/mongo/db/storage/sorted_data_interface_test_insert.cpp | 476
-rw-r--r-- src/mongo/db/storage/sorted_data_interface_test_isempty.cpp | 64
-rw-r--r-- src/mongo/db/storage/sorted_data_interface_test_rollback.cpp | 178
-rw-r--r-- src/mongo/db/storage/sorted_data_interface_test_spaceused.cpp | 106
-rw-r--r-- src/mongo/db/storage/sorted_data_interface_test_touch.cpp | 74
-rw-r--r-- src/mongo/db/storage/sorted_data_interface_test_unindex.cpp | 362
-rw-r--r-- src/mongo/db/storage/storage_engine.h | 289
-rw-r--r-- src/mongo/db/storage/storage_engine_lock_file.h | 110
-rw-r--r-- src/mongo/db/storage/storage_engine_lock_file_posix.cpp | 247
-rw-r--r-- src/mongo/db/storage/storage_engine_lock_file_test.cpp | 244
-rw-r--r-- src/mongo/db/storage/storage_engine_lock_file_windows.cpp | 250
-rw-r--r-- src/mongo/db/storage/storage_engine_metadata.cpp | 339
-rw-r--r-- src/mongo/db/storage/storage_engine_metadata.h | 151
-rw-r--r-- src/mongo/db/storage/storage_engine_metadata_test.cpp | 436
-rw-r--r-- src/mongo/db/storage/storage_init.cpp | 23
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_customization_hooks.cpp | 54
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_customization_hooks.h | 53
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_global_options.cpp | 232
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_global_options.h | 54
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp | 1851
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_index.h | 278
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_index_test.cpp | 153
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_init.cpp | 129
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_init_test.cpp | 221
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp | 677
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h | 159
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine_test.cpp | 45
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_options_init.cpp | 29
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_parameters.cpp | 34
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_parameters.h | 28
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp | 1882
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_record_store.h | 426
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_record_store_mock.cpp | 16
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_record_store_mongod.cpp | 200
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_record_store_test.cpp | 1310
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp | 576
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h | 191
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_server_status.cpp | 61
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_server_status.h | 28
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_session_cache.cpp | 300
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_session_cache.h | 167
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_size_storer.cpp | 310
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_size_storer.h | 74
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_util.cpp | 660
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_util.h | 422
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_util_test.cpp | 537
228 files changed, 38701 insertions, 39054 deletions
diff --git a/src/mongo/db/storage/bson_collection_catalog_entry.cpp b/src/mongo/db/storage/bson_collection_catalog_entry.cpp
index e1c7e527d69..976636b0bc6 100644
--- a/src/mongo/db/storage/bson_collection_catalog_entry.cpp
+++ b/src/mongo/db/storage/bson_collection_catalog_entry.cpp
@@ -32,169 +32,165 @@
namespace mongo {
- BSONCollectionCatalogEntry::BSONCollectionCatalogEntry( StringData ns )
- : CollectionCatalogEntry( ns ) {
- }
+BSONCollectionCatalogEntry::BSONCollectionCatalogEntry(StringData ns)
+ : CollectionCatalogEntry(ns) {}
- CollectionOptions BSONCollectionCatalogEntry::getCollectionOptions( OperationContext* txn ) const {
- MetaData md = _getMetaData( txn );
- return md.options;
- }
+CollectionOptions BSONCollectionCatalogEntry::getCollectionOptions(OperationContext* txn) const {
+ MetaData md = _getMetaData(txn);
+ return md.options;
+}
- int BSONCollectionCatalogEntry::getTotalIndexCount( OperationContext* txn ) const {
- MetaData md = _getMetaData( txn );
+int BSONCollectionCatalogEntry::getTotalIndexCount(OperationContext* txn) const {
+ MetaData md = _getMetaData(txn);
- return static_cast<int>( md.indexes.size() );
- }
+ return static_cast<int>(md.indexes.size());
+}
- int BSONCollectionCatalogEntry::getCompletedIndexCount( OperationContext* txn ) const {
- MetaData md = _getMetaData( txn );
+int BSONCollectionCatalogEntry::getCompletedIndexCount(OperationContext* txn) const {
+ MetaData md = _getMetaData(txn);
- int num = 0;
- for ( unsigned i = 0; i < md.indexes.size(); i++ ) {
- if ( md.indexes[i].ready )
- num++;
- }
- return num;
+ int num = 0;
+ for (unsigned i = 0; i < md.indexes.size(); i++) {
+ if (md.indexes[i].ready)
+ num++;
}
+ return num;
+}
- BSONObj BSONCollectionCatalogEntry::getIndexSpec( OperationContext* txn,
- StringData indexName ) const {
- MetaData md = _getMetaData( txn );
+BSONObj BSONCollectionCatalogEntry::getIndexSpec(OperationContext* txn,
+ StringData indexName) const {
+ MetaData md = _getMetaData(txn);
- int offset = md.findIndexOffset( indexName );
- invariant( offset >= 0 );
- return md.indexes[offset].spec.getOwned();
- }
+ int offset = md.findIndexOffset(indexName);
+ invariant(offset >= 0);
+ return md.indexes[offset].spec.getOwned();
+}
- void BSONCollectionCatalogEntry::getAllIndexes( OperationContext* txn,
- std::vector<std::string>* names ) const {
- MetaData md = _getMetaData( txn );
+void BSONCollectionCatalogEntry::getAllIndexes(OperationContext* txn,
+ std::vector<std::string>* names) const {
+ MetaData md = _getMetaData(txn);
- for ( unsigned i = 0; i < md.indexes.size(); i++ ) {
- names->push_back( md.indexes[i].spec["name"].String() );
- }
+ for (unsigned i = 0; i < md.indexes.size(); i++) {
+ names->push_back(md.indexes[i].spec["name"].String());
}
+}
- bool BSONCollectionCatalogEntry::isIndexMultikey( OperationContext* txn,
- StringData indexName) const {
- MetaData md = _getMetaData( txn );
+bool BSONCollectionCatalogEntry::isIndexMultikey(OperationContext* txn,
+ StringData indexName) const {
+ MetaData md = _getMetaData(txn);
- int offset = md.findIndexOffset( indexName );
- invariant( offset >= 0 );
- return md.indexes[offset].multikey;
- }
+ int offset = md.findIndexOffset(indexName);
+ invariant(offset >= 0);
+ return md.indexes[offset].multikey;
+}
- RecordId BSONCollectionCatalogEntry::getIndexHead( OperationContext* txn,
- StringData indexName ) const {
- MetaData md = _getMetaData( txn );
+RecordId BSONCollectionCatalogEntry::getIndexHead(OperationContext* txn,
+ StringData indexName) const {
+ MetaData md = _getMetaData(txn);
- int offset = md.findIndexOffset( indexName );
- invariant( offset >= 0 );
- return md.indexes[offset].head;
- }
+ int offset = md.findIndexOffset(indexName);
+ invariant(offset >= 0);
+ return md.indexes[offset].head;
+}
- bool BSONCollectionCatalogEntry::isIndexReady( OperationContext* txn,
- StringData indexName ) const {
- MetaData md = _getMetaData( txn );
+bool BSONCollectionCatalogEntry::isIndexReady(OperationContext* txn, StringData indexName) const {
+ MetaData md = _getMetaData(txn);
- int offset = md.findIndexOffset( indexName );
- invariant( offset >= 0 );
- return md.indexes[offset].ready;
- }
+ int offset = md.findIndexOffset(indexName);
+ invariant(offset >= 0);
+ return md.indexes[offset].ready;
+}
- // --------------------------
+// --------------------------
- void BSONCollectionCatalogEntry::IndexMetaData::updateTTLSetting( long long newExpireSeconds ) {
- BSONObjBuilder b;
- for ( BSONObjIterator bi( spec ); bi.more(); ) {
- BSONElement e = bi.next();
- if ( e.fieldNameStringData() == "expireAfterSeconds" ) {
- continue;
- }
- b.append( e );
+void BSONCollectionCatalogEntry::IndexMetaData::updateTTLSetting(long long newExpireSeconds) {
+ BSONObjBuilder b;
+ for (BSONObjIterator bi(spec); bi.more();) {
+ BSONElement e = bi.next();
+ if (e.fieldNameStringData() == "expireAfterSeconds") {
+ continue;
}
-
- b.append( "expireAfterSeconds", newExpireSeconds );
- spec = b.obj();
+ b.append(e);
}
- // --------------------------
+ b.append("expireAfterSeconds", newExpireSeconds);
+ spec = b.obj();
+}
- int BSONCollectionCatalogEntry::MetaData::findIndexOffset( StringData name ) const {
- for ( unsigned i = 0; i < indexes.size(); i++ )
- if ( indexes[i].name() == name )
- return i;
- return -1;
- }
+// --------------------------
- bool BSONCollectionCatalogEntry::MetaData::eraseIndex( StringData name ) {
- int indexOffset = findIndexOffset( name );
+int BSONCollectionCatalogEntry::MetaData::findIndexOffset(StringData name) const {
+ for (unsigned i = 0; i < indexes.size(); i++)
+ if (indexes[i].name() == name)
+ return i;
+ return -1;
+}
- if ( indexOffset < 0 ) {
- return false;
- }
+bool BSONCollectionCatalogEntry::MetaData::eraseIndex(StringData name) {
+ int indexOffset = findIndexOffset(name);
- indexes.erase( indexes.begin() + indexOffset );
- return true;
+ if (indexOffset < 0) {
+ return false;
}
- void BSONCollectionCatalogEntry::MetaData::rename( StringData toNS ) {
- ns = toNS.toString();
- for ( size_t i = 0; i < indexes.size(); i++ ) {
- BSONObj spec = indexes[i].spec;
- BSONObjBuilder b;
- b.append( "ns", toNS );
- b.appendElementsUnique( spec );
- indexes[i].spec = b.obj();
- }
- }
+ indexes.erase(indexes.begin() + indexOffset);
+ return true;
+}
- BSONObj BSONCollectionCatalogEntry::MetaData::toBSON() const {
+void BSONCollectionCatalogEntry::MetaData::rename(StringData toNS) {
+ ns = toNS.toString();
+ for (size_t i = 0; i < indexes.size(); i++) {
+ BSONObj spec = indexes[i].spec;
BSONObjBuilder b;
- b.append( "ns", ns );
- b.append( "options", options.toBSON() );
- {
- BSONArrayBuilder arr( b.subarrayStart( "indexes" ) );
- for ( unsigned i = 0; i < indexes.size(); i++ ) {
- BSONObjBuilder sub( arr.subobjStart() );
- sub.append( "spec", indexes[i].spec );
- sub.appendBool( "ready", indexes[i].ready );
- sub.appendBool( "multikey", indexes[i].multikey );
- sub.append( "head", static_cast<long long>(indexes[i].head.repr()) );
- sub.done();
- }
- arr.done();
+ b.append("ns", toNS);
+ b.appendElementsUnique(spec);
+ indexes[i].spec = b.obj();
+ }
+}
+
+BSONObj BSONCollectionCatalogEntry::MetaData::toBSON() const {
+ BSONObjBuilder b;
+ b.append("ns", ns);
+ b.append("options", options.toBSON());
+ {
+ BSONArrayBuilder arr(b.subarrayStart("indexes"));
+ for (unsigned i = 0; i < indexes.size(); i++) {
+ BSONObjBuilder sub(arr.subobjStart());
+ sub.append("spec", indexes[i].spec);
+ sub.appendBool("ready", indexes[i].ready);
+ sub.appendBool("multikey", indexes[i].multikey);
+ sub.append("head", static_cast<long long>(indexes[i].head.repr()));
+ sub.done();
}
- return b.obj();
+ arr.done();
}
+ return b.obj();
+}
- void BSONCollectionCatalogEntry::MetaData::parse( const BSONObj& obj ) {
- ns = obj["ns"].valuestrsafe();
+void BSONCollectionCatalogEntry::MetaData::parse(const BSONObj& obj) {
+ ns = obj["ns"].valuestrsafe();
- if ( obj["options"].isABSONObj() ) {
- options.parse( obj["options"].Obj() );
- }
+ if (obj["options"].isABSONObj()) {
+ options.parse(obj["options"].Obj());
+ }
- BSONElement e = obj["indexes"];
- if ( e.isABSONObj() ) {
- std::vector<BSONElement> entries = e.Array();
- for ( unsigned i = 0; i < entries.size(); i++ ) {
- BSONObj idx = entries[i].Obj();
- IndexMetaData imd;
- imd.spec = idx["spec"].Obj().getOwned();
- imd.ready = idx["ready"].trueValue();
- if (idx.hasField("head")) {
- imd.head = RecordId(idx["head"].Long());
- }
- else {
- imd.head = RecordId( idx["head_a"].Int(),
- idx["head_b"].Int() );
- }
- imd.multikey = idx["multikey"].trueValue();
- indexes.push_back( imd );
+ BSONElement e = obj["indexes"];
+ if (e.isABSONObj()) {
+ std::vector<BSONElement> entries = e.Array();
+ for (unsigned i = 0; i < entries.size(); i++) {
+ BSONObj idx = entries[i].Obj();
+ IndexMetaData imd;
+ imd.spec = idx["spec"].Obj().getOwned();
+ imd.ready = idx["ready"].trueValue();
+ if (idx.hasField("head")) {
+ imd.head = RecordId(idx["head"].Long());
+ } else {
+ imd.head = RecordId(idx["head_a"].Int(), idx["head_b"].Int());
}
+ imd.multikey = idx["multikey"].trueValue();
+ indexes.push_back(imd);
}
}
}
+}
diff --git a/src/mongo/db/storage/bson_collection_catalog_entry.h b/src/mongo/db/storage/bson_collection_catalog_entry.h
index 1f40eea247c..179c64591db 100644
--- a/src/mongo/db/storage/bson_collection_catalog_entry.h
+++ b/src/mongo/db/storage/bson_collection_catalog_entry.h
@@ -37,76 +37,71 @@
namespace mongo {
- /**
- * This is a helper class for any storage engine that wants to store catalog information
- * as BSON. It is totally optional to use this.
- */
- class BSONCollectionCatalogEntry : public CollectionCatalogEntry {
- public:
- BSONCollectionCatalogEntry( StringData ns );
-
- virtual ~BSONCollectionCatalogEntry(){}
-
- virtual CollectionOptions getCollectionOptions( OperationContext* txn ) const;
+/**
+ * This is a helper class for any storage engine that wants to store catalog information
+ * as BSON. It is totally optional to use this.
+ */
+class BSONCollectionCatalogEntry : public CollectionCatalogEntry {
+public:
+ BSONCollectionCatalogEntry(StringData ns);
- virtual int getTotalIndexCount( OperationContext* txn ) const;
+ virtual ~BSONCollectionCatalogEntry() {}
- virtual int getCompletedIndexCount( OperationContext* txn ) const;
+ virtual CollectionOptions getCollectionOptions(OperationContext* txn) const;
- virtual BSONObj getIndexSpec( OperationContext* txn,
- StringData idxName ) const;
+ virtual int getTotalIndexCount(OperationContext* txn) const;
- virtual void getAllIndexes( OperationContext* txn,
- std::vector<std::string>* names ) const;
+ virtual int getCompletedIndexCount(OperationContext* txn) const;
- virtual bool isIndexMultikey( OperationContext* txn,
- StringData indexName) const;
+ virtual BSONObj getIndexSpec(OperationContext* txn, StringData idxName) const;
- virtual RecordId getIndexHead( OperationContext* txn,
- StringData indexName ) const;
+ virtual void getAllIndexes(OperationContext* txn, std::vector<std::string>* names) const;
- virtual bool isIndexReady( OperationContext* txn,
- StringData indexName ) const;
+ virtual bool isIndexMultikey(OperationContext* txn, StringData indexName) const;
- // ------ for implementors
+ virtual RecordId getIndexHead(OperationContext* txn, StringData indexName) const;
- struct IndexMetaData {
- IndexMetaData() {}
- IndexMetaData( BSONObj s, bool r, RecordId h, bool m )
- : spec( s ), ready( r ), head( h ), multikey( m ) {}
+ virtual bool isIndexReady(OperationContext* txn, StringData indexName) const;
- void updateTTLSetting( long long newExpireSeconds );
+ // ------ for implementors
- std::string name() const { return spec["name"].String(); }
+ struct IndexMetaData {
+ IndexMetaData() {}
+ IndexMetaData(BSONObj s, bool r, RecordId h, bool m)
+ : spec(s), ready(r), head(h), multikey(m) {}
- BSONObj spec;
- bool ready;
- RecordId head;
- bool multikey;
- };
+ void updateTTLSetting(long long newExpireSeconds);
- struct MetaData {
- void parse( const BSONObj& obj );
- BSONObj toBSON() const;
+ std::string name() const {
+ return spec["name"].String();
+ }
- int findIndexOffset( StringData name ) const;
+ BSONObj spec;
+ bool ready;
+ RecordId head;
+ bool multikey;
+ };
- /**
- * Removes information about an index from the MetaData. Returns true if an index
- * called name existed and was deleted, and false otherwise.
- */
- bool eraseIndex( StringData name );
+ struct MetaData {
+ void parse(const BSONObj& obj);
+ BSONObj toBSON() const;
- void rename( StringData toNS );
+ int findIndexOffset(StringData name) const;
- std::string ns;
- CollectionOptions options;
- std::vector<IndexMetaData> indexes;
- };
+ /**
+ * Removes information about an index from the MetaData. Returns true if an index
+ * called name existed and was deleted, and false otherwise.
+ */
+ bool eraseIndex(StringData name);
- protected:
- virtual MetaData _getMetaData( OperationContext* txn ) const = 0;
+ void rename(StringData toNS);
+ std::string ns;
+ CollectionOptions options;
+ std::vector<IndexMetaData> indexes;
};
+protected:
+ virtual MetaData _getMetaData(OperationContext* txn) const = 0;
+};
}
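
As the header comment above notes, BSONCollectionCatalogEntry is an optional helper: a storage engine only has to say where its per-collection metadata document lives. A minimal sketch of a subclass, assuming the engine keeps one BSON document per collection (MyEngineCatalogEntry and _stored are hypothetical names; the write-side pure virtuals of the CollectionCatalogEntry base are omitted here):

    class MyEngineCatalogEntry : public BSONCollectionCatalogEntry {
    public:
        MyEngineCatalogEntry(StringData ns, const BSONObj& stored)
            : BSONCollectionCatalogEntry(ns), _stored(stored.getOwned()) {}

    protected:
        // The one read-side hook a subclass must provide: decode the stored
        // catalog document into the MetaData struct declared above.
        virtual MetaData _getMetaData(OperationContext* txn) const {
            MetaData md;
            md.parse(_stored);  // a real engine would fetch this through 'txn'
            return md;
        }

    private:
        BSONObj _stored;
    };

Every read-side getter (getIndexSpec(), isIndexReady(), and so on) then comes for free in terms of _getMetaData().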
diff --git a/src/mongo/db/storage/capped_callback.h b/src/mongo/db/storage/capped_callback.h
index 0ee4511f66a..4aa040b27a2 100644
--- a/src/mongo/db/storage/capped_callback.h
+++ b/src/mongo/db/storage/capped_callback.h
@@ -34,26 +34,25 @@
namespace mongo {
- class OperationContext;
- class RecordData;
+class OperationContext;
+class RecordData;
+
+/**
+ * When a capped collection has to delete a document, it needs a way to tell the caller
+ * what its deleting so it can unindex or do any other cleanup.
+ * This is that way.
+ */
+class CappedDocumentDeleteCallback {
+public:
+ virtual ~CappedDocumentDeleteCallback() {}
/**
- * When a capped collection has to delete a document, it needs a way to tell the caller
- * what its deleting so it can unindex or do any other cleanup.
- * This is that way.
+ * This will be called right before loc is deleted when wrapping.
+ * If data is unowned, it is only valid inside of this call. If implementations wish to
+ * stash a pointer, they must copy it.
*/
- class CappedDocumentDeleteCallback {
- public:
- virtual ~CappedDocumentDeleteCallback(){}
-
- /**
- * This will be called right before loc is deleted when wrapping.
- * If data is unowned, it is only valid inside of this call. If implementations wish to
- * stash a pointer, they must copy it.
- */
- virtual Status aboutToDeleteCapped( OperationContext* txn,
- const RecordId& loc,
- RecordData data ) = 0;
- };
-
+ virtual Status aboutToDeleteCapped(OperationContext* txn,
+ const RecordId& loc,
+ RecordData data) = 0;
+};
}
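
The contract here hinges on the ownership note in the comment: the RecordData passed in may be unowned and is only valid for the duration of the call. A minimal sketch of an implementation that respects that (the class name and member are hypothetical; in the server itself, Collection implements this interface and uses the hook to unindex the document before the record store reclaims it):

    class StashLastDeletedCallback : public CappedDocumentDeleteCallback {
    public:
        virtual Status aboutToDeleteCapped(OperationContext* txn,
                                           const RecordId& loc,
                                           RecordData data) {
            // 'data' may be unowned, so take an owned copy before stashing it.
            _lastDeleted = data.toBson().getOwned();
            return Status::OK();
        }

    private:
        BSONObj _lastDeleted;
    };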
diff --git a/src/mongo/db/storage/devnull/devnull_init.cpp b/src/mongo/db/storage/devnull/devnull_init.cpp
index ad22a5ca25e..1216d642e8e 100644
--- a/src/mongo/db/storage/devnull/devnull_init.cpp
+++ b/src/mongo/db/storage/devnull/devnull_init.cpp
@@ -38,38 +38,35 @@
namespace mongo {
- namespace {
- class DevNullStorageEngineFactory : public StorageEngine::Factory {
- public:
- virtual StorageEngine* create(const StorageGlobalParams& params,
- const StorageEngineLockFile& lockFile) const {
- KVStorageEngineOptions options;
- options.directoryPerDB = params.directoryperdb;
- options.forRepair = params.repair;
- return new KVStorageEngine( new DevNullKVEngine(), options );
- }
-
- virtual StringData getCanonicalName() const {
- return "devnull";
- }
-
- virtual Status validateMetadata(const StorageEngineMetadata& metadata,
- const StorageGlobalParams& params) const {
- return Status::OK();
- }
+namespace {
+class DevNullStorageEngineFactory : public StorageEngine::Factory {
+public:
+ virtual StorageEngine* create(const StorageGlobalParams& params,
+ const StorageEngineLockFile& lockFile) const {
+ KVStorageEngineOptions options;
+ options.directoryPerDB = params.directoryperdb;
+ options.forRepair = params.repair;
+ return new KVStorageEngine(new DevNullKVEngine(), options);
+ }
- virtual BSONObj createMetadataOptions(const StorageGlobalParams& params) const {
- return BSONObj();
- }
- };
- } // namespace
+ virtual StringData getCanonicalName() const {
+ return "devnull";
+ }
- MONGO_INITIALIZER_WITH_PREREQUISITES(DevNullEngineInit,
- ("SetGlobalEnvironment"))
- (InitializerContext* context ) {
- getGlobalServiceContext()->registerStorageEngine("devnull", new DevNullStorageEngineFactory() );
+ virtual Status validateMetadata(const StorageEngineMetadata& metadata,
+ const StorageGlobalParams& params) const {
return Status::OK();
}
-}
+ virtual BSONObj createMetadataOptions(const StorageGlobalParams& params) const {
+ return BSONObj();
+ }
+};
+} // namespace
+MONGO_INITIALIZER_WITH_PREREQUISITES(DevNullEngineInit, ("SetGlobalEnvironment"))
+(InitializerContext* context) {
+ getGlobalServiceContext()->registerStorageEngine("devnull", new DevNullStorageEngineFactory());
+ return Status::OK();
+}
+}
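
Once the factory is registered under the canonical name "devnull", the engine is selectable at startup like any other, e.g.:

    mongod --storageEngine devnull

which is the usual way this engine is exercised: it acknowledges writes without persisting anything, making it handy for benchmarking the layers above the storage API.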
diff --git a/src/mongo/db/storage/devnull/devnull_kv_engine.cpp b/src/mongo/db/storage/devnull/devnull_kv_engine.cpp
index 1d1d039b7a6..25ebf6a5de6 100644
--- a/src/mongo/db/storage/devnull/devnull_kv_engine.cpp
+++ b/src/mongo/db/storage/devnull/devnull_kv_engine.cpp
@@ -38,192 +38,216 @@
namespace mongo {
- class EmptyRecordCursor final : public RecordCursor {
- public:
- boost::optional<Record> next() final { return {}; }
- boost::optional<Record> seekExact(const RecordId& id) final { return {}; }
- void savePositioned() final {}
- bool restore(OperationContext* txn) final { return true; }
- };
-
- class DevNullRecordStore : public RecordStore {
- public:
- DevNullRecordStore( StringData ns, const CollectionOptions& options )
- : RecordStore( ns ), _options( options ) {
- _numInserts = 0;
- _dummy = BSON( "_id" << 1 );
- }
-
- virtual const char* name() const { return "devnull"; }
-
- virtual void setCappedDeleteCallback(CappedDocumentDeleteCallback*){}
-
- virtual long long dataSize( OperationContext* txn ) const { return 0; }
-
- virtual long long numRecords( OperationContext* txn ) const { return 0; }
-
- virtual bool isCapped() const { return _options.capped; }
-
- virtual int64_t storageSize( OperationContext* txn,
- BSONObjBuilder* extraInfo = NULL,
- int infoLevel = 0 ) const {
- return 0;
- }
-
- virtual RecordData dataFor( OperationContext* txn, const RecordId& loc) const {
- return RecordData( _dummy.objdata(), _dummy.objsize() );
- }
-
- virtual bool findRecord( OperationContext* txn, const RecordId& loc, RecordData* rd ) const {
- return false;
- }
-
- virtual void deleteRecord( OperationContext* txn, const RecordId& dl ) {}
-
- virtual StatusWith<RecordId> insertRecord( OperationContext* txn,
- const char* data,
- int len,
- bool enforceQuota ) {
- _numInserts++;
- return StatusWith<RecordId>( RecordId( 6, 4 ) );
- }
-
- virtual StatusWith<RecordId> insertRecord( OperationContext* txn,
- const DocWriter* doc,
- bool enforceQuota ) {
- _numInserts++;
- return StatusWith<RecordId>( RecordId( 6, 4 ) );
- }
-
- virtual StatusWith<RecordId> updateRecord( OperationContext* txn,
- const RecordId& oldLocation,
- const char* data,
- int len,
- bool enforceQuota,
- UpdateNotifier* notifier ) {
- return StatusWith<RecordId>( oldLocation );
- }
-
- virtual bool updateWithDamagesSupported() const {
- return false;
- }
-
- virtual Status updateWithDamages( OperationContext* txn,
- const RecordId& loc,
- const RecordData& oldRec,
- const char* damageSource,
- const mutablebson::DamageVector& damages ) {
- invariant(false);
- }
-
-
- std::unique_ptr<RecordCursor> getCursor(OperationContext* txn, bool forward) const final {
- return stdx::make_unique<EmptyRecordCursor>();
- }
-
- virtual Status truncate( OperationContext* txn ) { return Status::OK(); }
-
- virtual void temp_cappedTruncateAfter(OperationContext* txn,
- RecordId end,
- bool inclusive) { }
-
- virtual Status validate( OperationContext* txn,
- bool full, bool scanData,
- ValidateAdaptor* adaptor,
- ValidateResults* results, BSONObjBuilder* output ) {
- return Status::OK();
- }
-
- virtual void appendCustomStats( OperationContext* txn,
- BSONObjBuilder* result,
- double scale ) const {
- result->appendNumber( "numInserts", _numInserts );
- }
-
- virtual Status touch( OperationContext* txn, BSONObjBuilder* output ) const {
- return Status::OK();
- }
-
- virtual void updateStatsAfterRepair(OperationContext* txn,
- long long numRecords,
- long long dataSize) {
- }
-
- private:
- CollectionOptions _options;
- long long _numInserts;
- BSONObj _dummy;
- };
-
- class DevNullSortedDataBuilderInterface : public SortedDataBuilderInterface {
- MONGO_DISALLOW_COPYING(DevNullSortedDataBuilderInterface);
-
- public:
- DevNullSortedDataBuilderInterface() { }
-
- virtual Status addKey(const BSONObj& key, const RecordId& loc) {
- return Status::OK();
- }
- };
-
- class DevNullSortedDataInterface : public SortedDataInterface {
- public:
- virtual ~DevNullSortedDataInterface() { }
-
- virtual SortedDataBuilderInterface* getBulkBuilder(OperationContext* txn,
- bool dupsAllowed) {
- return new DevNullSortedDataBuilderInterface();
- }
-
- virtual Status insert(OperationContext* txn,
- const BSONObj& key,
- const RecordId& loc,
- bool dupsAllowed) { return Status::OK(); }
-
- virtual void unindex(OperationContext* txn,
- const BSONObj& key,
- const RecordId& loc,
- bool dupsAllowed) { }
-
- virtual Status dupKeyCheck(OperationContext* txn,
- const BSONObj& key,
- const RecordId& loc) { return Status::OK(); }
-
- virtual void fullValidate(OperationContext* txn, bool full, long long* numKeysOut,
- BSONObjBuilder* output) const { }
-
- virtual bool appendCustomStats(OperationContext* txn, BSONObjBuilder* output, double scale)
- const {
- return false;
- }
-
- virtual long long getSpaceUsedBytes( OperationContext* txn ) const { return 0; }
-
- virtual bool isEmpty(OperationContext* txn) { return true; }
+class EmptyRecordCursor final : public RecordCursor {
+public:
+ boost::optional<Record> next() final {
+ return {};
+ }
+ boost::optional<Record> seekExact(const RecordId& id) final {
+ return {};
+ }
+ void savePositioned() final {}
+ bool restore(OperationContext* txn) final {
+ return true;
+ }
+};
+
+class DevNullRecordStore : public RecordStore {
+public:
+ DevNullRecordStore(StringData ns, const CollectionOptions& options)
+ : RecordStore(ns), _options(options) {
+ _numInserts = 0;
+ _dummy = BSON("_id" << 1);
+ }
- virtual std::unique_ptr<SortedDataInterface::Cursor> newCursor(OperationContext* txn,
- bool isForward) const {
- return {};
- }
+ virtual const char* name() const {
+ return "devnull";
+ }
- virtual Status initAsEmpty(OperationContext* txn) { return Status::OK(); }
- };
+ virtual void setCappedDeleteCallback(CappedDocumentDeleteCallback*) {}
+
+ virtual long long dataSize(OperationContext* txn) const {
+ return 0;
+ }
+
+ virtual long long numRecords(OperationContext* txn) const {
+ return 0;
+ }
+
+ virtual bool isCapped() const {
+ return _options.capped;
+ }
+
+ virtual int64_t storageSize(OperationContext* txn,
+ BSONObjBuilder* extraInfo = NULL,
+ int infoLevel = 0) const {
+ return 0;
+ }
+
+ virtual RecordData dataFor(OperationContext* txn, const RecordId& loc) const {
+ return RecordData(_dummy.objdata(), _dummy.objsize());
+ }
+
+ virtual bool findRecord(OperationContext* txn, const RecordId& loc, RecordData* rd) const {
+ return false;
+ }
+
+ virtual void deleteRecord(OperationContext* txn, const RecordId& dl) {}
+
+ virtual StatusWith<RecordId> insertRecord(OperationContext* txn,
+ const char* data,
+ int len,
+ bool enforceQuota) {
+ _numInserts++;
+ return StatusWith<RecordId>(RecordId(6, 4));
+ }
+
+ virtual StatusWith<RecordId> insertRecord(OperationContext* txn,
+ const DocWriter* doc,
+ bool enforceQuota) {
+ _numInserts++;
+ return StatusWith<RecordId>(RecordId(6, 4));
+ }
+
+ virtual StatusWith<RecordId> updateRecord(OperationContext* txn,
+ const RecordId& oldLocation,
+ const char* data,
+ int len,
+ bool enforceQuota,
+ UpdateNotifier* notifier) {
+ return StatusWith<RecordId>(oldLocation);
+ }
+
+ virtual bool updateWithDamagesSupported() const {
+ return false;
+ }
+
+ virtual Status updateWithDamages(OperationContext* txn,
+ const RecordId& loc,
+ const RecordData& oldRec,
+ const char* damageSource,
+ const mutablebson::DamageVector& damages) {
+ invariant(false);
+ }
+
+
+ std::unique_ptr<RecordCursor> getCursor(OperationContext* txn, bool forward) const final {
+ return stdx::make_unique<EmptyRecordCursor>();
+ }
+
+ virtual Status truncate(OperationContext* txn) {
+ return Status::OK();
+ }
+ virtual void temp_cappedTruncateAfter(OperationContext* txn, RecordId end, bool inclusive) {}
- RecordStore* DevNullKVEngine::getRecordStore( OperationContext* opCtx,
- StringData ns,
- StringData ident,
- const CollectionOptions& options ) {
- if ( ident == "_mdb_catalog" ) {
- return new InMemoryRecordStore( ns, &_catalogInfo );
- }
- return new DevNullRecordStore( ns, options );
+ virtual Status validate(OperationContext* txn,
+ bool full,
+ bool scanData,
+ ValidateAdaptor* adaptor,
+ ValidateResults* results,
+ BSONObjBuilder* output) {
+ return Status::OK();
}
- SortedDataInterface* DevNullKVEngine::getSortedDataInterface( OperationContext* opCtx,
- StringData ident,
- const IndexDescriptor* desc ) {
- return new DevNullSortedDataInterface();
+ virtual void appendCustomStats(OperationContext* txn,
+ BSONObjBuilder* result,
+ double scale) const {
+ result->appendNumber("numInserts", _numInserts);
}
+ virtual Status touch(OperationContext* txn, BSONObjBuilder* output) const {
+ return Status::OK();
+ }
+
+ virtual void updateStatsAfterRepair(OperationContext* txn,
+ long long numRecords,
+ long long dataSize) {}
+
+private:
+ CollectionOptions _options;
+ long long _numInserts;
+ BSONObj _dummy;
+};
+
+class DevNullSortedDataBuilderInterface : public SortedDataBuilderInterface {
+ MONGO_DISALLOW_COPYING(DevNullSortedDataBuilderInterface);
+
+public:
+ DevNullSortedDataBuilderInterface() {}
+
+ virtual Status addKey(const BSONObj& key, const RecordId& loc) {
+ return Status::OK();
+ }
+};
+
+class DevNullSortedDataInterface : public SortedDataInterface {
+public:
+ virtual ~DevNullSortedDataInterface() {}
+
+ virtual SortedDataBuilderInterface* getBulkBuilder(OperationContext* txn, bool dupsAllowed) {
+ return new DevNullSortedDataBuilderInterface();
+ }
+
+ virtual Status insert(OperationContext* txn,
+ const BSONObj& key,
+ const RecordId& loc,
+ bool dupsAllowed) {
+ return Status::OK();
+ }
+
+ virtual void unindex(OperationContext* txn,
+ const BSONObj& key,
+ const RecordId& loc,
+ bool dupsAllowed) {}
+
+ virtual Status dupKeyCheck(OperationContext* txn, const BSONObj& key, const RecordId& loc) {
+ return Status::OK();
+ }
+
+ virtual void fullValidate(OperationContext* txn,
+ bool full,
+ long long* numKeysOut,
+ BSONObjBuilder* output) const {}
+
+ virtual bool appendCustomStats(OperationContext* txn,
+ BSONObjBuilder* output,
+ double scale) const {
+ return false;
+ }
+
+ virtual long long getSpaceUsedBytes(OperationContext* txn) const {
+ return 0;
+ }
+
+ virtual bool isEmpty(OperationContext* txn) {
+ return true;
+ }
+
+ virtual std::unique_ptr<SortedDataInterface::Cursor> newCursor(OperationContext* txn,
+ bool isForward) const {
+ return {};
+ }
+
+ virtual Status initAsEmpty(OperationContext* txn) {
+ return Status::OK();
+ }
+};
+
+
+RecordStore* DevNullKVEngine::getRecordStore(OperationContext* opCtx,
+ StringData ns,
+ StringData ident,
+ const CollectionOptions& options) {
+ if (ident == "_mdb_catalog") {
+ return new InMemoryRecordStore(ns, &_catalogInfo);
+ }
+ return new DevNullRecordStore(ns, options);
+}
+
+SortedDataInterface* DevNullKVEngine::getSortedDataInterface(OperationContext* opCtx,
+ StringData ident,
+ const IndexDescriptor* desc) {
+ return new DevNullSortedDataInterface();
+}
}
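
The record store above makes the engine's semantics concrete: every insert is acknowledged with the same fixed RecordId and the payload is discarded, while reads always hand back the _dummy {_id: 1} document. A sketch of what a caller within this translation unit would observe ('txn' is assumed to be a valid OperationContext*):

    DevNullRecordStore rs("test.devnull", CollectionOptions());
    BSONObj doc = BSON("_id" << 42);
    StatusWith<RecordId> res = rs.insertRecord(txn, doc.objdata(), doc.objsize(), false);
    invariant(res.getValue() == RecordId(6, 4));      // fixed id; the document is gone
    RecordData rd = rs.dataFor(txn, res.getValue());  // always the {_id: 1} dummy

The one exception is the "_mdb_catalog" ident, which getRecordStore() backs with a real InMemoryRecordStore so the server's catalog keeps working.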
diff --git a/src/mongo/db/storage/devnull/devnull_kv_engine.h b/src/mongo/db/storage/devnull/devnull_kv_engine.h
index b6d14c52399..a6d559ef35a 100644
--- a/src/mongo/db/storage/devnull/devnull_kv_engine.h
+++ b/src/mongo/db/storage/devnull/devnull_kv_engine.h
@@ -36,74 +36,71 @@
namespace mongo {
- class DevNullKVEngine : public KVEngine {
- public:
- virtual ~DevNullKVEngine(){}
-
- virtual RecoveryUnit* newRecoveryUnit() {
- return new RecoveryUnitNoop();
- }
-
- virtual Status createRecordStore( OperationContext* opCtx,
- StringData ns,
- StringData ident,
- const CollectionOptions& options ) {
- return Status::OK();
- }
-
- virtual RecordStore* getRecordStore( OperationContext* opCtx,
- StringData ns,
+class DevNullKVEngine : public KVEngine {
+public:
+ virtual ~DevNullKVEngine() {}
+
+ virtual RecoveryUnit* newRecoveryUnit() {
+ return new RecoveryUnitNoop();
+ }
+
+ virtual Status createRecordStore(OperationContext* opCtx,
+ StringData ns,
+ StringData ident,
+ const CollectionOptions& options) {
+ return Status::OK();
+ }
+
+ virtual RecordStore* getRecordStore(OperationContext* opCtx,
+ StringData ns,
+ StringData ident,
+ const CollectionOptions& options);
+
+ virtual Status createSortedDataInterface(OperationContext* opCtx,
StringData ident,
- const CollectionOptions& options );
-
- virtual Status createSortedDataInterface( OperationContext* opCtx,
- StringData ident,
- const IndexDescriptor* desc ) {
- return Status::OK();
- }
-
- virtual SortedDataInterface* getSortedDataInterface( OperationContext* opCtx,
- StringData ident,
- const IndexDescriptor* desc );
-
- virtual Status dropIdent( OperationContext* opCtx,
- StringData ident ) {
- return Status::OK();
- }
-
- virtual bool supportsDocLocking() const {
- return true;
- }
-
- virtual bool supportsDirectoryPerDB() const {
- return false;
- }
-
- virtual bool isDurable() const {
- return true;
- }
-
- virtual int64_t getIdentSize( OperationContext* opCtx,
- StringData ident ) {
- return 1;
- }
-
- virtual Status repairIdent( OperationContext* opCtx,
- StringData ident ) {
- return Status::OK();
- }
-
- virtual bool hasIdent(OperationContext* opCtx, StringData ident) const {
- return true;
- }
-
- std::vector<std::string> getAllIdents( OperationContext* opCtx ) const {
- return std::vector<std::string>();
- }
-
- virtual void cleanShutdown() {};
-
- private:
- std::shared_ptr<void> _catalogInfo;
- };
+ const IndexDescriptor* desc) {
+ return Status::OK();
+ }
+
+ virtual SortedDataInterface* getSortedDataInterface(OperationContext* opCtx,
+ StringData ident,
+ const IndexDescriptor* desc);
+
+ virtual Status dropIdent(OperationContext* opCtx, StringData ident) {
+ return Status::OK();
+ }
+
+ virtual bool supportsDocLocking() const {
+ return true;
+ }
+
+ virtual bool supportsDirectoryPerDB() const {
+ return false;
+ }
+
+ virtual bool isDurable() const {
+ return true;
+ }
+
+ virtual int64_t getIdentSize(OperationContext* opCtx, StringData ident) {
+ return 1;
+ }
+
+ virtual Status repairIdent(OperationContext* opCtx, StringData ident) {
+ return Status::OK();
+ }
+
+ virtual bool hasIdent(OperationContext* opCtx, StringData ident) const {
+ return true;
+ }
+
+ std::vector<std::string> getAllIdents(OperationContext* opCtx) const {
+ return std::vector<std::string>();
+ }
+
+ virtual void cleanShutdown(){};
+
+private:
+ std::shared_ptr<void> _catalogInfo;
+};
}
diff --git a/src/mongo/db/storage/in_memory/in_memory_btree_impl.cpp b/src/mongo/db/storage/in_memory/in_memory_btree_impl.cpp
index 40da9035fbd..f40dff8e7ff 100644
--- a/src/mongo/db/storage/in_memory/in_memory_btree_impl.cpp
+++ b/src/mongo/db/storage/in_memory/in_memory_btree_impl.cpp
@@ -42,448 +42,450 @@
namespace mongo {
- using std::shared_ptr;
- using std::string;
- using std::vector;
+using std::shared_ptr;
+using std::string;
+using std::vector;
namespace {
- const int TempKeyMaxSize = 1024; // this goes away with SERVER-3372
+const int TempKeyMaxSize = 1024; // this goes away with SERVER-3372
- bool hasFieldNames(const BSONObj& obj) {
- BSONForEach(e, obj) {
- if (e.fieldName()[0])
- return true;
- }
- return false;
+bool hasFieldNames(const BSONObj& obj) {
+ BSONForEach(e, obj) {
+ if (e.fieldName()[0])
+ return true;
}
+ return false;
+}
- BSONObj stripFieldNames(const BSONObj& query) {
- if (!hasFieldNames(query))
- return query;
+BSONObj stripFieldNames(const BSONObj& query) {
+ if (!hasFieldNames(query))
+ return query;
- BSONObjBuilder bb;
- BSONForEach(e, query) {
- bb.appendAs(e, StringData());
- }
- return bb.obj();
+ BSONObjBuilder bb;
+ BSONForEach(e, query) {
+ bb.appendAs(e, StringData());
}
+ return bb.obj();
+}
+
+typedef std::set<IndexKeyEntry, IndexEntryComparison> IndexSet;
+
+// taken from btree_logic.cpp
+Status dupKeyError(const BSONObj& key) {
+ StringBuilder sb;
+ sb << "E11000 duplicate key error ";
+ // sb << "index: " << _indexName << " "; // TODO
+ sb << "dup key: " << key;
+ return Status(ErrorCodes::DuplicateKey, sb.str());
+}
+
+bool isDup(const IndexSet& data, const BSONObj& key, RecordId loc) {
+ const IndexSet::const_iterator it = data.find(IndexKeyEntry(key, RecordId()));
+ if (it == data.end())
+ return false;
- typedef std::set<IndexKeyEntry, IndexEntryComparison> IndexSet;
-
- // taken from btree_logic.cpp
- Status dupKeyError(const BSONObj& key) {
- StringBuilder sb;
- sb << "E11000 duplicate key error ";
- // sb << "index: " << _indexName << " "; // TODO
- sb << "dup key: " << key;
- return Status(ErrorCodes::DuplicateKey, sb.str());
+ // Not a dup if the entry is for the same loc.
+ return it->loc != loc;
+}
+
+class InMemoryBtreeBuilderImpl : public SortedDataBuilderInterface {
+public:
+ InMemoryBtreeBuilderImpl(IndexSet* data, long long* currentKeySize, bool dupsAllowed)
+ : _data(data),
+ _currentKeySize(currentKeySize),
+ _dupsAllowed(dupsAllowed),
+ _comparator(_data->key_comp()) {
+ invariant(_data->empty());
}
- bool isDup(const IndexSet& data, const BSONObj& key, RecordId loc) {
- const IndexSet::const_iterator it = data.find(IndexKeyEntry(key, RecordId()));
- if (it == data.end())
- return false;
+ Status addKey(const BSONObj& key, const RecordId& loc) {
+ // inserts should be in ascending (key, RecordId) order.
- // Not a dup if the entry is for the same loc.
- return it->loc != loc;
- }
-
- class InMemoryBtreeBuilderImpl : public SortedDataBuilderInterface {
- public:
- InMemoryBtreeBuilderImpl(IndexSet* data, long long* currentKeySize, bool dupsAllowed)
- : _data(data),
- _currentKeySize( currentKeySize ),
- _dupsAllowed(dupsAllowed),
- _comparator(_data->key_comp()) {
- invariant(_data->empty());
+ if (key.objsize() >= TempKeyMaxSize) {
+ return Status(ErrorCodes::KeyTooLong, "key too big");
}
- Status addKey(const BSONObj& key, const RecordId& loc) {
- // inserts should be in ascending (key, RecordId) order.
+ invariant(loc.isNormal());
+ invariant(!hasFieldNames(key));
- if ( key.objsize() >= TempKeyMaxSize ) {
- return Status(ErrorCodes::KeyTooLong, "key too big");
+ if (!_data->empty()) {
+ // Compare specified key with last inserted key, ignoring its RecordId
+ int cmp = _comparator.compare(IndexKeyEntry(key, RecordId()), *_last);
+ if (cmp < 0 || (_dupsAllowed && cmp == 0 && loc < _last->loc)) {
+ return Status(ErrorCodes::InternalError,
+ "expected ascending (key, RecordId) order in bulk builder");
+ } else if (!_dupsAllowed && cmp == 0 && loc != _last->loc) {
+ return dupKeyError(key);
}
+ }
- invariant(loc.isNormal());
- invariant(!hasFieldNames(key));
+ BSONObj owned = key.getOwned();
+ _last = _data->insert(_data->end(), IndexKeyEntry(owned, loc));
+ *_currentKeySize += key.objsize();
- if (!_data->empty()) {
- // Compare specified key with last inserted key, ignoring its RecordId
- int cmp = _comparator.compare(IndexKeyEntry(key, RecordId()), *_last);
- if (cmp < 0 || (_dupsAllowed && cmp == 0 && loc < _last->loc)) {
- return Status(ErrorCodes::InternalError,
- "expected ascending (key, RecordId) order in bulk builder");
- }
- else if (!_dupsAllowed && cmp == 0 && loc != _last->loc) {
- return dupKeyError(key);
- }
- }
+ return Status::OK();
+ }
- BSONObj owned = key.getOwned();
- _last = _data->insert(_data->end(), IndexKeyEntry(owned, loc));
- *_currentKeySize += key.objsize();
+private:
+ IndexSet* const _data;
+ long long* _currentKeySize;
+ const bool _dupsAllowed;
- return Status::OK();
- }
+ IndexEntryComparison _comparator; // used by the bulk builder to detect duplicate keys
+ IndexSet::const_iterator _last; // or (key, RecordId) ordering violations
+};
- private:
- IndexSet* const _data;
- long long* _currentKeySize;
- const bool _dupsAllowed;
+class InMemoryBtreeImpl : public SortedDataInterface {
+public:
+ InMemoryBtreeImpl(IndexSet* data) : _data(data) {
+ _currentKeySize = 0;
+ }
- IndexEntryComparison _comparator; // used by the bulk builder to detect duplicate keys
- IndexSet::const_iterator _last; // or (key, RecordId) ordering violations
- };
+ virtual SortedDataBuilderInterface* getBulkBuilder(OperationContext* txn, bool dupsAllowed) {
+ return new InMemoryBtreeBuilderImpl(_data, &_currentKeySize, dupsAllowed);
+ }
- class InMemoryBtreeImpl : public SortedDataInterface {
- public:
- InMemoryBtreeImpl(IndexSet* data)
- : _data(data) {
- _currentKeySize = 0;
+ virtual Status insert(OperationContext* txn,
+ const BSONObj& key,
+ const RecordId& loc,
+ bool dupsAllowed) {
+ invariant(loc.isNormal());
+ invariant(!hasFieldNames(key));
+
+ if (key.objsize() >= TempKeyMaxSize) {
+ string msg = mongoutils::str::stream()
+ << "InMemoryBtree::insert: key too large to index, failing " << ' ' << key.objsize()
+ << ' ' << key;
+ return Status(ErrorCodes::KeyTooLong, msg);
+ }
+
+ // TODO optimization: save the iterator from the dup-check to speed up insert
+ if (!dupsAllowed && isDup(*_data, key, loc))
+ return dupKeyError(key);
+
+ IndexKeyEntry entry(key.getOwned(), loc);
+ if (_data->insert(entry).second) {
+ _currentKeySize += key.objsize();
+ txn->recoveryUnit()->registerChange(new IndexChange(_data, entry, true));
}
+ return Status::OK();
+ }
- virtual SortedDataBuilderInterface* getBulkBuilder(OperationContext* txn,
- bool dupsAllowed) {
- return new InMemoryBtreeBuilderImpl(_data, &_currentKeySize, dupsAllowed);
+ virtual void unindex(OperationContext* txn,
+ const BSONObj& key,
+ const RecordId& loc,
+ bool dupsAllowed) {
+ invariant(loc.isNormal());
+ invariant(!hasFieldNames(key));
+
+ IndexKeyEntry entry(key.getOwned(), loc);
+ const size_t numDeleted = _data->erase(entry);
+ invariant(numDeleted <= 1);
+ if (numDeleted == 1) {
+ _currentKeySize -= key.objsize();
+ txn->recoveryUnit()->registerChange(new IndexChange(_data, entry, false));
}
+ }
- virtual Status insert(OperationContext* txn,
- const BSONObj& key,
- const RecordId& loc,
- bool dupsAllowed) {
+ virtual void fullValidate(OperationContext* txn,
+ bool full,
+ long long* numKeysOut,
+ BSONObjBuilder* output) const {
+ // TODO check invariants?
+ *numKeysOut = _data->size();
+ }
- invariant(loc.isNormal());
- invariant(!hasFieldNames(key));
+ virtual bool appendCustomStats(OperationContext* txn,
+ BSONObjBuilder* output,
+ double scale) const {
+ return false;
+ }
- if ( key.objsize() >= TempKeyMaxSize ) {
- string msg = mongoutils::str::stream()
- << "InMemoryBtree::insert: key too large to index, failing "
- << ' ' << key.objsize() << ' ' << key;
- return Status(ErrorCodes::KeyTooLong, msg);
- }
+ virtual long long getSpaceUsedBytes(OperationContext* txn) const {
+ return _currentKeySize + (sizeof(IndexKeyEntry) * _data->size());
+ }
- // TODO optimization: save the iterator from the dup-check to speed up insert
- if (!dupsAllowed && isDup(*_data, key, loc))
- return dupKeyError(key);
+ virtual Status dupKeyCheck(OperationContext* txn, const BSONObj& key, const RecordId& loc) {
+ invariant(!hasFieldNames(key));
+ if (isDup(*_data, key, loc))
+ return dupKeyError(key);
+ return Status::OK();
+ }
- IndexKeyEntry entry(key.getOwned(), loc);
- if ( _data->insert(entry).second ) {
- _currentKeySize += key.objsize();
- txn->recoveryUnit()->registerChange(new IndexChange(_data, entry, true));
- }
- return Status::OK();
- }
+ virtual bool isEmpty(OperationContext* txn) {
+ return _data->empty();
+ }
- virtual void unindex(OperationContext* txn,
- const BSONObj& key,
- const RecordId& loc,
- bool dupsAllowed) {
- invariant(loc.isNormal());
- invariant(!hasFieldNames(key));
-
- IndexKeyEntry entry(key.getOwned(), loc);
- const size_t numDeleted = _data->erase(entry);
- invariant(numDeleted <= 1);
- if ( numDeleted == 1 ) {
- _currentKeySize -= key.objsize();
- txn->recoveryUnit()->registerChange(new IndexChange(_data, entry, false));
- }
- }
+ virtual Status touch(OperationContext* txn) const {
+ // already in memory...
+ return Status::OK();
+ }
- virtual void fullValidate(OperationContext* txn, bool full, long long *numKeysOut,
- BSONObjBuilder* output) const {
- // TODO check invariants?
- *numKeysOut = _data->size();
- }
+ class Cursor final : public SortedDataInterface::Cursor {
+ public:
+ Cursor(OperationContext* txn, const IndexSet& data, bool isForward)
+ : _txn(txn), _data(data), _forward(isForward), _it(data.end()) {}
- virtual bool appendCustomStats(OperationContext* txn, BSONObjBuilder* output, double scale)
- const {
- return false;
- }
+ boost::optional<IndexKeyEntry> next(RequestedInfo parts) override {
+ if (_lastMoveWasRestore) {
+ // Return current position rather than advancing.
+ _lastMoveWasRestore = false;
+ } else {
+ advance();
+ if (atEndPoint())
+ _isEOF = true;
+ }
- virtual long long getSpaceUsedBytes( OperationContext* txn ) const {
- return _currentKeySize + ( sizeof(IndexKeyEntry) * _data->size() );
+ if (_isEOF)
+ return {};
+ return *_it;
}
- virtual Status dupKeyCheck(OperationContext* txn, const BSONObj& key, const RecordId& loc) {
- invariant(!hasFieldNames(key));
- if (isDup(*_data, key, loc))
- return dupKeyError(key);
- return Status::OK();
+ void setEndPosition(const BSONObj& key, bool inclusive) override {
+ if (key.isEmpty()) {
+ // This means scan to end of index.
+ _endState = {};
+ return;
+ }
+
+            // NOTE: this uses the opposite min/max rules from a normal seek because a forward
+ // scan should land after the key if inclusive and before if exclusive.
+ _endState = EndState(stripFieldNames(key),
+ _forward == inclusive ? RecordId::max() : RecordId::min());
+ seekEndCursor();
}
- virtual bool isEmpty(OperationContext* txn) {
- return _data->empty();
+ boost::optional<IndexKeyEntry> seek(const BSONObj& key,
+ bool inclusive,
+ RequestedInfo parts) override {
+ const BSONObj query = stripFieldNames(key);
+ locate(query, _forward == inclusive ? RecordId::min() : RecordId::max());
+ _lastMoveWasRestore = false;
+ if (_isEOF)
+ return {};
+ dassert(inclusive ? compareKeys(_it->key, query) >= 0
+ : compareKeys(_it->key, query) > 0);
+ return *_it;
}
- virtual Status touch(OperationContext* txn) const{
- // already in memory...
- return Status::OK();
+ boost::optional<IndexKeyEntry> seek(const IndexSeekPoint& seekPoint,
+ RequestedInfo parts) override {
+ // Query encodes exclusive case so it can be treated as an inclusive query.
+ const BSONObj query = IndexEntryComparison::makeQueryObject(seekPoint, _forward);
+ locate(query, _forward ? RecordId::min() : RecordId::max());
+ _lastMoveWasRestore = false;
+ if (_isEOF)
+ return {};
+ dassert(compareKeys(_it->key, query) >= 0);
+ return *_it;
}
- class Cursor final : public SortedDataInterface::Cursor {
- public:
- Cursor(OperationContext* txn, const IndexSet& data, bool isForward)
- : _txn(txn),
- _data(data),
- _forward(isForward),
- _it(data.end())
- {}
-
- boost::optional<IndexKeyEntry> next(RequestedInfo parts) override {
- if (_lastMoveWasRestore) {
- // Return current position rather than advancing.
- _lastMoveWasRestore = false;
- }
- else {
- advance();
- if (atEndPoint()) _isEOF = true;
- }
+ void savePositioned() override {
+ // Keep original position if we haven't moved since the last restore.
+ _txn = nullptr;
+ if (_lastMoveWasRestore)
+ return;
- if (_isEOF) return {};
- return *_it;
- }
-
- void setEndPosition(const BSONObj& key, bool inclusive) override {
- if (key.isEmpty()) {
- // This means scan to end of index.
- _endState = {};
- return;
- }
-
- // NOTE: this uses the opposite min/max rules as a normal seek because a forward
- // scan should land after the key if inclusive and before if exclusive.
- _endState = EndState(stripFieldNames(key),
- _forward == inclusive ? RecordId::max() : RecordId::min());
- seekEndCursor();
+ if (_isEOF) {
+ saveUnpositioned();
+ return;
}
- boost::optional<IndexKeyEntry> seek(const BSONObj& key, bool inclusive,
- RequestedInfo parts) override {
- const BSONObj query = stripFieldNames(key);
- locate(query, _forward == inclusive ? RecordId::min() : RecordId::max());
- _lastMoveWasRestore = false;
- if (_isEOF) return {};
- dassert(inclusive ? compareKeys(_it->key, query) >= 0
- : compareKeys(_it->key, query) > 0);
- return *_it;
- }
+ _savedAtEnd = false;
+ _savedKey = _it->key.getOwned();
+ _savedLoc = _it->loc;
+            // Doing nothing with the end cursor since it will do a full reseek on restore.
+ }
- boost::optional<IndexKeyEntry> seek(const IndexSeekPoint& seekPoint,
- RequestedInfo parts) override {
- // Query encodes exclusive case so it can be treated as an inclusive query.
- const BSONObj query = IndexEntryComparison::makeQueryObject(seekPoint, _forward);
- locate(query, _forward ? RecordId::min() : RecordId::max());
- _lastMoveWasRestore = false;
- if (_isEOF) return {};
- dassert(compareKeys(_it->key, query) >= 0);
- return *_it;
- }
+ void saveUnpositioned() override {
+ _txn = nullptr;
+ _savedAtEnd = true;
+            // Doing nothing with the end cursor since it will do a full reseek on restore.
+ }
- void savePositioned() override {
- // Keep original position if we haven't moved since the last restore.
- _txn = nullptr;
- if (_lastMoveWasRestore) return;
+ void restore(OperationContext* txn) override {
+ _txn = txn;
- if (_isEOF) {
- saveUnpositioned();
- return;
- }
+ // Always do a full seek on restore. We cannot use our last position since index
+ // entries may have been inserted closer to our endpoint and we would need to move
+ // over them.
+ seekEndCursor();
- _savedAtEnd = false;
- _savedKey = _it->key.getOwned();
- _savedLoc = _it->loc;
- // Doing nothing with end cursor since it will do full reseek on restore.
+ if (_savedAtEnd) {
+ _isEOF = true;
+ return;
}
- void saveUnpositioned() override {
- _txn = nullptr;
- _savedAtEnd = true;
- // Doing nothing with end cursor since it will do full reseek on restore.
- }
+ // Need to find our position from the root.
+ locate(_savedKey, _savedLoc);
- void restore(OperationContext* txn) override {
- _txn = txn;
+ _lastMoveWasRestore = _isEOF // We weren't EOF but now are.
+ || _data.value_comp().compare(*_it, {_savedKey, _savedLoc}) != 0;
+ }
- // Always do a full seek on restore. We cannot use our last position since index
- // entries may have been inserted closer to our endpoint and we would need to move
- // over them.
- seekEndCursor();
+ private:
+ bool atEndPoint() const {
+ return _endState && _it == _endState->it;
+ }
- if (_savedAtEnd) {
+ // Advances once in the direction of the scan, updating _isEOF as needed.
+ // Does nothing if already _isEOF.
+ void advance() {
+ if (_isEOF)
+ return;
+ if (_forward) {
+ if (_it != _data.end())
+ ++_it;
+ if (_it == _data.end() || atEndPoint())
_isEOF = true;
- return;
+ } else {
+ if (_it == _data.begin() || _data.empty()) {
+ _isEOF = true;
+ } else {
+ --_it;
}
-
- // Need to find our position from the root.
- locate(_savedKey, _savedLoc);
-
- _lastMoveWasRestore = _isEOF // We weren't EOF but now are.
- || _data.value_comp().compare(*_it, {_savedKey, _savedLoc}) != 0;
+ if (atEndPoint())
+ _isEOF = true;
}
+ }
- private:
- bool atEndPoint() const {
- return _endState && _it == _endState->it;
+ bool atOrPastEndPointAfterSeeking() const {
+ if (_isEOF)
+ return true;
+ if (!_endState)
+ return false;
+
+ const int cmp = _data.value_comp().compare(*_it, _endState->query);
+
+ // We set up _endState->query to be in between the last in-range value and the first
+ // out-of-range value. In particular, it is constructed to never equal any legal
+ // index key.
+ dassert(cmp != 0);
+
+ if (_forward) {
+ // We may have landed after the end point.
+ return cmp > 0;
+ } else {
+ // We may have landed before the end point.
+ return cmp < 0;
}
+ }
- // Advances once in the direction of the scan, updating _isEOF as needed.
- // Does nothing if already _isEOF.
- void advance() {
- if (_isEOF) return;
- if (_forward) {
- if (_it != _data.end()) ++_it;
- if (_it == _data.end() || atEndPoint()) _isEOF = true;
- }
- else {
- if (_it == _data.begin() || _data.empty()) {
- _isEOF = true;
- }
- else {
- --_it;
- }
- if (atEndPoint()) _isEOF = true;
- }
+ void locate(const BSONObj& key, const RecordId& loc) {
+ _isEOF = false;
+ const auto query = IndexKeyEntry(key, loc);
+ _it = _data.lower_bound(query);
+ if (_forward) {
+ if (_it == _data.end())
+ _isEOF = true;
+ } else {
+ // lower_bound lands us on or after query. Reverse cursors must be on or before.
+ if (_it == _data.end() || _data.value_comp().compare(*_it, query) > 0)
+ advance(); // sets _isEOF if there is nothing more to return.
}
- bool atOrPastEndPointAfterSeeking() const {
- if (_isEOF) return true;
- if (!_endState) return false;
-
- const int cmp = _data.value_comp().compare(*_it, _endState->query);
+ if (atOrPastEndPointAfterSeeking())
+ _isEOF = true;
+ }
- // We set up _endState->query to be in between the last in-range value and the first
- // out-of-range value. In particular, it is constructed to never equal any legal
- // index key.
- dassert(cmp != 0);
+ // Returns comparison relative to direction of scan. If rhs would be seen later, returns
+ // a positive value.
+ int compareKeys(const BSONObj& lhs, const BSONObj& rhs) const {
+ int cmp = _data.value_comp().compare({lhs, RecordId()}, {rhs, RecordId()});
+ return _forward ? cmp : -cmp;
+ }
- if (_forward) {
- // We may have landed after the end point.
- return cmp > 0;
- }
- else {
- // We may have landed before the end point.
- return cmp < 0;
+ void seekEndCursor() {
+ if (!_endState || _data.empty())
+ return;
+
+ auto it = _data.lower_bound(_endState->query);
+ if (!_forward) {
+ // lower_bound lands us on or after query. Reverse cursors must be on or before.
+ if (it == _data.end() || _data.value_comp().compare(*it, _endState->query) > 0) {
+ if (it == _data.begin()) {
+ it = _data.end(); // all existing data in range.
+ } else {
+ --it;
+ }
}
}
- void locate(const BSONObj& key, const RecordId& loc) {
- _isEOF = false;
- const auto query = IndexKeyEntry(key, loc);
- _it = _data.lower_bound(query);
- if (_forward) {
- if (_it == _data.end()) _isEOF = true;
- }
- else {
- // lower_bound lands us on or after query. Reverse cursors must be on or before.
- if (_it == _data.end() || _data.value_comp().compare(*_it, query) > 0)
- advance(); // sets _isEOF if there is nothing more to return.
- }
+ if (it != _data.end())
+ dassert(compareKeys(it->key, _endState->query.key) >= 0);
+ _endState->it = it;
+ }
- if (atOrPastEndPointAfterSeeking()) _isEOF = true;
- }
+ OperationContext* _txn; // not owned
+ const IndexSet& _data;
+ const bool _forward;
+ bool _isEOF = true;
+ IndexSet::const_iterator _it;
- // Returns comparison relative to direction of scan. If rhs would be seen later, returns
- // a positive value.
- int compareKeys(const BSONObj& lhs, const BSONObj& rhs) const {
- int cmp = _data.value_comp().compare({lhs, RecordId()}, {rhs, RecordId()});
- return _forward ? cmp : -cmp;
- }
+ struct EndState {
+ EndState(BSONObj key, RecordId loc) : query(std::move(key), loc) {}
- void seekEndCursor() {
- if (!_endState || _data.empty()) return;
-
- auto it = _data.lower_bound(_endState->query);
- if (!_forward) {
- // lower_bound lands us on or after query. Reverse cursors must be on or before.
- if (it == _data.end() || _data.value_comp().compare(*it,
- _endState->query) > 0) {
- if (it == _data.begin()) {
- it = _data.end(); // all existing data in range.
- }
- else {
- --it;
- }
- }
- }
+ IndexKeyEntry query;
+ IndexSet::const_iterator it;
+ };
+ boost::optional<EndState> _endState;
- if (it != _data.end()) dassert(compareKeys(it->key, _endState->query.key) >= 0);
- _endState->it = it;
- }
+        // Used by next() to decide whether to return the current position rather than
+        // advancing. Should be reset to false by any operation that moves the cursor,
+        // other than subsequent save/restore pairs.
+ bool _lastMoveWasRestore = false;
- OperationContext* _txn; // not owned
- const IndexSet& _data;
- const bool _forward;
- bool _isEOF = true;
- IndexSet::const_iterator _it;
-
- struct EndState {
- EndState(BSONObj key, RecordId loc) : query(std::move(key), loc) {}
-
- IndexKeyEntry query;
- IndexSet::const_iterator it;
- };
- boost::optional<EndState> _endState;
-
- // Used by next to decide to return current position rather than moving. Should be reset
- // to false by any operation that moves the cursor, other than subsequent save/restore
- // pairs.
- bool _lastMoveWasRestore = false;
-
- // For save/restore since _it may be invalidated during a yield.
- bool _savedAtEnd = false;
- BSONObj _savedKey;
- RecordId _savedLoc;
- };
+ // For save/restore since _it may be invalidated during a yield.
+ bool _savedAtEnd = false;
+ BSONObj _savedKey;
+ RecordId _savedLoc;
+ };
- virtual std::unique_ptr<SortedDataInterface::Cursor> newCursor(
- OperationContext* txn,
- bool isForward) const {
- return stdx::make_unique<Cursor>(txn, *_data, isForward);
- }
+ virtual std::unique_ptr<SortedDataInterface::Cursor> newCursor(OperationContext* txn,
+ bool isForward) const {
+ return stdx::make_unique<Cursor>(txn, *_data, isForward);
+ }
- virtual Status initAsEmpty(OperationContext* txn) {
- // No-op
- return Status::OK();
+ virtual Status initAsEmpty(OperationContext* txn) {
+ // No-op
+ return Status::OK();
+ }
+
+private:
+ class IndexChange : public RecoveryUnit::Change {
+ public:
+ IndexChange(IndexSet* data, const IndexKeyEntry& entry, bool insert)
+ : _data(data), _entry(entry), _insert(insert) {}
+
+ virtual void commit() {}
+ virtual void rollback() {
+ if (_insert)
+ _data->erase(_entry);
+ else
+ _data->insert(_entry);
}
private:
- class IndexChange : public RecoveryUnit::Change {
- public:
- IndexChange(IndexSet* data, const IndexKeyEntry& entry, bool insert)
- : _data(data), _entry(entry), _insert(insert)
- {}
-
- virtual void commit() {}
- virtual void rollback() {
- if (_insert)
- _data->erase(_entry);
- else
- _data->insert(_entry);
- }
-
- private:
- IndexSet* _data;
- const IndexKeyEntry _entry;
- const bool _insert;
- };
-
IndexSet* _data;
- long long _currentKeySize;
+ const IndexKeyEntry _entry;
+ const bool _insert;
};
-} // namespace
-
- // IndexCatalogEntry argument taken by non-const pointer for consistency with other Btree
- // factories. We don't actually modify it.
- SortedDataInterface* getInMemoryBtreeImpl(const Ordering& ordering,
- std::shared_ptr<void>* dataInOut) {
- invariant(dataInOut);
- if (!*dataInOut) {
- *dataInOut = std::make_shared<IndexSet>(IndexEntryComparison(ordering));
- }
- return new InMemoryBtreeImpl(static_cast<IndexSet*>(dataInOut->get()));
+
+ IndexSet* _data;
+ long long _currentKeySize;
+};
+} // namespace
+
+// IndexCatalogEntry argument taken by non-const pointer for consistency with other Btree
+// factories. We don't actually modify it.
+SortedDataInterface* getInMemoryBtreeImpl(const Ordering& ordering,
+ std::shared_ptr<void>* dataInOut) {
+ invariant(dataInOut);
+ if (!*dataInOut) {
+ *dataInOut = std::make_shared<IndexSet>(IndexEntryComparison(ordering));
}
+ return new InMemoryBtreeImpl(static_cast<IndexSet*>(dataInOut->get()));
+}
} // namespace mongo
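Most of the subtlety in the cursor code above is the (key, RecordId) sentinel pairing: entries sort by key and then by loc, so seeking with RecordId::min() lands lower_bound on the first entry for a key, while RecordId::max() lands it just past the last one. That is how seek() and setEndPosition() encode inclusive versus exclusive bounds. A minimal standalone sketch of the positioning rule over a plain std::set of pairs (ints stand in for keys and locs; none of the real mongo types are used):

    #include <climits>
    #include <set>
    #include <utility>

    int main() {
        // (key, loc) entries, ordered lexicographically like the IndexSet.
        std::set<std::pair<int, int>> data = {{5, 1}, {5, 2}, {7, 9}};

        // Inclusive forward seek on key 5: pair the key with the smallest
        // loc so lower_bound lands on the first {5, *} entry.
        auto incl = data.lower_bound({5, INT_MIN});  // -> {5, 1}

        // Exclusive forward seek on key 5: pair the key with the largest
        // loc so lower_bound lands just past the last {5, *} entry.
        auto excl = data.lower_bound({5, INT_MAX});  // -> {7, 9}

        return (incl->first == 5 && excl->first == 7) ? 0 : 1;
    }

Reverse scans invert the rule, which is why seek() chooses RecordId::min() when _forward == inclusive and RecordId::max() otherwise.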
diff --git a/src/mongo/db/storage/in_memory/in_memory_btree_impl.h b/src/mongo/db/storage/in_memory/in_memory_btree_impl.h
index ee318312c78..ed330d40d10 100644
--- a/src/mongo/db/storage/in_memory/in_memory_btree_impl.h
+++ b/src/mongo/db/storage/in_memory/in_memory_btree_impl.h
@@ -35,13 +35,13 @@
namespace mongo {
- class IndexCatalogEntry;
-
- /**
- * Caller takes ownership.
- * All permanent data will be stored and fetch from dataInOut.
- */
- SortedDataInterface* getInMemoryBtreeImpl(const Ordering& ordering,
- std::shared_ptr<void>* dataInOut);
+class IndexCatalogEntry;
+
+/**
+ * Caller takes ownership.
+ * All permanent data will be stored in and fetched from dataInOut.
+ */
+SortedDataInterface* getInMemoryBtreeImpl(const Ordering& ordering,
+ std::shared_ptr<void>* dataInOut);
} // namespace mongo
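The ownership contract here is easy to misread, so a hedged usage sketch may help: getInMemoryBtreeImpl allocates the IndexSet on first use and parks it in dataInOut, and every later call over the same shared_ptr wraps the same entries. The surrounding setup (headers, an OperationContext for actual reads and writes) is assumed rather than shown in this diff.

    // Sketch only; assumes the mongo headers declaring these types.
    std::shared_ptr<void> sharedData;  // will own the IndexSet
    const Ordering order = Ordering::make(BSONObj());

    // First call allocates the IndexSet and stores it in sharedData.
    std::unique_ptr<SortedDataInterface> index(getInMemoryBtreeImpl(order, &sharedData));

    // A second interface over the same shared_ptr sees the same entries,
    // since both wrap the IndexSet owned by sharedData.
    std::unique_ptr<SortedDataInterface> alias(getInMemoryBtreeImpl(order, &sharedData));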
diff --git a/src/mongo/db/storage/in_memory/in_memory_btree_impl_test.cpp b/src/mongo/db/storage/in_memory/in_memory_btree_impl_test.cpp
index 867a093b3e0..719e187d548 100644
--- a/src/mongo/db/storage/in_memory/in_memory_btree_impl_test.cpp
+++ b/src/mongo/db/storage/in_memory/in_memory_btree_impl_test.cpp
@@ -38,27 +38,24 @@
namespace mongo {
- class InMemoryHarnessHelper final : public HarnessHelper {
- public:
- InMemoryHarnessHelper()
- : _order( Ordering::make( BSONObj() ) ) {
- }
+class InMemoryHarnessHelper final : public HarnessHelper {
+public:
+ InMemoryHarnessHelper() : _order(Ordering::make(BSONObj())) {}
- std::unique_ptr<SortedDataInterface> newSortedDataInterface( bool unique ) final {
- return std::unique_ptr<SortedDataInterface>(getInMemoryBtreeImpl(_order, &_data));
- }
-
- std::unique_ptr<RecoveryUnit> newRecoveryUnit() final {
- return stdx::make_unique<InMemoryRecoveryUnit>();
- }
-
- private:
- std::shared_ptr<void> _data; // used by InMemoryBtreeImpl
- Ordering _order;
- };
+ std::unique_ptr<SortedDataInterface> newSortedDataInterface(bool unique) final {
+ return std::unique_ptr<SortedDataInterface>(getInMemoryBtreeImpl(_order, &_data));
+ }
- std::unique_ptr<HarnessHelper> newHarnessHelper() {
- return stdx::make_unique<InMemoryHarnessHelper>();
+ std::unique_ptr<RecoveryUnit> newRecoveryUnit() final {
+ return stdx::make_unique<InMemoryRecoveryUnit>();
}
+private:
+ std::shared_ptr<void> _data; // used by InMemoryBtreeImpl
+ Ordering _order;
+};
+
+std::unique_ptr<HarnessHelper> newHarnessHelper() {
+ return stdx::make_unique<InMemoryHarnessHelper>();
+}
}
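One consequence of the harness layout above: _data is a member, so every SortedDataInterface the helper creates is a view over a single shared IndexSet. A hedged sketch of what that buys a test; opCtx is an assumed stand-in (constructing an OperationContext is outside this diff) and the key and RecordId values are purely illustrative.

    // Sketch only: opCtx is assumed to come from the test fixture.
    InMemoryHarnessHelper harness;
    auto first = harness.newSortedDataInterface(false /*unique*/);
    auto second = harness.newSortedDataInterface(false /*unique*/);

    // An entry inserted through `first` is visible through `second`,
    // because both wrap the IndexSet owned by the helper's _data.
    first->insert(opCtx, BSON("" << 1), RecordId(1, 2), true /*dupsAllowed*/);
    auto cursor = second->newCursor(opCtx, true /*isForward*/);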
diff --git a/src/mongo/db/storage/in_memory/in_memory_engine.cpp b/src/mongo/db/storage/in_memory/in_memory_engine.cpp
index 395d002b26a..46a12c28b4c 100644
--- a/src/mongo/db/storage/in_memory/in_memory_engine.cpp
+++ b/src/mongo/db/storage/in_memory/in_memory_engine.cpp
@@ -37,70 +37,66 @@
namespace mongo {
- RecoveryUnit* InMemoryEngine::newRecoveryUnit() {
- return new InMemoryRecoveryUnit();
- }
+RecoveryUnit* InMemoryEngine::newRecoveryUnit() {
+ return new InMemoryRecoveryUnit();
+}
- Status InMemoryEngine::createRecordStore(OperationContext* opCtx,
- StringData ns,
- StringData ident,
- const CollectionOptions& options) {
- // All work done in getRecordStore
- return Status::OK();
- }
+Status InMemoryEngine::createRecordStore(OperationContext* opCtx,
+ StringData ns,
+ StringData ident,
+ const CollectionOptions& options) {
+ // All work done in getRecordStore
+ return Status::OK();
+}
- RecordStore* InMemoryEngine::getRecordStore(OperationContext* opCtx,
- StringData ns,
- StringData ident,
- const CollectionOptions& options) {
- stdx::lock_guard<stdx::mutex> lk(_mutex);
- if (options.capped) {
- return new InMemoryRecordStore(ns,
- &_dataMap[ident],
- true,
- options.cappedSize ? options.cappedSize : 4096,
- options.cappedMaxDocs ? options.cappedMaxDocs : -1);
- }
- else {
- return new InMemoryRecordStore(ns, &_dataMap[ident]);
- }
+RecordStore* InMemoryEngine::getRecordStore(OperationContext* opCtx,
+ StringData ns,
+ StringData ident,
+ const CollectionOptions& options) {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ if (options.capped) {
+ return new InMemoryRecordStore(ns,
+ &_dataMap[ident],
+ true,
+ options.cappedSize ? options.cappedSize : 4096,
+ options.cappedMaxDocs ? options.cappedMaxDocs : -1);
+ } else {
+ return new InMemoryRecordStore(ns, &_dataMap[ident]);
}
+}
- Status InMemoryEngine::createSortedDataInterface(OperationContext* opCtx,
- StringData ident,
- const IndexDescriptor* desc) {
-
- // All work done in getSortedDataInterface
- return Status::OK();
- }
+Status InMemoryEngine::createSortedDataInterface(OperationContext* opCtx,
+ StringData ident,
+ const IndexDescriptor* desc) {
+ // All work done in getSortedDataInterface
+ return Status::OK();
+}
- SortedDataInterface* InMemoryEngine::getSortedDataInterface(OperationContext* opCtx,
- StringData ident,
- const IndexDescriptor* desc) {
- stdx::lock_guard<stdx::mutex> lk(_mutex);
- return getInMemoryBtreeImpl(Ordering::make(desc->keyPattern()), &_dataMap[ident]);
- }
+SortedDataInterface* InMemoryEngine::getSortedDataInterface(OperationContext* opCtx,
+ StringData ident,
+ const IndexDescriptor* desc) {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ return getInMemoryBtreeImpl(Ordering::make(desc->keyPattern()), &_dataMap[ident]);
+}
- Status InMemoryEngine::dropIdent(OperationContext* opCtx,
- StringData ident) {
- stdx::lock_guard<stdx::mutex> lk(_mutex);
- _dataMap.erase(ident);
- return Status::OK();
- }
+Status InMemoryEngine::dropIdent(OperationContext* opCtx, StringData ident) {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ _dataMap.erase(ident);
+ return Status::OK();
+}
- int64_t InMemoryEngine::getIdentSize( OperationContext* opCtx,
- StringData ident ) {
- return 1;
- }
+int64_t InMemoryEngine::getIdentSize(OperationContext* opCtx, StringData ident) {
+ return 1;
+}
- std::vector<std::string> InMemoryEngine::getAllIdents( OperationContext* opCtx ) const {
- std::vector<std::string> all;
- {
- stdx::lock_guard<stdx::mutex> lk(_mutex);
- for ( DataMap::const_iterator it = _dataMap.begin(); it != _dataMap.end(); ++it ) {
- all.push_back( it->first );
- }
+std::vector<std::string> InMemoryEngine::getAllIdents(OperationContext* opCtx) const {
+ std::vector<std::string> all;
+ {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ for (DataMap::const_iterator it = _dataMap.begin(); it != _dataMap.end(); ++it) {
+ all.push_back(it->first);
}
- return all;
}
+ return all;
+}
}
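Note how the capped branch of getRecordStore fills in defaults when the options leave them unset: a 4096-byte size cap and -1 for no document limit. A hedged sketch of exercising that path; opCtx is a stand-in and the namespace and ident strings are illustrative.

    // Sketch only: opCtx is assumed.
    InMemoryEngine engine;
    CollectionOptions options;
    options.capped = true;
    options.cappedSize = 0;     // engine substitutes 4096 bytes
    options.cappedMaxDocs = 0;  // engine substitutes -1 (no doc limit)

    // Stores handed out for the same ident share one _dataMap entry, so
    // reopening an ident yields a store over the same records.
    std::unique_ptr<RecordStore> rs(
        engine.getRecordStore(opCtx, "test.capped", "ident-1", options));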
diff --git a/src/mongo/db/storage/in_memory/in_memory_engine.h b/src/mongo/db/storage/in_memory/in_memory_engine.h
index c7e527ec2f7..55f9c463055 100644
--- a/src/mongo/db/storage/in_memory/in_memory_engine.h
+++ b/src/mongo/db/storage/in_memory/in_memory_engine.h
@@ -36,60 +36,64 @@
namespace mongo {
- class InMemoryEngine : public KVEngine {
- public:
- virtual RecoveryUnit* newRecoveryUnit();
+class InMemoryEngine : public KVEngine {
+public:
+ virtual RecoveryUnit* newRecoveryUnit();
- virtual Status createRecordStore( OperationContext* opCtx,
- StringData ns,
- StringData ident,
- const CollectionOptions& options );
+ virtual Status createRecordStore(OperationContext* opCtx,
+ StringData ns,
+ StringData ident,
+ const CollectionOptions& options);
- virtual RecordStore* getRecordStore( OperationContext* opCtx,
- StringData ns,
- StringData ident,
- const CollectionOptions& options );
+ virtual RecordStore* getRecordStore(OperationContext* opCtx,
+ StringData ns,
+ StringData ident,
+ const CollectionOptions& options);
- virtual Status createSortedDataInterface( OperationContext* opCtx,
- StringData ident,
- const IndexDescriptor* desc );
+ virtual Status createSortedDataInterface(OperationContext* opCtx,
+ StringData ident,
+ const IndexDescriptor* desc);
- virtual SortedDataInterface* getSortedDataInterface( OperationContext* opCtx,
- StringData ident,
- const IndexDescriptor* desc );
+ virtual SortedDataInterface* getSortedDataInterface(OperationContext* opCtx,
+ StringData ident,
+ const IndexDescriptor* desc);
- virtual Status dropIdent( OperationContext* opCtx,
- StringData ident );
+ virtual Status dropIdent(OperationContext* opCtx, StringData ident);
- virtual bool supportsDocLocking() const { return false; }
+ virtual bool supportsDocLocking() const {
+ return false;
+ }
- virtual bool supportsDirectoryPerDB() const { return false; }
+ virtual bool supportsDirectoryPerDB() const {
+ return false;
+ }
- /**
- * This is sort of strange since "durable" has no meaning...
- */
- virtual bool isDurable() const { return true; }
+ /**
+ * This is sort of strange since "durable" has no meaning for an engine that never writes to disk.
+ */
+ virtual bool isDurable() const {
+ return true;
+ }
- virtual int64_t getIdentSize( OperationContext* opCtx,
- StringData ident );
+ virtual int64_t getIdentSize(OperationContext* opCtx, StringData ident);
- virtual Status repairIdent( OperationContext* opCtx,
- StringData ident ) {
- return Status::OK();
- }
+ virtual Status repairIdent(OperationContext* opCtx, StringData ident) {
+ return Status::OK();
+ }
- virtual void cleanShutdown() {};
+ virtual void cleanShutdown(){};
- virtual bool hasIdent(OperationContext* opCtx, StringData ident) const {
- return _dataMap.find(ident) != _dataMap.end();;
- }
+ virtual bool hasIdent(OperationContext* opCtx, StringData ident) const {
+ return _dataMap.find(ident) != _dataMap.end();
+ }
- std::vector<std::string> getAllIdents( OperationContext* opCtx ) const;
- private:
- typedef StringMap<std::shared_ptr<void> > DataMap;
+ std::vector<std::string> getAllIdents(OperationContext* opCtx) const;
- mutable stdx::mutex _mutex;
- DataMap _dataMap; // All actual data is owned in here
- };
+private:
+ typedef StringMap<std::shared_ptr<void>> DataMap;
+ mutable stdx::mutex _mutex;
+ DataMap _dataMap; // All actual data is owned in here
+};
}
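All of the ident bookkeeping in this header is driven by _dataMap: getRecordStore's _dataMap[ident] lookup creates the entry, hasIdent probes for it, and dropIdent erases it. A hedged round-trip sketch, with opCtx again an assumed stand-in:

    // Sketch only: opCtx is assumed.
    InMemoryEngine engine;
    invariant(!engine.hasIdent(opCtx, "collection-0"));

    std::unique_ptr<RecordStore> rs(
        engine.getRecordStore(opCtx, "db.coll", "collection-0", CollectionOptions()));
    invariant(engine.hasIdent(opCtx, "collection-0"));  // operator[] created the entry

    rs.reset();                               // release the store first
    engine.dropIdent(opCtx, "collection-0");  // then erase the shared data
    invariant(!engine.hasIdent(opCtx, "collection-0"));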
diff --git a/src/mongo/db/storage/in_memory/in_memory_engine_test.cpp b/src/mongo/db/storage/in_memory/in_memory_engine_test.cpp
index ec31394baae..1427810637f 100644
--- a/src/mongo/db/storage/in_memory/in_memory_engine_test.cpp
+++ b/src/mongo/db/storage/in_memory/in_memory_engine_test.cpp
@@ -33,23 +33,25 @@
namespace mongo {
- class InMemoryKVHarnessHelper : public KVHarnessHelper {
- public:
- InMemoryKVHarnessHelper() : _engine( new InMemoryEngine()) {}
-
- virtual KVEngine* restartEngine() {
- // Intentionally not restarting since the in-memory storage engine
- // does not persist data across restarts
- return _engine.get();
- }
+class InMemoryKVHarnessHelper : public KVHarnessHelper {
+public:
+ InMemoryKVHarnessHelper() : _engine(new InMemoryEngine()) {}
+
+ virtual KVEngine* restartEngine() {
+ // Intentionally not restarting since the in-memory storage engine
+ // does not persist data across restarts
+ return _engine.get();
+ }
- virtual KVEngine* getEngine() { return _engine.get(); }
+ virtual KVEngine* getEngine() {
+ return _engine.get();
+ }
- private:
- std::unique_ptr<InMemoryEngine> _engine;
- };
+private:
+ std::unique_ptr<InMemoryEngine> _engine;
+};
- KVHarnessHelper* KVHarnessHelper::create() {
- return new InMemoryKVHarnessHelper();
- }
+KVHarnessHelper* KVHarnessHelper::create() {
+ return new InMemoryKVHarnessHelper();
+}
}
diff --git a/src/mongo/db/storage/in_memory/in_memory_init.cpp b/src/mongo/db/storage/in_memory/in_memory_init.cpp
index bca7d60ef6c..c3f36f0224a 100644
--- a/src/mongo/db/storage/in_memory/in_memory_init.cpp
+++ b/src/mongo/db/storage/in_memory/in_memory_init.cpp
@@ -37,41 +37,39 @@
namespace mongo {
- namespace {
+namespace {
- class InMemoryFactory : public StorageEngine::Factory {
- public:
- virtual ~InMemoryFactory() { }
- virtual StorageEngine* create(const StorageGlobalParams& params,
- const StorageEngineLockFile& lockFile) const {
- KVStorageEngineOptions options;
- options.directoryPerDB = params.directoryperdb;
- options.forRepair = params.repair;
- return new KVStorageEngine(new InMemoryEngine(), options);
- }
-
- virtual StringData getCanonicalName() const {
- return "inMemoryExperiment";
- }
+class InMemoryFactory : public StorageEngine::Factory {
+public:
+ virtual ~InMemoryFactory() {}
+ virtual StorageEngine* create(const StorageGlobalParams& params,
+ const StorageEngineLockFile& lockFile) const {
+ KVStorageEngineOptions options;
+ options.directoryPerDB = params.directoryperdb;
+ options.forRepair = params.repair;
+ return new KVStorageEngine(new InMemoryEngine(), options);
+ }
- virtual Status validateMetadata(const StorageEngineMetadata& metadata,
- const StorageGlobalParams& params) const {
- return Status::OK();
- }
+ virtual StringData getCanonicalName() const {
+ return "inMemoryExperiment";
+ }
- virtual BSONObj createMetadataOptions(const StorageGlobalParams& params) const {
- return BSONObj();
- }
- };
+ virtual Status validateMetadata(const StorageEngineMetadata& metadata,
+ const StorageGlobalParams& params) const {
+ return Status::OK();
+ }
- } // namespace
+ virtual BSONObj createMetadataOptions(const StorageGlobalParams& params) const {
+ return BSONObj();
+ }
+};
- MONGO_INITIALIZER_WITH_PREREQUISITES(InMemoryEngineInit,
- ("SetGlobalEnvironment"))
- (InitializerContext* context) {
+} // namespace
- getGlobalServiceContext()->registerStorageEngine("inMemoryExperiment", new InMemoryFactory());
- return Status::OK();
- }
+MONGO_INITIALIZER_WITH_PREREQUISITES(InMemoryEngineInit, ("SetGlobalEnvironment"))
+(InitializerContext* context) {
+ getGlobalServiceContext()->registerStorageEngine("inMemoryExperiment", new InMemoryFactory());
+ return Status::OK();
+}
} // namespace mongo
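The initializer above is the engine's only registration point, and the canonical name it registers is what a user selects at startup. Assuming the standard --storageEngine option of this release line (the flag itself is not part of this diff), that selection would look like:

    mongod --storageEngine inMemoryExperiment

Since InMemoryEngine::isDurable() reports true while nothing is ever written to disk, restarting such a process comes back empty, which is exactly the behavior the test harness's restartEngine() documents.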
diff --git a/src/mongo/db/storage/in_memory/in_memory_record_store.cpp b/src/mongo/db/storage/in_memory/in_memory_record_store.cpp
index b0c583954f6..af596f7a569 100644
--- a/src/mongo/db/storage/in_memory/in_memory_record_store.cpp
+++ b/src/mongo/db/storage/in_memory/in_memory_record_store.cpp
@@ -46,584 +46,569 @@
namespace mongo {
- using std::shared_ptr;
-
- class InMemoryRecordStore::InsertChange : public RecoveryUnit::Change {
- public:
- InsertChange(Data* data, RecordId loc) :_data(data), _loc(loc) {}
- virtual void commit() {}
- virtual void rollback() {
- Records::iterator it = _data->records.find(_loc);
- if (it != _data->records.end()) {
- _data->dataSize -= it->second.size;
- _data->records.erase(it);
- }
- }
-
- private:
- Data* const _data;
- const RecordId _loc;
- };
-
- // Works for both removes and updates
- class InMemoryRecordStore::RemoveChange : public RecoveryUnit::Change {
- public:
- RemoveChange(Data* data, RecordId loc, const InMemoryRecord& rec)
- :_data(data), _loc(loc), _rec(rec)
- {}
-
- virtual void commit() {}
- virtual void rollback() {
- Records::iterator it = _data->records.find(_loc);
- if (it != _data->records.end()) {
- _data->dataSize -= it->second.size;
- }
-
- _data->dataSize += _rec.size;
- _data->records[_loc] = _rec;
+using std::shared_ptr;
+
+class InMemoryRecordStore::InsertChange : public RecoveryUnit::Change {
+public:
+ InsertChange(Data* data, RecordId loc) : _data(data), _loc(loc) {}
+ virtual void commit() {}
+ virtual void rollback() {
+ Records::iterator it = _data->records.find(_loc);
+ if (it != _data->records.end()) {
+ _data->dataSize -= it->second.size;
+ _data->records.erase(it);
}
+ }
- private:
- Data* const _data;
- const RecordId _loc;
- const InMemoryRecord _rec;
- };
-
- class InMemoryRecordStore::TruncateChange : public RecoveryUnit::Change {
- public:
- TruncateChange(Data* data) : _data(data), _dataSize(0) {
- using std::swap;
- swap(_dataSize, _data->dataSize);
- swap(_records, _data->records);
+private:
+ Data* const _data;
+ const RecordId _loc;
+};
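Every Change class in this file follows the same transactional pattern: mutate the in-memory state eagerly, then register an object whose rollback() undoes the mutation if the unit of work aborts. A minimal standalone sketch of that pattern with a bare std::map and a hand-rolled Change base class (both stand-ins, not the real mongo types):

    #include <map>
    #include <memory>
    #include <vector>

    struct Change {
        virtual ~Change() {}
        virtual void commit() {}
        virtual void rollback() = 0;
    };

    struct MapInsertChange : Change {
        MapInsertChange(std::map<int, int>* data, int key) : _data(data), _key(key) {}
        void rollback() override {
            _data->erase(_key);  // undo the eager insert
        }
        std::map<int, int>* _data;
        int _key;
    };

    int main() {
        std::map<int, int> records;
        std::vector<std::unique_ptr<Change>> changes;

        records[7] = 42;  // mutate first, like _data->records[loc] = rec
        changes.push_back(std::unique_ptr<Change>(new MapInsertChange(&records, 7)));

        // Abort path: undo in reverse registration order.
        for (auto it = changes.rbegin(); it != changes.rend(); ++it)
            (*it)->rollback();
        return records.empty() ? 0 : 1;
    }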
+
+// Works for both removes and updates
+class InMemoryRecordStore::RemoveChange : public RecoveryUnit::Change {
+public:
+ RemoveChange(Data* data, RecordId loc, const InMemoryRecord& rec)
+ : _data(data), _loc(loc), _rec(rec) {}
+
+ virtual void commit() {}
+ virtual void rollback() {
+ Records::iterator it = _data->records.find(_loc);
+ if (it != _data->records.end()) {
+ _data->dataSize -= it->second.size;
}
- virtual void commit() {}
- virtual void rollback() {
- using std::swap;
- swap(_dataSize, _data->dataSize);
- swap(_records, _data->records);
- }
+ _data->dataSize += _rec.size;
+ _data->records[_loc] = _rec;
+ }
- private:
- Data* const _data;
- int64_t _dataSize;
- Records _records;
- };
-
- class InMemoryRecordStore::Cursor final : public RecordCursor {
- public:
- Cursor(OperationContext* txn, const InMemoryRecordStore& rs)
- : _txn(txn)
- , _records(rs._data->records)
- , _isCapped(rs.isCapped())
- {}
-
- boost::optional<Record> next() final {
- if (_needFirstSeek) {
- _needFirstSeek = false;
- _it = _records.begin();
- }
- else if (!_lastMoveWasRestore && _it != _records.end()) {
- ++_it;
- }
- _lastMoveWasRestore = false;
+private:
+ Data* const _data;
+ const RecordId _loc;
+ const InMemoryRecord _rec;
+};
+
+class InMemoryRecordStore::TruncateChange : public RecoveryUnit::Change {
+public:
+ TruncateChange(Data* data) : _data(data), _dataSize(0) {
+ using std::swap;
+ swap(_dataSize, _data->dataSize);
+ swap(_records, _data->records);
+ }
- if (_it == _records.end()) return {};
- return {{_it->first, _it->second.toRecordData()}};
- }
+ virtual void commit() {}
+ virtual void rollback() {
+ using std::swap;
+ swap(_dataSize, _data->dataSize);
+ swap(_records, _data->records);
+ }
- boost::optional<Record> seekExact(const RecordId& id) final {
- _lastMoveWasRestore = false;
- _needFirstSeek = false;
- _it = _records.find(id);
- if (_it == _records.end()) return {};
- return {{_it->first, _it->second.toRecordData()}};
- }
+private:
+ Data* const _data;
+ int64_t _dataSize;
+ Records _records;
+};
- void savePositioned() final {
- _txn = nullptr;
- if (!_needFirstSeek && !_lastMoveWasRestore)
- _savedId = _it == _records.end() ? RecordId() : _it->first;
- }
+class InMemoryRecordStore::Cursor final : public RecordCursor {
+public:
+ Cursor(OperationContext* txn, const InMemoryRecordStore& rs)
+ : _txn(txn), _records(rs._data->records), _isCapped(rs.isCapped()) {}
- void saveUnpositioned() final {
- _txn = nullptr;
- _savedId = RecordId();
+ boost::optional<Record> next() final {
+ if (_needFirstSeek) {
+ _needFirstSeek = false;
+ _it = _records.begin();
+ } else if (!_lastMoveWasRestore && _it != _records.end()) {
+ ++_it;
}
+ _lastMoveWasRestore = false;
- bool restore(OperationContext* txn) final {
- _txn = txn;
- if (_savedId.isNull()) {
- _it = _records.end();
- return true;
- }
+ if (_it == _records.end())
+ return {};
+ return {{_it->first, _it->second.toRecordData()}};
+ }
- _it = _records.lower_bound(_savedId);
- _lastMoveWasRestore = _it == _records.end() || _it->first != _savedId;
+ boost::optional<Record> seekExact(const RecordId& id) final {
+ _lastMoveWasRestore = false;
+ _needFirstSeek = false;
+ _it = _records.find(id);
+ if (_it == _records.end())
+ return {};
+ return {{_it->first, _it->second.toRecordData()}};
+ }
- // Capped iterators die on invalidation rather than advancing.
- return !(_isCapped && _lastMoveWasRestore);
- }
+ void savePositioned() final {
+ _txn = nullptr;
+ if (!_needFirstSeek && !_lastMoveWasRestore)
+ _savedId = _it == _records.end() ? RecordId() : _it->first;
+ }
- private:
- unowned_ptr<OperationContext> _txn;
- Records::const_iterator _it;
- bool _needFirstSeek = true;
- bool _lastMoveWasRestore = false;
- RecordId _savedId; // Location to restore() to. Null means EOF.
-
- const InMemoryRecordStore::Records& _records;
- const bool _isCapped;
- };
-
- class InMemoryRecordStore::ReverseCursor final : public RecordCursor {
- public:
- ReverseCursor(OperationContext* txn, const InMemoryRecordStore& rs)
- : _txn(txn)
- , _records(rs._data->records)
- , _isCapped(rs.isCapped())
- {}
-
- boost::optional<Record> next() final {
- if (_needFirstSeek) {
- _needFirstSeek = false;
- _it = _records.rbegin();
- }
- else if (!_lastMoveWasRestore && _it != _records.rend()) {
- ++_it;
- }
- _lastMoveWasRestore = false;
+ void saveUnpositioned() final {
+ _txn = nullptr;
+ _savedId = RecordId();
+ }
- if (_it == _records.rend()) return {};
- return {{_it->first, _it->second.toRecordData()}};
+ bool restore(OperationContext* txn) final {
+ _txn = txn;
+ if (_savedId.isNull()) {
+ _it = _records.end();
+ return true;
}
- boost::optional<Record> seekExact(const RecordId& id) final {
- _lastMoveWasRestore = false;
- _needFirstSeek = false;
-
- auto forwardIt = _records.find(id);
- if (forwardIt == _records.end()) {
- _it = _records.rend();
- return {};
- }
+ _it = _records.lower_bound(_savedId);
+ _lastMoveWasRestore = _it == _records.end() || _it->first != _savedId;
- // The reverse_iterator will point to the preceding element, so increment the base
- // iterator to make it point past the found element.
- ++forwardIt;
- _it = Records::const_reverse_iterator(forwardIt);
- dassert(_it != _records.rend());
- dassert(_it->first == id);
- return {{_it->first, _it->second.toRecordData()}};
- }
-
- void savePositioned() final {
- _txn = nullptr;
- if (!_needFirstSeek && !_lastMoveWasRestore)
- _savedId = _it == _records.rend() ? RecordId() : _it->first;
- }
+ // Capped iterators die on invalidation rather than advancing.
+ return !(_isCapped && _lastMoveWasRestore);
+ }
- void saveUnpositioned() final {
- _txn = nullptr;
- _savedId = RecordId();
- }
+private:
+ unowned_ptr<OperationContext> _txn;
+ Records::const_iterator _it;
+ bool _needFirstSeek = true;
+ bool _lastMoveWasRestore = false;
+ RecordId _savedId; // Location to restore() to. Null means EOF.
- bool restore(OperationContext* txn) final {
- _txn = txn;
- if (_savedId.isNull()) {
- _it = _records.rend();
- return true;
- }
+ const InMemoryRecordStore::Records& _records;
+ const bool _isCapped;
+};
- // Note: upper_bound returns the first entry > _savedId and reverse_iterators
- // dereference to the element before their base iterator. This combine to make this
- // dereference to the first element <= _savedId which is what we want here.
- _it = Records::const_reverse_iterator(_records.upper_bound(_savedId));
- _lastMoveWasRestore = _it == _records.rend() || _it->first != _savedId;
+class InMemoryRecordStore::ReverseCursor final : public RecordCursor {
+public:
+ ReverseCursor(OperationContext* txn, const InMemoryRecordStore& rs)
+ : _txn(txn), _records(rs._data->records), _isCapped(rs.isCapped()) {}
- // Capped iterators die on invalidation rather than advancing.
- return !(_isCapped && _lastMoveWasRestore);
+ boost::optional<Record> next() final {
+ if (_needFirstSeek) {
+ _needFirstSeek = false;
+ _it = _records.rbegin();
+ } else if (!_lastMoveWasRestore && _it != _records.rend()) {
+ ++_it;
}
+ _lastMoveWasRestore = false;
- private:
- unowned_ptr<OperationContext> _txn;
- Records::const_reverse_iterator _it;
- bool _needFirstSeek = true;
- bool _lastMoveWasRestore = false;
- RecordId _savedId; // Location to restore() to. Null means EOF.
- const InMemoryRecordStore::Records& _records;
- const bool _isCapped;
- };
+ if (_it == _records.rend())
+ return {};
+ return {{_it->first, _it->second.toRecordData()}};
+ }
+ boost::optional<Record> seekExact(const RecordId& id) final {
+ _lastMoveWasRestore = false;
+ _needFirstSeek = false;
- //
- // RecordStore
- //
-
- InMemoryRecordStore::InMemoryRecordStore(StringData ns,
- std::shared_ptr<void>* dataInOut,
- bool isCapped,
- int64_t cappedMaxSize,
- int64_t cappedMaxDocs,
- CappedDocumentDeleteCallback* cappedDeleteCallback)
- : RecordStore(ns),
- _isCapped(isCapped),
- _cappedMaxSize(cappedMaxSize),
- _cappedMaxDocs(cappedMaxDocs),
- _cappedDeleteCallback(cappedDeleteCallback),
- _data(*dataInOut ? static_cast<Data*>(dataInOut->get())
- : new Data(NamespaceString::oplog(ns))) {
- if (!*dataInOut) {
- dataInOut->reset(_data); // takes ownership
+ auto forwardIt = _records.find(id);
+ if (forwardIt == _records.end()) {
+ _it = _records.rend();
+ return {};
}
- if (_isCapped) {
- invariant(_cappedMaxSize > 0);
- invariant(_cappedMaxDocs == -1 || _cappedMaxDocs > 0);
- }
- else {
- invariant(_cappedMaxSize == -1);
- invariant(_cappedMaxDocs == -1);
- }
+ // The reverse_iterator will point to the preceding element, so increment the base
+ // iterator to make it point past the found element.
+ ++forwardIt;
+ _it = Records::const_reverse_iterator(forwardIt);
+ dassert(_it != _records.rend());
+ dassert(_it->first == id);
+ return {{_it->first, _it->second.toRecordData()}};
}
- const char* InMemoryRecordStore::name() const { return "InMemory"; }
-
- RecordData InMemoryRecordStore::dataFor( OperationContext* txn, const RecordId& loc ) const {
- return recordFor(loc)->toRecordData();
+ void savePositioned() final {
+ _txn = nullptr;
+ if (!_needFirstSeek && !_lastMoveWasRestore)
+ _savedId = _it == _records.rend() ? RecordId() : _it->first;
}
- const InMemoryRecordStore::InMemoryRecord* InMemoryRecordStore::recordFor(
- const RecordId& loc) const {
- Records::const_iterator it = _data->records.find(loc);
- if ( it == _data->records.end() ) {
- error() << "InMemoryRecordStore::recordFor cannot find record for " << ns()
- << ":" << loc;
- }
- invariant(it != _data->records.end());
- return &it->second;
+ void saveUnpositioned() final {
+ _txn = nullptr;
+ _savedId = RecordId();
}
- InMemoryRecordStore::InMemoryRecord* InMemoryRecordStore::recordFor(const RecordId& loc) {
- Records::iterator it = _data->records.find(loc);
- if ( it == _data->records.end() ) {
- error() << "InMemoryRecordStore::recordFor cannot find record for " << ns()
- << ":" << loc;
+ bool restore(OperationContext* txn) final {
+ _txn = txn;
+ if (_savedId.isNull()) {
+ _it = _records.rend();
+ return true;
}
- invariant(it != _data->records.end());
- return &it->second;
- }
- bool InMemoryRecordStore::findRecord( OperationContext* txn,
- const RecordId& loc, RecordData* rd ) const {
- Records::const_iterator it = _data->records.find(loc);
- if ( it == _data->records.end() ) {
- return false;
- }
- *rd = it->second.toRecordData();
- return true;
+ // Note: upper_bound returns the first entry > _savedId and reverse_iterators
+        // dereference to the element before their base iterator. These combine to make this
+        // dereference the first element <= _savedId, which is what we want here.
+ _it = Records::const_reverse_iterator(_records.upper_bound(_savedId));
+ _lastMoveWasRestore = _it == _records.rend() || _it->first != _savedId;
+
+ // Capped iterators die on invalidation rather than advancing.
+ return !(_isCapped && _lastMoveWasRestore);
}
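The upper_bound-plus-reverse_iterator trick in restore() deserves a standalone illustration: upper_bound finds the first entry greater than the saved id, and a reverse_iterator constructed from it dereferences to the element just before, i.e. the last entry <= the saved id. A minimal sketch over std::map<int, int> (a stand-in for the real Records type):

    #include <map>

    int main() {
        std::map<int, int> records = {{10, 0}, {20, 0}, {30, 0}};

        // upper_bound(20) points at {30, 0}; the reverse_iterator built from
        // it dereferences to the element before, {20, 0}: the last entry
        // <= 20, which is where a reverse cursor should resume.
        std::map<int, int>::const_reverse_iterator rit(records.upper_bound(20));
        bool exactMatch = rit != records.rend() && rit->first == 20;

        // Had 20 been deleted during the yield, rit would land on {10, 0}
        // instead and exactMatch would be false, mirroring _lastMoveWasRestore.
        return exactMatch ? 0 : 1;
    }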
- void InMemoryRecordStore::deleteRecord(OperationContext* txn, const RecordId& loc) {
- InMemoryRecord* rec = recordFor(loc);
- txn->recoveryUnit()->registerChange(new RemoveChange(_data, loc, *rec));
- _data->dataSize -= rec->size;
- invariant(_data->records.erase(loc) == 1);
+private:
+ unowned_ptr<OperationContext> _txn;
+ Records::const_reverse_iterator _it;
+ bool _needFirstSeek = true;
+ bool _lastMoveWasRestore = false;
+ RecordId _savedId; // Location to restore() to. Null means EOF.
+ const InMemoryRecordStore::Records& _records;
+ const bool _isCapped;
+};
+
+
+//
+// RecordStore
+//
+
+InMemoryRecordStore::InMemoryRecordStore(StringData ns,
+ std::shared_ptr<void>* dataInOut,
+ bool isCapped,
+ int64_t cappedMaxSize,
+ int64_t cappedMaxDocs,
+ CappedDocumentDeleteCallback* cappedDeleteCallback)
+ : RecordStore(ns),
+ _isCapped(isCapped),
+ _cappedMaxSize(cappedMaxSize),
+ _cappedMaxDocs(cappedMaxDocs),
+ _cappedDeleteCallback(cappedDeleteCallback),
+ _data(*dataInOut ? static_cast<Data*>(dataInOut->get())
+ : new Data(NamespaceString::oplog(ns))) {
+ if (!*dataInOut) {
+ dataInOut->reset(_data); // takes ownership
}
- bool InMemoryRecordStore::cappedAndNeedDelete(OperationContext* txn) const {
- if (!_isCapped)
- return false;
+ if (_isCapped) {
+ invariant(_cappedMaxSize > 0);
+ invariant(_cappedMaxDocs == -1 || _cappedMaxDocs > 0);
+ } else {
+ invariant(_cappedMaxSize == -1);
+ invariant(_cappedMaxDocs == -1);
+ }
+}
- if (_data->dataSize > _cappedMaxSize)
- return true;
+const char* InMemoryRecordStore::name() const {
+ return "InMemory";
+}
- if ((_cappedMaxDocs != -1) && (numRecords(txn) > _cappedMaxDocs))
- return true;
+RecordData InMemoryRecordStore::dataFor(OperationContext* txn, const RecordId& loc) const {
+ return recordFor(loc)->toRecordData();
+}
+const InMemoryRecordStore::InMemoryRecord* InMemoryRecordStore::recordFor(
+ const RecordId& loc) const {
+ Records::const_iterator it = _data->records.find(loc);
+ if (it == _data->records.end()) {
+ error() << "InMemoryRecordStore::recordFor cannot find record for " << ns() << ":" << loc;
+ }
+ invariant(it != _data->records.end());
+ return &it->second;
+}
+
+InMemoryRecordStore::InMemoryRecord* InMemoryRecordStore::recordFor(const RecordId& loc) {
+ Records::iterator it = _data->records.find(loc);
+ if (it == _data->records.end()) {
+ error() << "InMemoryRecordStore::recordFor cannot find record for " << ns() << ":" << loc;
+ }
+ invariant(it != _data->records.end());
+ return &it->second;
+}
+
+bool InMemoryRecordStore::findRecord(OperationContext* txn,
+ const RecordId& loc,
+ RecordData* rd) const {
+ Records::const_iterator it = _data->records.find(loc);
+ if (it == _data->records.end()) {
return false;
}
+ *rd = it->second.toRecordData();
+ return true;
+}
+
+void InMemoryRecordStore::deleteRecord(OperationContext* txn, const RecordId& loc) {
+ InMemoryRecord* rec = recordFor(loc);
+ txn->recoveryUnit()->registerChange(new RemoveChange(_data, loc, *rec));
+ _data->dataSize -= rec->size;
+ invariant(_data->records.erase(loc) == 1);
+}
+
+bool InMemoryRecordStore::cappedAndNeedDelete(OperationContext* txn) const {
+ if (!_isCapped)
+ return false;
- void InMemoryRecordStore::cappedDeleteAsNeeded(OperationContext* txn) {
- while (cappedAndNeedDelete(txn)) {
- invariant(!_data->records.empty());
+ if (_data->dataSize > _cappedMaxSize)
+ return true;
- Records::iterator oldest = _data->records.begin();
- RecordId id = oldest->first;
- RecordData data = oldest->second.toRecordData();
+ if ((_cappedMaxDocs != -1) && (numRecords(txn) > _cappedMaxDocs))
+ return true;
- if (_cappedDeleteCallback)
- uassertStatusOK(_cappedDeleteCallback->aboutToDeleteCapped(txn, id, data));
+ return false;
+}
- deleteRecord(txn, id);
- }
- }
+void InMemoryRecordStore::cappedDeleteAsNeeded(OperationContext* txn) {
+ while (cappedAndNeedDelete(txn)) {
+ invariant(!_data->records.empty());
- StatusWith<RecordId> InMemoryRecordStore::extractAndCheckLocForOplog(const char* data,
- int len) const {
- StatusWith<RecordId> status = oploghack::extractKey(data, len);
- if (!status.isOK())
- return status;
+ Records::iterator oldest = _data->records.begin();
+ RecordId id = oldest->first;
+ RecordData data = oldest->second.toRecordData();
- if (!_data->records.empty() && status.getValue() <= _data->records.rbegin()->first)
- return StatusWith<RecordId>(ErrorCodes::BadValue, "ts not higher than highest");
+ if (_cappedDeleteCallback)
+ uassertStatusOK(_cappedDeleteCallback->aboutToDeleteCapped(txn, id, data));
- return status;
+ deleteRecord(txn, id);
}
+}
- StatusWith<RecordId> InMemoryRecordStore::insertRecord(OperationContext* txn,
- const char* data,
- int len,
- bool enforceQuota) {
- if (_isCapped && len > _cappedMaxSize) {
- // We use dataSize for capped rollover and we don't want to delete everything if we know
- // this won't fit.
- return StatusWith<RecordId>(ErrorCodes::BadValue,
- "object to insert exceeds cappedMaxSize");
- }
-
- InMemoryRecord rec(len);
- memcpy(rec.data.get(), data, len);
-
- RecordId loc;
- if (_data->isOplog) {
- StatusWith<RecordId> status = extractAndCheckLocForOplog(data, len);
- if (!status.isOK())
- return status;
- loc = status.getValue();
- }
- else {
- loc = allocateLoc();
- }
+StatusWith<RecordId> InMemoryRecordStore::extractAndCheckLocForOplog(const char* data,
+ int len) const {
+ StatusWith<RecordId> status = oploghack::extractKey(data, len);
+ if (!status.isOK())
+ return status;
- txn->recoveryUnit()->registerChange(new InsertChange(_data, loc));
- _data->dataSize += len;
- _data->records[loc] = rec;
+ if (!_data->records.empty() && status.getValue() <= _data->records.rbegin()->first)
+ return StatusWith<RecordId>(ErrorCodes::BadValue, "ts not higher than highest");
- cappedDeleteAsNeeded(txn);
+ return status;
+}
- return StatusWith<RecordId>(loc);
+StatusWith<RecordId> InMemoryRecordStore::insertRecord(OperationContext* txn,
+ const char* data,
+ int len,
+ bool enforceQuota) {
+ if (_isCapped && len > _cappedMaxSize) {
+ // We use dataSize for capped rollover and we don't want to delete everything if we know
+ // this won't fit.
+ return StatusWith<RecordId>(ErrorCodes::BadValue, "object to insert exceeds cappedMaxSize");
}
- StatusWith<RecordId> InMemoryRecordStore::insertRecord(OperationContext* txn,
- const DocWriter* doc,
- bool enforceQuota) {
- const int len = doc->documentSize();
- if (_isCapped && len > _cappedMaxSize) {
- // We use dataSize for capped rollover and we don't want to delete everything if we know
- // this won't fit.
- return StatusWith<RecordId>(ErrorCodes::BadValue,
- "object to insert exceeds cappedMaxSize");
- }
-
- InMemoryRecord rec(len);
- doc->writeDocument(rec.data.get());
-
- RecordId loc;
- if (_data->isOplog) {
- StatusWith<RecordId> status = extractAndCheckLocForOplog(rec.data.get(), len);
- if (!status.isOK())
- return status;
- loc = status.getValue();
- }
- else {
- loc = allocateLoc();
- }
+ InMemoryRecord rec(len);
+ memcpy(rec.data.get(), data, len);
- txn->recoveryUnit()->registerChange(new InsertChange(_data, loc));
- _data->dataSize += len;
- _data->records[loc] = rec;
-
- cappedDeleteAsNeeded(txn);
-
- return StatusWith<RecordId>(loc);
+ RecordId loc;
+ if (_data->isOplog) {
+ StatusWith<RecordId> status = extractAndCheckLocForOplog(data, len);
+ if (!status.isOK())
+ return status;
+ loc = status.getValue();
+ } else {
+ loc = allocateLoc();
}
- StatusWith<RecordId> InMemoryRecordStore::updateRecord(OperationContext* txn,
- const RecordId& loc,
- const char* data,
- int len,
- bool enforceQuota,
- UpdateNotifier* notifier ) {
- InMemoryRecord* oldRecord = recordFor( loc );
- int oldLen = oldRecord->size;
-
- if (_isCapped && len > oldLen) {
- return StatusWith<RecordId>( ErrorCodes::InternalError,
- "failing update: objects in a capped ns cannot grow",
- 10003 );
- }
-
- if (notifier) {
- // The in-memory KV engine uses the invalidation framework (does not support
- // doc-locking), and therefore must notify that it is updating a document.
- Status callbackStatus = notifier->recordStoreGoingToUpdateInPlace(txn, loc);
- if (!callbackStatus.isOK()) {
- return StatusWith<RecordId>(callbackStatus);
- }
- }
+ txn->recoveryUnit()->registerChange(new InsertChange(_data, loc));
+ _data->dataSize += len;
+ _data->records[loc] = rec;
- InMemoryRecord newRecord(len);
- memcpy(newRecord.data.get(), data, len);
+ cappedDeleteAsNeeded(txn);
- txn->recoveryUnit()->registerChange(new RemoveChange(_data, loc, *oldRecord));
- _data->dataSize += len - oldLen;
- *oldRecord = newRecord;
+ return StatusWith<RecordId>(loc);
+}
- cappedDeleteAsNeeded(txn);
-
- return StatusWith<RecordId>(loc);
+StatusWith<RecordId> InMemoryRecordStore::insertRecord(OperationContext* txn,
+ const DocWriter* doc,
+ bool enforceQuota) {
+ const int len = doc->documentSize();
+ if (_isCapped && len > _cappedMaxSize) {
+ // We use dataSize for capped rollover and we don't want to delete everything if we know
+ // this won't fit.
+ return StatusWith<RecordId>(ErrorCodes::BadValue, "object to insert exceeds cappedMaxSize");
}
- bool InMemoryRecordStore::updateWithDamagesSupported() const {
- // TODO: Currently the UpdateStage assumes that updateWithDamages will apply the
- // damages directly to the unowned BSONObj containing the record to be modified.
- // The implementation of updateWithDamages() below copies the old record to a
- // a new one and then applies the damages.
- //
- // We should be able to enable updateWithDamages() here once this assumption is
- // relaxed.
- return false;
- }
+ InMemoryRecord rec(len);
+ doc->writeDocument(rec.data.get());
- Status InMemoryRecordStore::updateWithDamages( OperationContext* txn,
- const RecordId& loc,
- const RecordData& oldRec,
- const char* damageSource,
- const mutablebson::DamageVector& damages ) {
- InMemoryRecord* oldRecord = recordFor( loc );
- const int len = oldRecord->size;
-
- InMemoryRecord newRecord(len);
- memcpy(newRecord.data.get(), oldRecord->data.get(), len);
-
- txn->recoveryUnit()->registerChange(new RemoveChange(_data, loc, *oldRecord));
- *oldRecord = newRecord;
-
- cappedDeleteAsNeeded(txn);
-
- char* root = newRecord.data.get();
- mutablebson::DamageVector::const_iterator where = damages.begin();
- const mutablebson::DamageVector::const_iterator end = damages.end();
- for( ; where != end; ++where ) {
- const char* sourcePtr = damageSource + where->sourceOffset;
- char* targetPtr = root + where->targetOffset;
- std::memcpy(targetPtr, sourcePtr, where->size);
- }
+ RecordId loc;
+ if (_data->isOplog) {
+ StatusWith<RecordId> status = extractAndCheckLocForOplog(rec.data.get(), len);
+ if (!status.isOK())
+ return status;
+ loc = status.getValue();
+ } else {
+ loc = allocateLoc();
+ }
- *oldRecord = newRecord;
+ txn->recoveryUnit()->registerChange(new InsertChange(_data, loc));
+ _data->dataSize += len;
+ _data->records[loc] = rec;
- return Status::OK();
- }
+ cappedDeleteAsNeeded(txn);
- std::unique_ptr<RecordCursor> InMemoryRecordStore::getCursor(OperationContext* txn,
- bool forward) const {
+ return StatusWith<RecordId>(loc);
+}
- if (forward) return stdx::make_unique<Cursor>(txn, *this);
- return stdx::make_unique<ReverseCursor>(txn, *this);
- }
+StatusWith<RecordId> InMemoryRecordStore::updateRecord(OperationContext* txn,
+ const RecordId& loc,
+ const char* data,
+ int len,
+ bool enforceQuota,
+ UpdateNotifier* notifier) {
+ InMemoryRecord* oldRecord = recordFor(loc);
+ int oldLen = oldRecord->size;
- Status InMemoryRecordStore::truncate(OperationContext* txn) {
- // Unlike other changes, TruncateChange mutates _data on construction to perform the
- // truncate
- txn->recoveryUnit()->registerChange(new TruncateChange(_data));
- return Status::OK();
+ if (_isCapped && len > oldLen) {
+ return StatusWith<RecordId>(
+ ErrorCodes::InternalError, "failing update: objects in a capped ns cannot grow", 10003);
}
- void InMemoryRecordStore::temp_cappedTruncateAfter(OperationContext* txn,
- RecordId end,
- bool inclusive) {
- Records::iterator it = inclusive ? _data->records.lower_bound(end)
- : _data->records.upper_bound(end);
- while(it != _data->records.end()) {
- txn->recoveryUnit()->registerChange(new RemoveChange(_data, it->first, it->second));
- _data->dataSize -= it->second.size;
- _data->records.erase(it++);
+ if (notifier) {
+ // The in-memory KV engine uses the invalidation framework (does not support
+ // doc-locking), and therefore must notify that it is updating a document.
+ Status callbackStatus = notifier->recordStoreGoingToUpdateInPlace(txn, loc);
+ if (!callbackStatus.isOK()) {
+ return StatusWith<RecordId>(callbackStatus);
}
}
- Status InMemoryRecordStore::validate(OperationContext* txn,
- bool full,
- bool scanData,
- ValidateAdaptor* adaptor,
- ValidateResults* results,
- BSONObjBuilder* output) {
- results->valid = true;
- if (scanData && full) {
- for (Records::const_iterator it = _data->records.begin();
- it != _data->records.end(); ++it) {
- const InMemoryRecord& rec = it->second;
- size_t dataSize;
- const Status status = adaptor->validate(rec.toRecordData(), &dataSize);
- if (!status.isOK()) {
- results->valid = false;
- results->errors.push_back("invalid object detected (see logs)");
- log() << "Invalid object detected in " << _ns << ": " << status.reason();
- }
- }
- }
+ InMemoryRecord newRecord(len);
+ memcpy(newRecord.data.get(), data, len);
- output->appendNumber( "nrecords", _data->records.size() );
+ txn->recoveryUnit()->registerChange(new RemoveChange(_data, loc, *oldRecord));
+ _data->dataSize += len - oldLen;
+ *oldRecord = newRecord;
- return Status::OK();
+ cappedDeleteAsNeeded(txn);
- }
+ return StatusWith<RecordId>(loc);
+}
- void InMemoryRecordStore::appendCustomStats( OperationContext* txn,
- BSONObjBuilder* result,
- double scale ) const {
- result->appendBool( "capped", _isCapped );
- if ( _isCapped ) {
- result->appendIntOrLL( "max", _cappedMaxDocs );
- result->appendIntOrLL( "maxSize", _cappedMaxSize / scale );
- }
+bool InMemoryRecordStore::updateWithDamagesSupported() const {
+ // TODO: Currently the UpdateStage assumes that updateWithDamages will apply the
+ // damages directly to the unowned BSONObj containing the record to be modified.
+ // The implementation of updateWithDamages() below copies the old record to a
+    // The implementation of updateWithDamages() below copies the old record to
+    // a new one and then applies the damages.
+ // We should be able to enable updateWithDamages() here once this assumption is
+ // relaxed.
+ return false;
+}
+
+Status InMemoryRecordStore::updateWithDamages(OperationContext* txn,
+ const RecordId& loc,
+ const RecordData& oldRec,
+ const char* damageSource,
+ const mutablebson::DamageVector& damages) {
+ InMemoryRecord* oldRecord = recordFor(loc);
+ const int len = oldRecord->size;
+
+ InMemoryRecord newRecord(len);
+ memcpy(newRecord.data.get(), oldRecord->data.get(), len);
+
+ txn->recoveryUnit()->registerChange(new RemoveChange(_data, loc, *oldRecord));
+ *oldRecord = newRecord;
+
+ cappedDeleteAsNeeded(txn);
+
+ char* root = newRecord.data.get();
+ mutablebson::DamageVector::const_iterator where = damages.begin();
+ const mutablebson::DamageVector::const_iterator end = damages.end();
+ for (; where != end; ++where) {
+ const char* sourcePtr = damageSource + where->sourceOffset;
+ char* targetPtr = root + where->targetOffset;
+ std::memcpy(targetPtr, sourcePtr, where->size);
}
- Status InMemoryRecordStore::touch(OperationContext* txn, BSONObjBuilder* output) const {
- if (output) {
- output->append("numRanges", 1);
- output->append("millis", 0);
+ *oldRecord = newRecord;
+
+ return Status::OK();
+}
+
+std::unique_ptr<RecordCursor> InMemoryRecordStore::getCursor(OperationContext* txn,
+ bool forward) const {
+ if (forward)
+ return stdx::make_unique<Cursor>(txn, *this);
+ return stdx::make_unique<ReverseCursor>(txn, *this);
+}
+
+Status InMemoryRecordStore::truncate(OperationContext* txn) {
+ // Unlike other changes, TruncateChange mutates _data on construction to perform the
+ // truncate
+ txn->recoveryUnit()->registerChange(new TruncateChange(_data));
+ return Status::OK();
+}
+
+void InMemoryRecordStore::temp_cappedTruncateAfter(OperationContext* txn,
+ RecordId end,
+ bool inclusive) {
+ Records::iterator it =
+ inclusive ? _data->records.lower_bound(end) : _data->records.upper_bound(end);
+ while (it != _data->records.end()) {
+ txn->recoveryUnit()->registerChange(new RemoveChange(_data, it->first, it->second));
+ _data->dataSize -= it->second.size;
+ _data->records.erase(it++);
+ }
+}
+
+Status InMemoryRecordStore::validate(OperationContext* txn,
+ bool full,
+ bool scanData,
+ ValidateAdaptor* adaptor,
+ ValidateResults* results,
+ BSONObjBuilder* output) {
+ results->valid = true;
+ if (scanData && full) {
+ for (Records::const_iterator it = _data->records.begin(); it != _data->records.end();
+ ++it) {
+ const InMemoryRecord& rec = it->second;
+ size_t dataSize;
+ const Status status = adaptor->validate(rec.toRecordData(), &dataSize);
+ if (!status.isOK()) {
+ results->valid = false;
+ results->errors.push_back("invalid object detected (see logs)");
+ log() << "Invalid object detected in " << _ns << ": " << status.reason();
+ }
}
- return Status::OK();
}
- void InMemoryRecordStore::increaseStorageSize(OperationContext* txn,
- int size, bool enforceQuota) {
- // unclear what this would mean for this class. For now, just error if called.
- invariant(!"increaseStorageSize not yet implemented");
- }
+ output->appendNumber("nrecords", _data->records.size());
- int64_t InMemoryRecordStore::storageSize(OperationContext* txn,
- BSONObjBuilder* extraInfo,
- int infoLevel) const {
- // Note: not making use of extraInfo or infoLevel since we don't have extents
- const int64_t recordOverhead = numRecords(txn) * sizeof(InMemoryRecord);
- return _data->dataSize + recordOverhead;
- }
+ return Status::OK();
+}
- RecordId InMemoryRecordStore::allocateLoc() {
- RecordId out = RecordId(_data->nextId++);
- invariant(out < RecordId::max());
- return out;
+void InMemoryRecordStore::appendCustomStats(OperationContext* txn,
+ BSONObjBuilder* result,
+ double scale) const {
+ result->appendBool("capped", _isCapped);
+ if (_isCapped) {
+ result->appendIntOrLL("max", _cappedMaxDocs);
+ result->appendIntOrLL("maxSize", _cappedMaxSize / scale);
}
+}
- boost::optional<RecordId> InMemoryRecordStore::oplogStartHack(
- OperationContext* txn,
- const RecordId& startingPosition) const {
-
- if (!_data->isOplog)
- return boost::none;
-
- const Records& records = _data->records;
-
- if (records.empty())
- return RecordId();
-
- Records::const_iterator it = records.lower_bound(startingPosition);
- if (it == records.end() || it->first > startingPosition)
- --it;
-
- return it->first;
+Status InMemoryRecordStore::touch(OperationContext* txn, BSONObjBuilder* output) const {
+ if (output) {
+ output->append("numRanges", 1);
+ output->append("millis", 0);
}
-
-} // namespace mongo
+ return Status::OK();
+}
+
+void InMemoryRecordStore::increaseStorageSize(OperationContext* txn, int size, bool enforceQuota) {
+ // unclear what this would mean for this class. For now, just error if called.
+ invariant(!"increaseStorageSize not yet implemented");
+}
+
+int64_t InMemoryRecordStore::storageSize(OperationContext* txn,
+ BSONObjBuilder* extraInfo,
+ int infoLevel) const {
+ // Note: not making use of extraInfo or infoLevel since we don't have extents
+ const int64_t recordOverhead = numRecords(txn) * sizeof(InMemoryRecord);
+ return _data->dataSize + recordOverhead;
+}
+
+RecordId InMemoryRecordStore::allocateLoc() {
+ RecordId out = RecordId(_data->nextId++);
+ invariant(out < RecordId::max());
+ return out;
+}
+
+boost::optional<RecordId> InMemoryRecordStore::oplogStartHack(
+ OperationContext* txn, const RecordId& startingPosition) const {
+ if (!_data->isOplog)
+ return boost::none;
+
+ const Records& records = _data->records;
+
+ if (records.empty())
+ return RecordId();
+
+ Records::const_iterator it = records.lower_bound(startingPosition);
+ if (it == records.end() || it->first > startingPosition)
+ --it;
+
+ return it->first;
+}
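oplogStartHack() implements "greatest RecordId <= startingPosition" with lower_bound() plus one step back; note that it relies on callers never passing a position that sorts before the first record, since --it on begin() would otherwise be undefined. The same idiom on a plain std::map, with that boundary case handled explicitly (a standalone sketch, hypothetical names):

    #include <map>
    #include <boost/optional.hpp>

    // Returns the greatest key <= target, or boost::none if every key in the
    // map is greater. Mirrors the lower_bound/step-back logic above.
    boost::optional<int> greatestKeyLE(const std::map<int, int>& m, int target) {
        std::map<int, int>::const_iterator it = m.lower_bound(target);
        if (it != m.end() && it->first == target)
            return it->first;    // exact match
        if (it == m.begin())
            return boost::none;  // all keys are > target
        --it;                    // predecessor: greatest key < target
        return it->first;
    }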
+
+} // namespace mongo
diff --git a/src/mongo/db/storage/in_memory/in_memory_record_store.h b/src/mongo/db/storage/in_memory/in_memory_record_store.h
index 53df7883758..bd241555394 100644
--- a/src/mongo/db/storage/in_memory/in_memory_record_store.h
+++ b/src/mongo/db/storage/in_memory/in_memory_record_store.h
@@ -38,151 +38,164 @@
namespace mongo {
- /**
- * A RecordStore that stores all data in-memory.
- *
- * @param cappedMaxSize - required if isCapped. limit uses dataSize() in this impl.
- */
- class InMemoryRecordStore : public RecordStore {
- public:
- explicit InMemoryRecordStore(StringData ns,
- std::shared_ptr<void>* dataInOut,
- bool isCapped = false,
- int64_t cappedMaxSize = -1,
- int64_t cappedMaxDocs = -1,
- CappedDocumentDeleteCallback* cappedDeleteCallback = NULL);
-
- virtual const char* name() const;
-
- virtual RecordData dataFor( OperationContext* txn, const RecordId& loc ) const;
-
- virtual bool findRecord( OperationContext* txn, const RecordId& loc, RecordData* rd ) const;
-
- virtual void deleteRecord( OperationContext* txn, const RecordId& dl );
-
- virtual StatusWith<RecordId> insertRecord( OperationContext* txn,
- const char* data,
- int len,
- bool enforceQuota );
+/**
+ * A RecordStore that stores all data in-memory.
+ *
+ * @param cappedMaxSize - required if isCapped. limit uses dataSize() in this impl.
+ */
+class InMemoryRecordStore : public RecordStore {
+public:
+ explicit InMemoryRecordStore(StringData ns,
+ std::shared_ptr<void>* dataInOut,
+ bool isCapped = false,
+ int64_t cappedMaxSize = -1,
+ int64_t cappedMaxDocs = -1,
+ CappedDocumentDeleteCallback* cappedDeleteCallback = NULL);
- virtual StatusWith<RecordId> insertRecord( OperationContext* txn,
- const DocWriter* doc,
- bool enforceQuota );
+ virtual const char* name() const;
- virtual StatusWith<RecordId> updateRecord( OperationContext* txn,
- const RecordId& oldLocation,
- const char* data,
- int len,
- bool enforceQuota,
- UpdateNotifier* notifier );
+ virtual RecordData dataFor(OperationContext* txn, const RecordId& loc) const;
- virtual bool updateWithDamagesSupported() const;
+ virtual bool findRecord(OperationContext* txn, const RecordId& loc, RecordData* rd) const;
- virtual Status updateWithDamages( OperationContext* txn,
- const RecordId& loc,
- const RecordData& oldRec,
- const char* damageSource,
- const mutablebson::DamageVector& damages );
+ virtual void deleteRecord(OperationContext* txn, const RecordId& dl);
- std::unique_ptr<RecordCursor> getCursor(OperationContext* txn, bool forward) const final;
+ virtual StatusWith<RecordId> insertRecord(OperationContext* txn,
+ const char* data,
+ int len,
+ bool enforceQuota);
- virtual Status truncate( OperationContext* txn );
+ virtual StatusWith<RecordId> insertRecord(OperationContext* txn,
+ const DocWriter* doc,
+ bool enforceQuota);
- virtual void temp_cappedTruncateAfter( OperationContext* txn, RecordId end, bool inclusive );
+ virtual StatusWith<RecordId> updateRecord(OperationContext* txn,
+ const RecordId& oldLocation,
+ const char* data,
+ int len,
+ bool enforceQuota,
+ UpdateNotifier* notifier);
- virtual Status validate( OperationContext* txn,
- bool full,
- bool scanData,
- ValidateAdaptor* adaptor,
- ValidateResults* results, BSONObjBuilder* output );
+ virtual bool updateWithDamagesSupported() const;
- virtual void appendCustomStats( OperationContext* txn,
- BSONObjBuilder* result,
- double scale ) const;
+ virtual Status updateWithDamages(OperationContext* txn,
+ const RecordId& loc,
+ const RecordData& oldRec,
+ const char* damageSource,
+ const mutablebson::DamageVector& damages);
- virtual Status touch( OperationContext* txn, BSONObjBuilder* output ) const;
+ std::unique_ptr<RecordCursor> getCursor(OperationContext* txn, bool forward) const final;
- virtual void increaseStorageSize( OperationContext* txn, int size, bool enforceQuota );
+ virtual Status truncate(OperationContext* txn);
- virtual int64_t storageSize( OperationContext* txn,
- BSONObjBuilder* extraInfo = NULL,
- int infoLevel = 0) const;
+ virtual void temp_cappedTruncateAfter(OperationContext* txn, RecordId end, bool inclusive);
- virtual long long dataSize( OperationContext* txn ) const { return _data->dataSize; }
+ virtual Status validate(OperationContext* txn,
+ bool full,
+ bool scanData,
+ ValidateAdaptor* adaptor,
+ ValidateResults* results,
+ BSONObjBuilder* output);
- virtual long long numRecords( OperationContext* txn ) const {
- return _data->records.size();
- }
+ virtual void appendCustomStats(OperationContext* txn,
+ BSONObjBuilder* result,
+ double scale) const;
- virtual boost::optional<RecordId> oplogStartHack(OperationContext* txn,
- const RecordId& startingPosition) const;
+ virtual Status touch(OperationContext* txn, BSONObjBuilder* output) const;
- virtual void updateStatsAfterRepair(OperationContext* txn,
- long long numRecords,
- long long dataSize) {
- invariant(_data->records.size() == size_t(numRecords));
- _data->dataSize = dataSize;
- }
+ virtual void increaseStorageSize(OperationContext* txn, int size, bool enforceQuota);
- protected:
- struct InMemoryRecord {
- InMemoryRecord() :size(0) {}
- InMemoryRecord(int size) :size(size), data(new char[size]) {}
+ virtual int64_t storageSize(OperationContext* txn,
+ BSONObjBuilder* extraInfo = NULL,
+ int infoLevel = 0) const;
- RecordData toRecordData() const { return RecordData(data.get(), size); }
+ virtual long long dataSize(OperationContext* txn) const {
+ return _data->dataSize;
+ }
- int size;
- boost::shared_array<char> data;
- };
+ virtual long long numRecords(OperationContext* txn) const {
+ return _data->records.size();
+ }
- virtual const InMemoryRecord* recordFor( const RecordId& loc ) const;
- virtual InMemoryRecord* recordFor( const RecordId& loc );
+ virtual boost::optional<RecordId> oplogStartHack(OperationContext* txn,
+ const RecordId& startingPosition) const;
- public:
- //
- // Not in RecordStore interface
- //
+ virtual void updateStatsAfterRepair(OperationContext* txn,
+ long long numRecords,
+ long long dataSize) {
+ invariant(_data->records.size() == size_t(numRecords));
+ _data->dataSize = dataSize;
+ }
- typedef std::map<RecordId, InMemoryRecord> Records;
+protected:
+ struct InMemoryRecord {
+ InMemoryRecord() : size(0) {}
+ InMemoryRecord(int size) : size(size), data(new char[size]) {}
- bool isCapped() const { return _isCapped; }
- void setCappedDeleteCallback(CappedDocumentDeleteCallback* cb) {
- _cappedDeleteCallback = cb;
+ RecordData toRecordData() const {
+ return RecordData(data.get(), size);
}
- bool cappedMaxDocs() const { invariant(_isCapped); return _cappedMaxDocs; }
- bool cappedMaxSize() const { invariant(_isCapped); return _cappedMaxSize; }
- private:
- class InsertChange;
- class RemoveChange;
- class TruncateChange;
-
- class Cursor;
- class ReverseCursor;
-
- StatusWith<RecordId> extractAndCheckLocForOplog(const char* data, int len) const;
-
- RecordId allocateLoc();
- bool cappedAndNeedDelete(OperationContext* txn) const;
- void cappedDeleteAsNeeded(OperationContext* txn);
-
- // TODO figure out a proper solution to metadata
- const bool _isCapped;
- const int64_t _cappedMaxSize;
- const int64_t _cappedMaxDocs;
- CappedDocumentDeleteCallback* _cappedDeleteCallback;
-
- // This is the "persistent" data.
- struct Data {
- Data(bool isOplog) :dataSize(0), nextId(1), isOplog(isOplog) {}
-
- int64_t dataSize;
- Records records;
- int64_t nextId;
- const bool isOplog;
- };
+ int size;
+ boost::shared_array<char> data;
+ };
- Data* const _data;
+ virtual const InMemoryRecord* recordFor(const RecordId& loc) const;
+ virtual InMemoryRecord* recordFor(const RecordId& loc);
+
+public:
+ //
+ // Not in RecordStore interface
+ //
+
+ typedef std::map<RecordId, InMemoryRecord> Records;
+
+ bool isCapped() const {
+ return _isCapped;
+ }
+ void setCappedDeleteCallback(CappedDocumentDeleteCallback* cb) {
+ _cappedDeleteCallback = cb;
+ }
+ bool cappedMaxDocs() const {
+ invariant(_isCapped);
+ return _cappedMaxDocs;
+ }
+ bool cappedMaxSize() const {
+ invariant(_isCapped);
+ return _cappedMaxSize;
+ }
+
+private:
+ class InsertChange;
+ class RemoveChange;
+ class TruncateChange;
+
+ class Cursor;
+ class ReverseCursor;
+
+ StatusWith<RecordId> extractAndCheckLocForOplog(const char* data, int len) const;
+
+ RecordId allocateLoc();
+ bool cappedAndNeedDelete(OperationContext* txn) const;
+ void cappedDeleteAsNeeded(OperationContext* txn);
+
+ // TODO figure out a proper solution to metadata
+ const bool _isCapped;
+ const int64_t _cappedMaxSize;
+ const int64_t _cappedMaxDocs;
+ CappedDocumentDeleteCallback* _cappedDeleteCallback;
+
+ // This is the "persistent" data.
+ struct Data {
+ Data(bool isOplog) : dataSize(0), nextId(1), isOplog(isOplog) {}
+
+ int64_t dataSize;
+ Records records;
+ int64_t nextId;
+ const bool isOplog;
};
-} // namespace mongo
+ Data* const _data;
+};
+
+} // namespace mongo
diff --git a/src/mongo/db/storage/in_memory/in_memory_record_store_test.cpp b/src/mongo/db/storage/in_memory/in_memory_record_store_test.cpp
index 42138116da9..aedbb4484db 100644
--- a/src/mongo/db/storage/in_memory/in_memory_record_store_test.cpp
+++ b/src/mongo/db/storage/in_memory/in_memory_record_store_test.cpp
@@ -37,24 +37,22 @@
namespace mongo {
- class InMemoryHarnessHelper : public HarnessHelper {
- public:
- InMemoryHarnessHelper() {
- }
+class InMemoryHarnessHelper : public HarnessHelper {
+public:
+ InMemoryHarnessHelper() {}
- virtual RecordStore* newNonCappedRecordStore() {
- return new InMemoryRecordStore( "a.b", &data );
- }
-
- virtual RecoveryUnit* newRecoveryUnit() {
- return new InMemoryRecoveryUnit();
- }
-
- std::shared_ptr<void> data;
- };
+ virtual RecordStore* newNonCappedRecordStore() {
+ return new InMemoryRecordStore("a.b", &data);
+ }
- HarnessHelper* newHarnessHelper() {
- return new InMemoryHarnessHelper();
+ virtual RecoveryUnit* newRecoveryUnit() {
+ return new InMemoryRecoveryUnit();
}
+ std::shared_ptr<void> data;
+};
+
+HarnessHelper* newHarnessHelper() {
+ return new InMemoryHarnessHelper();
+}
}
diff --git a/src/mongo/db/storage/in_memory/in_memory_recovery_unit.cpp b/src/mongo/db/storage/in_memory/in_memory_recovery_unit.cpp
index 7ccf4574d47..80999a59305 100644
--- a/src/mongo/db/storage/in_memory/in_memory_recovery_unit.cpp
+++ b/src/mongo/db/storage/in_memory/in_memory_recovery_unit.cpp
@@ -37,30 +37,28 @@
namespace mongo {
- void InMemoryRecoveryUnit::commitUnitOfWork() {
- try {
- for (Changes::iterator it = _changes.begin(), end = _changes.end(); it != end; ++it) {
- (*it)->commit();
- }
- _changes.clear();
- }
- catch (...) {
- std::terminate();
+void InMemoryRecoveryUnit::commitUnitOfWork() {
+ try {
+ for (Changes::iterator it = _changes.begin(), end = _changes.end(); it != end; ++it) {
+ (*it)->commit();
}
+ _changes.clear();
+ } catch (...) {
+ std::terminate();
}
+}
- void InMemoryRecoveryUnit::abortUnitOfWork() {
- try {
- for (Changes::reverse_iterator it = _changes.rbegin(), end = _changes.rend();
- it != end; ++it) {
- ChangePtr change = *it;
- LOG(2) << "CUSTOM ROLLBACK " << demangleName(typeid(*change));
- change->rollback();
- }
- _changes.clear();
- }
- catch (...) {
- std::terminate();
+void InMemoryRecoveryUnit::abortUnitOfWork() {
+ try {
+ for (Changes::reverse_iterator it = _changes.rbegin(), end = _changes.rend(); it != end;
+ ++it) {
+ ChangePtr change = *it;
+ LOG(2) << "CUSTOM ROLLBACK " << demangleName(typeid(*change));
+ change->rollback();
}
+ _changes.clear();
+ } catch (...) {
+ std::terminate();
}
}
+}
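Both loops above assume every registered Change knows how to commit() (discard its undo state) or rollback() (compensate). A minimal sketch of a Change following the eager-apply pattern used elsewhere in this diff (TruncateChange, for instance, mutates _data on construction and undoes that on rollback); IncrementChange is a hypothetical name, not part of this commit:

    // Applies its mutation eagerly; rollback() compensates, commit() is a no-op.
    class IncrementChange : public RecoveryUnit::Change {
    public:
        explicit IncrementChange(int* counter) : _counter(counter) {
            ++*_counter;  // mutate up front, before the outcome is known
        }
        virtual void commit() {}  // nothing to do: the change already happened
        virtual void rollback() {
            --*_counter;  // undo the eager mutation
        }

    private:
        int* _counter;
    };

    // txn->recoveryUnit()->registerChange(new IncrementChange(&counter));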
diff --git a/src/mongo/db/storage/in_memory/in_memory_recovery_unit.h b/src/mongo/db/storage/in_memory/in_memory_recovery_unit.h
index 2ef0552d58e..895e364ef04 100644
--- a/src/mongo/db/storage/in_memory/in_memory_recovery_unit.h
+++ b/src/mongo/db/storage/in_memory/in_memory_recovery_unit.h
@@ -37,37 +37,39 @@
namespace mongo {
- class SortedDataInterface;
+class SortedDataInterface;
- class InMemoryRecoveryUnit : public RecoveryUnit {
- public:
- void beginUnitOfWork(OperationContext* opCtx) final { };
- void commitUnitOfWork() final;
- void abortUnitOfWork() final;
+class InMemoryRecoveryUnit : public RecoveryUnit {
+public:
+ void beginUnitOfWork(OperationContext* opCtx) final{};
+ void commitUnitOfWork() final;
+ void abortUnitOfWork() final;
- virtual bool waitUntilDurable() {
- return true;
- }
+ virtual bool waitUntilDurable() {
+ return true;
+ }
- virtual void abandonSnapshot() {}
+ virtual void abandonSnapshot() {}
- virtual void registerChange(Change* change) {
- _changes.push_back(ChangePtr(change));
- }
+ virtual void registerChange(Change* change) {
+ _changes.push_back(ChangePtr(change));
+ }
- virtual void* writingPtr(void* data, size_t len) {
- invariant(!"don't call writingPtr");
- }
+ virtual void* writingPtr(void* data, size_t len) {
+ invariant(!"don't call writingPtr");
+ }
- virtual void setRollbackWritesDisabled() {}
+ virtual void setRollbackWritesDisabled() {}
- virtual SnapshotId getSnapshotId() const { return SnapshotId(); }
+ virtual SnapshotId getSnapshotId() const {
+ return SnapshotId();
+ }
- private:
- typedef std::shared_ptr<Change> ChangePtr;
- typedef std::vector<ChangePtr> Changes;
+private:
+ typedef std::shared_ptr<Change> ChangePtr;
+ typedef std::vector<ChangePtr> Changes;
- Changes _changes;
- };
+ Changes _changes;
+};
-} // namespace mongo
+} // namespace mongo
diff --git a/src/mongo/db/storage/index_entry_comparison.cpp b/src/mongo/db/storage/index_entry_comparison.cpp
index 4a5d4fdab1a..41a1ae709e9 100644
--- a/src/mongo/db/storage/index_entry_comparison.cpp
+++ b/src/mongo/db/storage/index_entry_comparison.cpp
@@ -34,138 +34,134 @@
namespace mongo {
- std::ostream& operator<<(std::ostream& stream, const IndexKeyEntry& entry) {
- return stream << entry.key << '@' << entry.loc;
- }
-
- // Due to the limitations of various APIs, we need to use the same type (IndexKeyEntry)
- // for both the stored data and the "query". We cheat and encode extra information in the
- // first byte of the field names in the query. This works because all stored objects should
- // have all field names empty, so their first bytes are '\0'.
- enum BehaviorIfFieldIsEqual {
- normal = '\0',
- less = 'l',
- greater = 'g',
- };
-
- bool IndexEntryComparison::operator() (const IndexKeyEntry& lhs, const IndexKeyEntry& rhs)
- const {
- // implementing in memcmp style to ease reuse of this code.
- return compare(lhs, rhs) < 0;
- }
-
- // This should behave the same as customBSONCmp from btree_logic.cpp.
- //
- // Reading the comment in the .h file is highly recommended if you need to understand what this
- // function is doing
- int IndexEntryComparison::compare(const IndexKeyEntry& lhs, const IndexKeyEntry& rhs) const {
- BSONObjIterator lhsIt(lhs.key);
- BSONObjIterator rhsIt(rhs.key);
-
- // Iterate through both BSONObjects, comparing individual elements one by one
- for (unsigned mask = 1; lhsIt.more(); mask <<= 1) {
- if (!rhsIt.more())
- return _order.descending(mask) ? -1 : 1;
-
- const BSONElement l = lhsIt.next();
- const BSONElement r = rhsIt.next();
-
- if (int cmp = l.woCompare(r, /*compareFieldNames=*/false)) {
- if (cmp == std::numeric_limits<int>::min()) {
- // can't be negated
- cmp = -1;
- }
-
- return _order.descending(mask) ? -cmp : cmp;
- }
-
- // Here is where the weirdness begins. We sometimes want to fudge the comparison
- // when a key == the query to implement exclusive ranges.
- BehaviorIfFieldIsEqual lEqBehavior = BehaviorIfFieldIsEqual(l.fieldName()[0]);
- BehaviorIfFieldIsEqual rEqBehavior = BehaviorIfFieldIsEqual(r.fieldName()[0]);
-
- if (lEqBehavior) {
- // lhs is the query, rhs is the stored data
- invariant(rEqBehavior == normal);
- return lEqBehavior == less ? -1 : 1;
- }
-
- if (rEqBehavior) {
- // rhs is the query, lhs is the stored data, so reverse the returns
- invariant(lEqBehavior == normal);
- return rEqBehavior == less ? 1 : -1;
+std::ostream& operator<<(std::ostream& stream, const IndexKeyEntry& entry) {
+ return stream << entry.key << '@' << entry.loc;
+}
+
+// Due to the limitations of various APIs, we need to use the same type (IndexKeyEntry)
+// for both the stored data and the "query". We cheat and encode extra information in the
+// first byte of the field names in the query. This works because all stored objects should
+// have all field names empty, so their first bytes are '\0'.
+enum BehaviorIfFieldIsEqual {
+ normal = '\0',
+ less = 'l',
+ greater = 'g',
+};
+
+bool IndexEntryComparison::operator()(const IndexKeyEntry& lhs, const IndexKeyEntry& rhs) const {
+ // implementing in memcmp style to ease reuse of this code.
+ return compare(lhs, rhs) < 0;
+}
+
+// This should behave the same as customBSONCmp from btree_logic.cpp.
+//
+// Reading the comment in the .h file is highly recommended if you need to understand what this
+// function is doing
+int IndexEntryComparison::compare(const IndexKeyEntry& lhs, const IndexKeyEntry& rhs) const {
+ BSONObjIterator lhsIt(lhs.key);
+ BSONObjIterator rhsIt(rhs.key);
+
+ // Iterate through both BSONObjects, comparing individual elements one by one
+ for (unsigned mask = 1; lhsIt.more(); mask <<= 1) {
+ if (!rhsIt.more())
+ return _order.descending(mask) ? -1 : 1;
+
+ const BSONElement l = lhsIt.next();
+ const BSONElement r = rhsIt.next();
+
+ if (int cmp = l.woCompare(r, /*compareFieldNames=*/false)) {
+ if (cmp == std::numeric_limits<int>::min()) {
+ // can't be negated
+ cmp = -1;
}
+ return _order.descending(mask) ? -cmp : cmp;
}
- if(rhsIt.more())
- return -1;
-
- // This means just look at the key, not the loc.
- if (lhs.loc.isNull() || rhs.loc.isNull())
- return 0;
-
- return lhs.loc.compare(rhs.loc); // is supposed to ignore ordering
- }
+ // Here is where the weirdness begins. We sometimes want to fudge the comparison
+ // when a key == the query to implement exclusive ranges.
+ BehaviorIfFieldIsEqual lEqBehavior = BehaviorIfFieldIsEqual(l.fieldName()[0]);
+ BehaviorIfFieldIsEqual rEqBehavior = BehaviorIfFieldIsEqual(r.fieldName()[0]);
- // reading the comment in the .h file is highly recommended if you need to understand what this
- // function is doing
- BSONObj IndexEntryComparison::makeQueryObject(const BSONObj& keyPrefix,
- int prefixLen,
- bool prefixExclusive,
- const std::vector<const BSONElement*>& keySuffix,
- const std::vector<bool>& suffixInclusive,
- const int cursorDirection) {
-
- // Please read the comments in the header file to see why this is done.
- // The basic idea is that we use the field name to store a byte which indicates whether
- // each field in the query object is inclusive and exclusive, and if it is exclusive, in
- // which direction.
- const char exclusiveByte = (cursorDirection == 1 ? greater : less);
-
- const StringData exclusiveFieldName(&exclusiveByte, 1);
-
- BSONObjBuilder bb;
-
- // handle the prefix
- if (prefixLen > 0) {
- BSONObjIterator it(keyPrefix);
- for (int i = 0; i < prefixLen; i++) {
- invariant(it.more());
- const BSONElement e = it.next();
-
- if (prefixExclusive && i == prefixLen - 1) {
- bb.appendAs(e, exclusiveFieldName);
- }
- else {
- bb.appendAs(e, StringData());
- }
- }
+ if (lEqBehavior) {
+ // lhs is the query, rhs is the stored data
+ invariant(rEqBehavior == normal);
+ return lEqBehavior == less ? -1 : 1;
}
- // If the prefix is exclusive then the suffix does not matter as it will never be used
- if (prefixExclusive) {
- invariant(prefixLen > 0);
- return bb.obj();
+ if (rEqBehavior) {
+ // rhs is the query, lhs is the stored data, so reverse the returns
+ invariant(lEqBehavior == normal);
+ return rEqBehavior == less ? 1 : -1;
}
+ }
- // Handle the suffix. Note that the useful parts of the suffix start at index prefixLen
- // rather than at 0.
- invariant(keySuffix.size() == suffixInclusive.size());
- for (size_t i = prefixLen; i < keySuffix.size(); i++) {
- invariant(keySuffix[i]);
- if (suffixInclusive[i]) {
- bb.appendAs(*keySuffix[i], StringData());
+ if (rhsIt.more())
+ return -1;
+
+    // A null RecordId on either side means compare the keys only, not the locs.
+ if (lhs.loc.isNull() || rhs.loc.isNull())
+ return 0;
+
+ return lhs.loc.compare(rhs.loc); // is supposed to ignore ordering
+}
+
+// reading the comment in the .h file is highly recommended if you need to understand what this
+// function is doing
+BSONObj IndexEntryComparison::makeQueryObject(const BSONObj& keyPrefix,
+ int prefixLen,
+ bool prefixExclusive,
+ const std::vector<const BSONElement*>& keySuffix,
+ const std::vector<bool>& suffixInclusive,
+ const int cursorDirection) {
+ // Please read the comments in the header file to see why this is done.
+ // The basic idea is that we use the field name to store a byte which indicates whether
+    // each field in the query object is inclusive or exclusive, and if it is exclusive, in
+ // which direction.
+ const char exclusiveByte = (cursorDirection == 1 ? greater : less);
+
+ const StringData exclusiveFieldName(&exclusiveByte, 1);
+
+ BSONObjBuilder bb;
+
+ // handle the prefix
+ if (prefixLen > 0) {
+ BSONObjIterator it(keyPrefix);
+ for (int i = 0; i < prefixLen; i++) {
+ invariant(it.more());
+ const BSONElement e = it.next();
+
+ if (prefixExclusive && i == prefixLen - 1) {
+ bb.appendAs(e, exclusiveFieldName);
} else {
- bb.appendAs(*keySuffix[i], exclusiveFieldName);
-
- // If an exclusive field exists then no fields after this will matter, since an
- // exclusive field never evaluates as equal
- return bb.obj();
+ bb.appendAs(e, StringData());
}
}
+ }
+ // If the prefix is exclusive then the suffix does not matter as it will never be used
+ if (prefixExclusive) {
+ invariant(prefixLen > 0);
return bb.obj();
}
-} // namespace mongo
+ // Handle the suffix. Note that the useful parts of the suffix start at index prefixLen
+ // rather than at 0.
+ invariant(keySuffix.size() == suffixInclusive.size());
+ for (size_t i = prefixLen; i < keySuffix.size(); i++) {
+ invariant(keySuffix[i]);
+ if (suffixInclusive[i]) {
+ bb.appendAs(*keySuffix[i], StringData());
+ } else {
+ bb.appendAs(*keySuffix[i], exclusiveFieldName);
+
+ // If an exclusive field exists then no fields after this will matter, since an
+ // exclusive field never evaluates as equal
+ return bb.obj();
+ }
+ }
+
+ return bb.obj();
+}
+
+} // namespace mongo
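Taken together, makeQueryObject() and compare() let a cursor seek to "strictly after { "" : 5 }" on a forward scan. A sketch against a single-field ascending index (the values are hypothetical):

    // The 'g' discriminator byte stored in the query's field name makes the
    // query compare greater than any stored entry whose key is equal.
    std::vector<const BSONElement*> suffix;  // unused: the prefix covers the key
    std::vector<bool> suffixInclusive;
    BSONObj query = IndexEntryComparison::makeQueryObject(
        BSON("" << 5), 1 /*prefixLen*/, true /*prefixExclusive*/,
        suffix, suffixInclusive, 1 /*forward cursor*/);

    IndexEntryComparison cmp(Ordering::make(BSONObj()));  // all-ascending
    IndexKeyEntry stored(BSON("" << 5), RecordId(1));
    IndexKeyEntry seek(query, RecordId());
    invariant(cmp.compare(seek, stored) > 0);  // exclusive: seek lands after 5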
diff --git a/src/mongo/db/storage/index_entry_comparison.h b/src/mongo/db/storage/index_entry_comparison.h
index dbdd15e0368..906192bb954 100644
--- a/src/mongo/db/storage/index_entry_comparison.h
+++ b/src/mongo/db/storage/index_entry_comparison.h
@@ -37,170 +37,170 @@
namespace mongo {
+/**
+ * Represents a single item in an index. An index item simply consists of a key
+ * and a disk location.
+ */
+struct IndexKeyEntry {
+ IndexKeyEntry(BSONObj key, RecordId loc) : key(std::move(key)), loc(std::move(loc)) {}
+
+ BSONObj key;
+ RecordId loc;
+};
+
+std::ostream& operator<<(std::ostream& stream, const IndexKeyEntry& entry);
+
+inline bool operator==(const IndexKeyEntry& lhs, const IndexKeyEntry& rhs) {
+ return std::tie(lhs.key, lhs.loc) == std::tie(rhs.key, rhs.loc);
+}
+
+inline bool operator!=(const IndexKeyEntry& lhs, const IndexKeyEntry& rhs) {
+ return std::tie(lhs.key, lhs.loc) != std::tie(rhs.key, rhs.loc);
+}
+
+/**
+ * Describes a query that can be compared against an IndexKeyEntry in a way that allows
+ * expressing exclusiveness on a prefix of the key. This is mostly used to express a location to
+ * seek to in an index that may not be representable as a valid key.
+ *
+ * The "key" used for comparison is the concatenation of the first 'prefixLen' elements of
+ * 'keyPrefix' followed by the last 'keySuffix.size() - prefixLen' elements of
+ * 'keySuffix'.
+ *
+ * The comparison is exclusive if either 'prefixExclusive' is true or if there are any
+ * values in 'suffixInclusive' that are false at index >= 'prefixLen'.
+ *
+ * Portions of the key following the first exclusive part may be ignored.
+ *
+ * e.g.
+ *
+ * Suppose that
+ *
+ * keyPrefix = { "" : 1, "" : 2 }
+ * prefixLen = 1
+ * prefixExclusive = false
+ * keySuffix = [ IGNORED, { "" : 5 } ]
+ * suffixInclusive = [ IGNORED, false ]
+ *
+ * ==> key is { "" : 1, "" : 5 }
+ * with the comparison being done exclusively
+ *
+ * Suppose that
+ *
+ * keyPrefix = { "" : 1, "" : 2 }
+ * prefixLen = 1
+ * prefixExclusive = true
+ * keySuffix = IGNORED
+ * suffixInclusive = IGNORED
+ *
+ * ==> represented key is { "" : 1 }
+ * with the comparison being done exclusively
+ *
+ * 'prefixLen = 0' and 'prefixExclusive = true' are mutually incompatible.
+ *
+ * @see IndexEntryComparison::makeQueryObject
+ */
+struct IndexSeekPoint {
+ BSONObj keyPrefix;
+
+ /**
+ * Use this many fields in 'keyPrefix'.
+ */
+ int prefixLen = 0;
+
/**
- * Represents a single item in an index. An index item simply consists of a key
- * and a disk location.
 * If true, compare exclusively on just the fields in 'keyPrefix' and ignore the suffix.
*/
- struct IndexKeyEntry {
- IndexKeyEntry(BSONObj key, RecordId loc) :key(std::move(key)), loc(std::move(loc)) {}
+ bool prefixExclusive = false;
- BSONObj key;
- RecordId loc;
- };
+ /**
+ * Elements starting at index 'prefixLen' are logically appended to the prefix.
+ * The elements before index 'prefixLen' should be ignored.
+ */
+ std::vector<const BSONElement*> keySuffix;
- std::ostream& operator<<(std::ostream& stream, const IndexKeyEntry& entry);
+ /**
+ * If the ith element is false, ignore indexes > i in keySuffix and treat the
+ * concatenated key as exclusive.
+ * The elements before index 'prefixLen' should be ignored.
+ *
+ * Must have identical size as keySuffix.
+ */
+ std::vector<bool> suffixInclusive;
+};
- inline bool operator==(const IndexKeyEntry& lhs, const IndexKeyEntry& rhs) {
- return std::tie(lhs.key, lhs.loc) == std::tie(rhs.key, rhs.loc);
- }
+/**
+ * Compares two different IndexKeyEntry instances.
+ * The existence of compound indexes necessitates some complicated logic. This is meant to
+ * support the comparisons of IndexKeyEntries (that are stored in an index) with IndexSeekPoints
+ * (that were encoded with makeQueryObject) to support fine-grained control over whether the
+ * ranges of various keys comprising a compound index are inclusive or exclusive.
+ */
+class IndexEntryComparison {
+public:
+ IndexEntryComparison(Ordering order) : _order(order) {}
- inline bool operator!=(const IndexKeyEntry& lhs, const IndexKeyEntry& rhs) {
- return std::tie(lhs.key, lhs.loc) != std::tie(rhs.key, rhs.loc);
- }
+ bool operator()(const IndexKeyEntry& lhs, const IndexKeyEntry& rhs) const;
/**
- * Describes a query that can be compared against an IndexKeyEntry in a way that allows
- * expressing exclusiveness on a prefix of the key. This is mostly used to express a location to
- * seek to in an index that may not be representable as a valid key.
- *
- * The "key" used for comparison is the concatenation of the first 'prefixLen' elements of
- * 'keyPrefix' followed by the last 'keySuffix.size() - prefixLen' elements of
- * 'keySuffix'.
- *
- * The comparison is exclusive if either 'prefixExclusive' is true or if there are any false
- * values in 'suffixInclusive' that are false at index >= 'prefixLen'.
+ * Compares two IndexKeyEntries and returns -1 if lhs < rhs, 1 if lhs > rhs, and 0
+ * otherwise.
*
- * Portions of the key following the first exclusive part may be ignored.
- *
- * e.g.
- *
- * Suppose that
- *
- * keyPrefix = { "" : 1, "" : 2 }
- * prefixLen = 1
- * prefixExclusive = false
- * keySuffix = [ IGNORED, { "" : 5 } ]
- * suffixInclusive = [ IGNORED, false ]
- *
- * ==> key is { "" : 1, "" : 5 }
- * with the comparison being done exclusively
+ * IndexKeyEntries are compared lexicographically field by field in the BSONObj, followed by
+ * the RecordId. Either lhs or rhs (but not both) can be a query object returned by
+ * makeQueryObject(). See makeQueryObject() for a description of how its arguments affect
+ * the outcome of the comparison.
+ */
+ int compare(const IndexKeyEntry& lhs, const IndexKeyEntry& rhs) const;
+
+ /**
+ * Encodes the arguments into a query object suitable to pass in to compare().
*
- * Suppose that
+ * A query object is used for seeking an iterator to a position in a sorted index. The
+ * difference between a query object and the keys inserted into indexes is that query
+ * objects can be exclusive. This means that the first matching entry in the index is the
+ * first key in the index after the query. The meaning of "after" depends on
+ * cursorDirection.
*
- * keyPrefix = { "" : 1, "" : 2 }
- * prefixLen = 1
- * prefixExclusive = true
- * keySuffix = IGNORED
- * suffixInclusive = IGNORED
+ * The fields of the key are the combination of keyPrefix and keySuffix. The first prefixLen
+ * keys of keyPrefix are used, as well as the keys starting at the prefixLen index of
+ * keySuffix. The first prefixLen elements of keySuffix are ignored.
*
- * ==> represented key is { "" : 1 }
- * with the comparison being done exclusively
+ * If a field is marked as exclusive, then comparisons stop after that field and return
+ * either higher or lower, even if that field compares equal. If prefixExclusive is true and
+ * prefixLen is greater than 0, then the last field in the prefix is marked as exclusive. It
+ * is illegal to specify prefixExclusive as true with a prefixLen of 0. Each bool in
+ * suffixInclusive, starting at index prefixLen, indicates whether the corresponding element
+ * in keySuffix is inclusive or exclusive.
*
- * 'prefixLen = 0' and 'prefixExclusive = true' are mutually incompatible.
+ * Returned objects are for use in lookups only and should never be inserted into the
+ * database, as their format may change. The only reason this is the same type as the
+ * entries in an index is to support storage engines that require comparators that take
+ * arguments of the same type.
*
- * @see IndexEntryComparison::makeQueryObject
+     * A cursorDirection of 1 indicates a forward cursor, and -1 indicates a reverse cursor.
+     * This affects the result when an exclusive field compares equal.
*/
- struct IndexSeekPoint {
- BSONObj keyPrefix;
-
- /**
- * Use this many fields in 'keyPrefix'.
- */
- int prefixLen = 0;
-
- /**
- * If true, compare exclusively on just the fields on keyPrefix and ignore the suffix.
- */
- bool prefixExclusive = false;
-
- /**
- * Elements starting at index 'prefixLen' are logically appended to the prefix.
- * The elements before index 'prefixLen' should be ignored.
- */
- std::vector<const BSONElement*> keySuffix;
-
- /**
- * If the ith element is false, ignore indexes > i in keySuffix and treat the
- * concatenated key as exclusive.
- * The elements before index 'prefixLen' should be ignored.
- *
- * Must have identical size as keySuffix.
- */
- std::vector<bool> suffixInclusive;
- };
+ static BSONObj makeQueryObject(const BSONObj& keyPrefix,
+ int prefixLen,
+ bool prefixExclusive,
+ const std::vector<const BSONElement*>& keySuffix,
+ const std::vector<bool>& suffixInclusive,
+ const int cursorDirection);
+
+ static BSONObj makeQueryObject(const IndexSeekPoint& seekPoint, bool isForward) {
+ return makeQueryObject(seekPoint.keyPrefix,
+ seekPoint.prefixLen,
+ seekPoint.prefixExclusive,
+ seekPoint.keySuffix,
+ seekPoint.suffixInclusive,
+ isForward ? 1 : -1);
+ }
- /**
- * Compares two different IndexKeyEntry instances.
- * The existence of compound indexes necessitates some complicated logic. This is meant to
- * support the comparisons of IndexKeyEntries (that are stored in an index) with IndexSeekPoints
- * (that were encoded with makeQueryObject) to support fine-grained control over whether the
- * ranges of various keys comprising a compound index are inclusive or exclusive.
- */
- class IndexEntryComparison {
- public:
- IndexEntryComparison(Ordering order) : _order(order) {}
-
- bool operator() (const IndexKeyEntry& lhs, const IndexKeyEntry& rhs) const;
-
- /**
- * Compares two IndexKeyEntries and returns -1 if lhs < rhs, 1 if lhs > rhs, and 0
- * otherwise.
- *
- * IndexKeyEntries are compared lexicographically field by field in the BSONObj, followed by
- * the RecordId. Either lhs or rhs (but not both) can be a query object returned by
- * makeQueryObject(). See makeQueryObject() for a description of how its arguments affect
- * the outcome of the comparison.
- */
- int compare(const IndexKeyEntry& lhs, const IndexKeyEntry& rhs) const;
-
- /**
- * Encodes the arguments into a query object suitable to pass in to compare().
- *
- * A query object is used for seeking an iterator to a position in a sorted index. The
- * difference between a query object and the keys inserted into indexes is that query
- * objects can be exclusive. This means that the first matching entry in the index is the
- * first key in the index after the query. The meaning of "after" depends on
- * cursorDirection.
- *
- * The fields of the key are the combination of keyPrefix and keySuffix. The first prefixLen
- * keys of keyPrefix are used, as well as the keys starting at the prefixLen index of
- * keySuffix. The first prefixLen elements of keySuffix are ignored.
- *
- * If a field is marked as exclusive, then comparisons stop after that field and return
- * either higher or lower, even if that field compares equal. If prefixExclusive is true and
- * prefixLen is greater than 0, then the last field in the prefix is marked as exclusive. It
- * is illegal to specify prefixExclusive as true with a prefixLen of 0. Each bool in
- * suffixInclusive, starting at index prefixLen, indicates whether the corresponding element
- * in keySuffix is inclusive or exclusive.
- *
- * Returned objects are for use in lookups only and should never be inserted into the
- * database, as their format may change. The only reason this is the same type as the
- * entries in an index is to support storage engines that require comparators that take
- * arguments of the same type.
- *
- * A cursurDirection of 1 indicates a forward cursor, and -1 indicates a reverse cursor.
- * This effects the result when the exclusive field compares equal.
- */
- static BSONObj makeQueryObject(const BSONObj& keyPrefix,
- int prefixLen,
- bool prefixExclusive,
- const std::vector<const BSONElement*>& keySuffix,
- const std::vector<bool>& suffixInclusive,
- const int cursorDirection);
-
- static BSONObj makeQueryObject(const IndexSeekPoint& seekPoint, bool isForward) {
- return makeQueryObject(seekPoint.keyPrefix,
- seekPoint.prefixLen,
- seekPoint.prefixExclusive,
- seekPoint.keySuffix,
- seekPoint.suffixInclusive,
- isForward ? 1 : -1);
- }
-
- private:
- // Ordering is used in comparison() to compare BSONElements
- const Ordering _order;
-
- }; // struct IndexEntryComparison
-
-} // namespace mongo
+private:
+ // Ordering is used in comparison() to compare BSONElements
+ const Ordering _order;
+
+}; // struct IndexEntryComparison
+
+} // namespace mongo
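Mapping the first worked example from the IndexSeekPoint comment above into code (a sketch; the values are hypothetical):

    // Effective key: { "" : 1 } from the prefix plus { "" : 5 } from the
    // suffix, compared exclusively because suffixInclusive[1] is false.
    BSONObj prefix = BSON("" << 1 << "" << 2);
    BSONObj suffixSrc = BSON("" << 5);  // must outlive the element pointer
    BSONElement suffixElem = suffixSrc.firstElement();

    IndexSeekPoint seekPoint;
    seekPoint.keyPrefix = prefix;
    seekPoint.prefixLen = 1;  // only { "" : 1 } of the prefix participates
    seekPoint.prefixExclusive = false;
    seekPoint.keySuffix.push_back(NULL);         // index 0 < prefixLen: ignored
    seekPoint.keySuffix.push_back(&suffixElem);
    seekPoint.suffixInclusive.push_back(true);   // placeholder, also ignored
    seekPoint.suffixInclusive.push_back(false);  // { "" : 5 } is exclusive

    BSONObj query = IndexEntryComparison::makeQueryObject(seekPoint, true);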
diff --git a/src/mongo/db/storage/key_string.cpp b/src/mongo/db/storage/key_string.cpp
index 526460c1d68..3d43d93e4f2 100644
--- a/src/mongo/db/storage/key_string.cpp
+++ b/src/mongo/db/storage/key_string.cpp
@@ -43,505 +43,517 @@
namespace mongo {
- using std::string;
-
- namespace {
- typedef KeyString::TypeBits TypeBits;
-
- namespace CType {
- // canonical types namespace. (would be enum class CType: uint8_t in C++11)
- // Note 0-9 and 246-255 are disallowed and reserved for value encodings.
- // For types that encode value information in the ctype byte, the value in this list is
- // the "generic" one to be used to represent all values of that ctype, such as in the
- // encoding of fields in Objects.
- const uint8_t kMinKey = 10;
- const uint8_t kUndefined = 15;
- const uint8_t kNullish = 20;
- const uint8_t kNumeric = 30;
- const uint8_t kStringLike = 60;
- const uint8_t kObject = 70;
- const uint8_t kArray = 80;
- const uint8_t kBinData = 90;
- const uint8_t kOID = 100;
- const uint8_t kBool = 110;
- const uint8_t kDate = 120;
- const uint8_t kTimestamp = 130;
- const uint8_t kRegEx = 140;
- const uint8_t kDBRef = 150;
- const uint8_t kCode = 160;
- const uint8_t kCodeWithScope = 170;
- const uint8_t kMaxKey = 240;
-
- // These are ordered by the numeric value of the values encoded in each format.
- // Therefore each format can be considered independently without considering
- // cross-format comparisons.
- const uint8_t kNumericNaN = kNumeric + 0;
- const uint8_t kNumericNegativeLargeDouble = kNumeric + 1; // <= -2**63 including -Inf
- const uint8_t kNumericNegative8ByteInt = kNumeric + 2;
- const uint8_t kNumericNegative7ByteInt = kNumeric + 3;
- const uint8_t kNumericNegative6ByteInt = kNumeric + 4;
- const uint8_t kNumericNegative5ByteInt = kNumeric + 5;
- const uint8_t kNumericNegative4ByteInt = kNumeric + 6;
- const uint8_t kNumericNegative3ByteInt = kNumeric + 7;
- const uint8_t kNumericNegative2ByteInt = kNumeric + 8;
- const uint8_t kNumericNegative1ByteInt = kNumeric + 9;
- const uint8_t kNumericNegativeSmallDouble = kNumeric + 10; // between 0 and -1 exclusive
- const uint8_t kNumericZero = kNumeric + 11;
- const uint8_t kNumericPositiveSmallDouble = kNumeric + 12; // between 0 and 1 exclusive
- const uint8_t kNumericPositive1ByteInt = kNumeric + 13;
- const uint8_t kNumericPositive2ByteInt = kNumeric + 14;
- const uint8_t kNumericPositive3ByteInt = kNumeric + 15;
- const uint8_t kNumericPositive4ByteInt = kNumeric + 16;
- const uint8_t kNumericPositive5ByteInt = kNumeric + 17;
- const uint8_t kNumericPositive6ByteInt = kNumeric + 18;
- const uint8_t kNumericPositive7ByteInt = kNumeric + 19;
- const uint8_t kNumericPositive8ByteInt = kNumeric + 20;
- const uint8_t kNumericPositiveLargeDouble = kNumeric + 21; // >= 2**63 including +Inf
- BOOST_STATIC_ASSERT(kNumericPositiveLargeDouble < kStringLike);
-
- const uint8_t kBoolFalse = kBool + 0;
- const uint8_t kBoolTrue = kBool + 1;
- BOOST_STATIC_ASSERT(kBoolTrue < kDate);
-
- size_t numBytesForInt(uint8_t ctype) {
- if (ctype >= kNumericPositive1ByteInt) {
- dassert(ctype <= kNumericPositive8ByteInt);
- return ctype - kNumericPositive1ByteInt + 1;
- }
-
- dassert(ctype <= kNumericNegative1ByteInt);
- dassert(ctype >= kNumericNegative8ByteInt);
- return kNumericNegative1ByteInt - ctype + 1;
- }
- } // namespace CType
-
- uint8_t bsonTypeToGenericKeyStringType(BSONType type) {
- switch (type) {
- case MinKey:
- return CType::kMinKey;
-
- case EOO:
- case jstNULL:
- return CType::kNullish;
-
- case Undefined:
- return CType::kUndefined;
-
- case NumberDouble:
- case NumberInt:
- case NumberLong:
- return CType::kNumeric;
-
- case mongo::String:
- case Symbol:
- return CType::kStringLike;
-
- case Object: return CType::kObject;
- case Array: return CType::kArray;
- case BinData: return CType::kBinData;
- case jstOID: return CType::kOID;
- case Bool: return CType::kBool;
- case Date: return CType::kDate;
- case bsonTimestamp: return CType::kTimestamp;
- case RegEx: return CType::kRegEx;
- case DBRef: return CType::kDBRef;
-
- case Code: return CType::kCode;
- case CodeWScope: return CType::kCodeWithScope;
-
- case MaxKey: return CType::kMaxKey;
- default:
- invariant(false);
- }
- }
-
- // First double that isn't an int64.
- const double kMinLargeDouble = 9223372036854775808.0; // 1ULL<<63
-
- const uint8_t kEnd = 0x4;
-
- // These overlay with CType or kEnd bytes and therefor must be less/greater than all of
- // them (and their inverses). They also can't equal 0 or 255 since that would collide with
- // the encoding of NUL bytes in strings as "\x00\xff".
- const uint8_t kLess = 1;
- const uint8_t kGreater = 254;
- } // namespace
-
- // some utility functions
- namespace {
- void memcpy_flipBits(void* dst, const void* src, size_t bytes) {
- const char* input = static_cast<const char*>(src);
- char* output = static_cast<char*>(dst);
- const char* const end = input + bytes;
- while (input != end) {
- *output++ = ~(*input++);
- }
- }
-
- template <typename T> T readType(BufReader* reader, bool inverted) {
- // TODO for C++11 to static_assert that T is integral
- T t = ConstDataView(static_cast<const char*>(reader->skip(sizeof(T)))).read<T>();
- if (inverted)
- return ~t;
- return t;
- }
-
- StringData readCString(BufReader* reader) {
- const char* start = static_cast<const char*>(reader->pos());
- const char* end = static_cast<const char*>(memchr(start, 0x0, reader->remaining()));
- invariant(end);
- size_t actualBytes = end - start;
- reader->skip(1 + actualBytes);
- return StringData(start, actualBytes);
- }
-
- /**
- * scratch must be empty when passed in. It will be used if there is a NUL byte in the
- * output string. In that case the returned StringData will point into scratch, otherwise
- * it will point directly into the input buffer.
- */
- StringData readCStringWithNuls(BufReader* reader, std::string* scratch) {
- const StringData initial = readCString(reader);
- if (reader->peek<unsigned char>() != 0xFF)
- return initial; // Don't alloc or copy for simple case with no NUL bytes.
-
- scratch->append(initial.rawData(), initial.size());
- while (reader->peek<unsigned char>() == 0xFF) {
- // Each time we enter this loop it means we hit a NUL byte encoded as "\x00\xFF".
- *scratch += '\0';
- reader->skip(1);
-
- const StringData nextPart = readCString(reader);
- scratch->append(nextPart.rawData(), nextPart.size());
- }
-
- return *scratch;
- }
-
- string readInvertedCString(BufReader* reader) {
- const char* start = static_cast<const char*>(reader->pos());
- const char* end = static_cast<const char*>(memchr(start, 0xFF, reader->remaining()));
- invariant(end);
- size_t actualBytes = end - start;
- string s(start, actualBytes);
- for (size_t i = 0; i < s.size(); i++) {
- s[i] = ~s[i];
- }
- reader->skip(1 + actualBytes);
- return s;
- }
-
- string readInvertedCStringWithNuls(BufReader* reader) {
- std::string out;
- do {
- if (!out.empty()) {
- // If this isn't our first pass through the loop it means we hit an NUL byte
- // encoded as "\xFF\00" in our inverted string.
- reader->skip(1);
- out += '\xFF'; // will be flipped to '\0' with rest of out before returning.
- }
+using std::string;
+
+namespace {
+typedef KeyString::TypeBits TypeBits;
+
+namespace CType {
+// canonical types namespace. (would be enum class CType: uint8_t in C++11)
+// Note 0-9 and 246-255 are disallowed and reserved for value encodings.
+// For types that encode value information in the ctype byte, the value in this list is
+// the "generic" one to be used to represent all values of that ctype, such as in the
+// encoding of fields in Objects.
+const uint8_t kMinKey = 10;
+const uint8_t kUndefined = 15;
+const uint8_t kNullish = 20;
+const uint8_t kNumeric = 30;
+const uint8_t kStringLike = 60;
+const uint8_t kObject = 70;
+const uint8_t kArray = 80;
+const uint8_t kBinData = 90;
+const uint8_t kOID = 100;
+const uint8_t kBool = 110;
+const uint8_t kDate = 120;
+const uint8_t kTimestamp = 130;
+const uint8_t kRegEx = 140;
+const uint8_t kDBRef = 150;
+const uint8_t kCode = 160;
+const uint8_t kCodeWithScope = 170;
+const uint8_t kMaxKey = 240;
+
+// These are ordered by the numeric value of the values encoded in each format.
+// Therefore each format can be considered independently without considering
+// cross-format comparisons.
+const uint8_t kNumericNaN = kNumeric + 0;
+const uint8_t kNumericNegativeLargeDouble = kNumeric + 1; // <= -2**63 including -Inf
+const uint8_t kNumericNegative8ByteInt = kNumeric + 2;
+const uint8_t kNumericNegative7ByteInt = kNumeric + 3;
+const uint8_t kNumericNegative6ByteInt = kNumeric + 4;
+const uint8_t kNumericNegative5ByteInt = kNumeric + 5;
+const uint8_t kNumericNegative4ByteInt = kNumeric + 6;
+const uint8_t kNumericNegative3ByteInt = kNumeric + 7;
+const uint8_t kNumericNegative2ByteInt = kNumeric + 8;
+const uint8_t kNumericNegative1ByteInt = kNumeric + 9;
+const uint8_t kNumericNegativeSmallDouble = kNumeric + 10; // between 0 and -1 exclusive
+const uint8_t kNumericZero = kNumeric + 11;
+const uint8_t kNumericPositiveSmallDouble = kNumeric + 12; // between 0 and 1 exclusive
+const uint8_t kNumericPositive1ByteInt = kNumeric + 13;
+const uint8_t kNumericPositive2ByteInt = kNumeric + 14;
+const uint8_t kNumericPositive3ByteInt = kNumeric + 15;
+const uint8_t kNumericPositive4ByteInt = kNumeric + 16;
+const uint8_t kNumericPositive5ByteInt = kNumeric + 17;
+const uint8_t kNumericPositive6ByteInt = kNumeric + 18;
+const uint8_t kNumericPositive7ByteInt = kNumeric + 19;
+const uint8_t kNumericPositive8ByteInt = kNumeric + 20;
+const uint8_t kNumericPositiveLargeDouble = kNumeric + 21; // >= 2**63 including +Inf
+BOOST_STATIC_ASSERT(kNumericPositiveLargeDouble < kStringLike);
+
+const uint8_t kBoolFalse = kBool + 0;
+const uint8_t kBoolTrue = kBool + 1;
+BOOST_STATIC_ASSERT(kBoolTrue < kDate);
+
+size_t numBytesForInt(uint8_t ctype) {
+ if (ctype >= kNumericPositive1ByteInt) {
+ dassert(ctype <= kNumericPositive8ByteInt);
+ return ctype - kNumericPositive1ByteInt + 1;
+ }
- const char* start = static_cast<const char*>(reader->pos());
- const char* end = static_cast<const char*>(
- memchr(start, 0xFF, reader->remaining()));
- invariant(end);
- size_t actualBytes = end - start;
+ dassert(ctype <= kNumericNegative1ByteInt);
+ dassert(ctype >= kNumericNegative8ByteInt);
+ return kNumericNegative1ByteInt - ctype + 1;
+}
+} // namespace CType
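To make the integer ctypes concrete, numBytesForInt() recovers the payload width directly from the ctype byte; for example, from the constants above:

    // kNumericPositive3ByteInt is kNumeric + 15 == 45, and
    // 45 - kNumericPositive1ByteInt (43) + 1 == 3 payload bytes.
    dassert(CType::numBytesForInt(CType::kNumericPositive3ByteInt) == 3);
    // The negative side mirrors it:
    // kNumericNegative1ByteInt (39) - kNumericNegative3ByteInt (37) + 1 == 3.
    dassert(CType::numBytesForInt(CType::kNumericNegative3ByteInt) == 3);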
- out.append(start, actualBytes);
- reader->skip(1 + actualBytes);
- } while (reader->peek<unsigned char>() == 0x00);
+uint8_t bsonTypeToGenericKeyStringType(BSONType type) {
+ switch (type) {
+ case MinKey:
+ return CType::kMinKey;
- for (size_t i = 0; i < out.size(); i++) {
- out[i] = ~out[i];
- }
+ case EOO:
+ case jstNULL:
+ return CType::kNullish;
- return out;
- }
- } // namespace
+ case Undefined:
+ return CType::kUndefined;
+
+ case NumberDouble:
+ case NumberInt:
+ case NumberLong:
+ return CType::kNumeric;
+
+ case mongo::String:
+ case Symbol:
+ return CType::kStringLike;
+
+ case Object:
+ return CType::kObject;
+ case Array:
+ return CType::kArray;
+ case BinData:
+ return CType::kBinData;
+ case jstOID:
+ return CType::kOID;
+ case Bool:
+ return CType::kBool;
+ case Date:
+ return CType::kDate;
+ case bsonTimestamp:
+ return CType::kTimestamp;
+ case RegEx:
+ return CType::kRegEx;
+ case DBRef:
+ return CType::kDBRef;
+
+ case Code:
+ return CType::kCode;
+ case CodeWScope:
+ return CType::kCodeWithScope;
- void KeyString::resetToKey(const BSONObj& obj, Ordering ord, RecordId recordId) {
- resetToEmpty();
- _appendAllElementsForIndexing(obj, ord, kInclusive);
- appendRecordId(recordId);
+ case MaxKey:
+ return CType::kMaxKey;
+ default:
+ invariant(false);
}
-
- void KeyString::resetToKey(const BSONObj& obj, Ordering ord, Discriminator discriminator) {
- resetToEmpty();
- _appendAllElementsForIndexing(obj, ord, discriminator);
+}
+
+// First double that isn't an int64.
+const double kMinLargeDouble = 9223372036854775808.0; // 1ULL<<63
+
+const uint8_t kEnd = 0x4;
+
+// These overlay with CType or kEnd bytes and therefore must be less/greater than all of
+// them (and their inverses). They also can't equal 0 or 255 since that would collide with
+// the encoding of NUL bytes in strings as "\x00\xff".
+const uint8_t kLess = 1;
+const uint8_t kGreater = 254;
+} // namespace
+
+// some utility functions
+namespace {
+void memcpy_flipBits(void* dst, const void* src, size_t bytes) {
+ const char* input = static_cast<const char*>(src);
+ char* output = static_cast<char*>(dst);
+ const char* const end = input + bytes;
+ while (input != end) {
+ *output++ = ~(*input++);
}
+}
+
+template <typename T>
+T readType(BufReader* reader, bool inverted) {
+    // TODO: use C++11 static_assert to require that T is integral.
+ T t = ConstDataView(static_cast<const char*>(reader->skip(sizeof(T)))).read<T>();
+ if (inverted)
+ return ~t;
+ return t;
+}
+
+StringData readCString(BufReader* reader) {
+ const char* start = static_cast<const char*>(reader->pos());
+ const char* end = static_cast<const char*>(memchr(start, 0x0, reader->remaining()));
+ invariant(end);
+ size_t actualBytes = end - start;
+ reader->skip(1 + actualBytes);
+ return StringData(start, actualBytes);
+}
- // ----------------------------------------------------------------------
- // ----------- APPEND CODE -------------------------------------------
- // ----------------------------------------------------------------------
-
- void KeyString::_appendAllElementsForIndexing(const BSONObj& obj, Ordering ord,
- Discriminator discriminator) {
- int elemCount = 0;
- BSONObjIterator it(obj);
- while (auto elem = it.next()) {
- const int elemIdx = elemCount++;
- const bool invert = (ord.get(elemIdx) == -1);
-
- _appendBsonValue(elem, invert, NULL);
-
- dassert(elem.fieldNameSize() < 3); // fieldNameSize includes the NUL
-
- // IndexEntryComparison::makeQueryObject() encodes a discriminator in the first byte of
- // the field name. This discriminator overrides the passed in one. Normal elements only
- // have the NUL byte terminator. Entries stored in an index are not allowed to have a
- // discriminator.
- if (char ch = *elem.fieldName()) {
- // l for less / g for greater.
- invariant(ch == 'l' || ch == 'g');
- discriminator = ch == 'l' ? kExclusiveBefore : kExclusiveAfter;
- invariant(!it.more());
- }
- }
-
- // The discriminator forces this KeyString to compare Less/Greater than any KeyString with
- // the same prefix of keys. As an example, this can be used to land on the first key in the
- // index with the value "a" regardless of the RecordId. In compound indexes it can use a
- // prefix of the full key to ignore the later keys.
- switch (discriminator) {
- case kExclusiveBefore: _append(kLess, false); break;
- case kExclusiveAfter: _append(kGreater, false); break;
- case kInclusive: break; // No discriminator byte.
- }
-
- // TODO consider omitting kEnd when using a discriminator byte. It is not a storage format
- // change since keystrings with discriminators are not allowed to be stored.
- _append(kEnd, false);
+/**
+ * scratch must be empty when passed in. It will be used if there is a NUL byte in the
+ * output string. In that case the returned StringData will point into scratch, otherwise
+ * it will point directly into the input buffer.
+ */
+StringData readCStringWithNuls(BufReader* reader, std::string* scratch) {
+ const StringData initial = readCString(reader);
+ if (reader->peek<unsigned char>() != 0xFF)
+ return initial; // Don't alloc or copy for simple case with no NUL bytes.
+
+ scratch->append(initial.rawData(), initial.size());
+ while (reader->peek<unsigned char>() == 0xFF) {
+ // Each time we enter this loop it means we hit a NUL byte encoded as "\x00\xFF".
+ *scratch += '\0';
+ reader->skip(1);
+
+ const StringData nextPart = readCString(reader);
+ scratch->append(nextPart.rawData(), nextPart.size());
}
- void KeyString::appendRecordId(RecordId loc) {
- // The RecordId encoding must be able to determine the full length starting from the last
- // byte, without knowing where the first byte is since it is stored at the end of a
- // KeyString, and we need to be able to read the RecordId without decoding the whole thing.
- //
- // This encoding places a number (N) between 0 and 7 in both the high 3 bits of the first
- // byte and the low 3 bits of the last byte. This is the number of bytes between the first
- // and last byte (ie total bytes is N + 2). The remaining bits of the first and last bytes
- // are combined with the bits of the in-between bytes to store the 64-bit RecordId in
- // big-endian order. This does not encode negative RecordIds to give maximum space to
- // positive RecordIds which are the only ones that are allowed to be stored in an index.
-
- int64_t raw = loc.repr();
- if (raw < 0) {
- // Note: we encode RecordId::min() and RecordId() the same which is ok, as they are
- // never stored so they will never be compared to each other.
- invariant(raw == RecordId::min().repr());
- raw = 0;
- }
- const uint64_t value = static_cast<uint64_t>(raw);
- const int bitsNeeded = 64 - countLeadingZeros64(raw);
- const int extraBytesNeeded = bitsNeeded <= 10
- ? 0
- : ((bitsNeeded - 10) + 7) / 8; // ceil((bitsNeeded - 10) / 8)
-
- // extraBytesNeeded must fit in 3 bits.
- dassert(extraBytesNeeded >= 0 && extraBytesNeeded < 8);
-
- // firstByte combines highest 5 bits of value with extraBytesNeeded.
- const uint8_t firstByte = uint8_t((extraBytesNeeded << 5)
- | (value >> (5 + (extraBytesNeeded * 8))));
- // lastByte combines lowest 5 bits of value with extraBytesNeeded.
- const uint8_t lastByte = uint8_t((value << 3) | extraBytesNeeded);
-
- // RecordIds are never appended inverted.
- _append(firstByte, false);
- if (extraBytesNeeded) {
- const uint64_t extraBytes = endian::nativeToBig(value >> 5);
- // Only using the low-order extraBytesNeeded bytes of extraBytes.
- _appendBytes(reinterpret_cast<const char*>(&extraBytes) + sizeof(extraBytes)
- - extraBytesNeeded,
- extraBytesNeeded,
- false);
- }
- _append(lastByte, false);
+ return *scratch;
+}
+
+string readInvertedCString(BufReader* reader) {
+ const char* start = static_cast<const char*>(reader->pos());
+ const char* end = static_cast<const char*>(memchr(start, 0xFF, reader->remaining()));
+ invariant(end);
+ size_t actualBytes = end - start;
+ string s(start, actualBytes);
+ for (size_t i = 0; i < s.size(); i++) {
+ s[i] = ~s[i];
}
-
- void KeyString::appendTypeBits(const TypeBits& typeBits) {
- // As an optimization, encode AllZeros as a single 0 byte.
- if (typeBits.isAllZeros()) {
- _append(uint8_t(0), false);
- return;
+ reader->skip(1 + actualBytes);
+ return s;
+}
+
+string readInvertedCStringWithNuls(BufReader* reader) {
+ std::string out;
+ do {
+ if (!out.empty()) {
+            // If this isn't our first pass through the loop it means we hit a NUL byte
+            // encoded as "\xFF\x00" in our inverted string.
+ reader->skip(1);
+ out += '\xFF'; // will be flipped to '\0' with rest of out before returning.
}
- _appendBytes(typeBits.getBuffer(), typeBits.getSize(), false);
- }
+ const char* start = static_cast<const char*>(reader->pos());
+ const char* end = static_cast<const char*>(memchr(start, 0xFF, reader->remaining()));
+ invariant(end);
+ size_t actualBytes = end - start;
- void KeyString::_appendBool(bool val, bool invert) {
- _append(val ? CType::kBoolTrue : CType::kBoolFalse, invert);
- }
+ out.append(start, actualBytes);
+ reader->skip(1 + actualBytes);
+ } while (reader->peek<unsigned char>() == 0x00);
- void KeyString::_appendDate(Date_t val, bool invert) {
- _append(CType::kDate, invert);
- // see: http://en.wikipedia.org/wiki/Offset_binary
- uint64_t encoded = static_cast<uint64_t>(val.asInt64());
- encoded ^= (1LL << 63); // flip highest bit (equivalent to bias encoding)
- _append(endian::nativeToBig(encoded), invert);
+ for (size_t i = 0; i < out.size(); i++) {
+ out[i] = ~out[i];
}
- void KeyString::_appendTimestamp(Timestamp val, bool invert) {
- _append(CType::kTimestamp, invert);
- _append(endian::nativeToBig(val.asLL()), invert);
- }
-
- void KeyString::_appendOID(OID val, bool invert) {
- _append(CType::kOID, invert);
- _appendBytes(val.view().view(), OID::kOIDSize, invert);
+ return out;
+}
+} // namespace
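
To make the "\x00" -> "\x00\xFF" escaping concrete, here is a minimal standalone
sketch of the round trip the helpers above implement (plain C++; the names are
illustrative, not the mongo API):

    #include <cassert>
    #include <cstddef>
    #include <string>

    // Escape: each literal NUL becomes "\x00\xFF", so a bare NUL can still
    // act as the terminator of the encoded string.
    std::string escapeNuls(const std::string& in) {
        std::string out;
        for (char c : in) {
            out += c;
            if (c == '\0')
                out += '\xFF';
        }
        return out;
    }

    // Unescape: a NUL followed by 0xFF is a literal NUL.
    std::string unescapeNuls(const std::string& in) {
        std::string out;
        for (std::size_t i = 0; i < in.size(); i++) {
            out += in[i];
            if (in[i] == '\0') {
                assert(i + 1 < in.size() && in[i + 1] == '\xFF');
                i++;  // skip the 0xFF marker
            }
        }
        return out;
    }

    int main() {
        const std::string s("a\0b", 3);  // "a", NUL, "b"
        assert(unescapeNuls(escapeNuls(s)) == s);
    }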
+
+void KeyString::resetToKey(const BSONObj& obj, Ordering ord, RecordId recordId) {
+ resetToEmpty();
+ _appendAllElementsForIndexing(obj, ord, kInclusive);
+ appendRecordId(recordId);
+}
+
+void KeyString::resetToKey(const BSONObj& obj, Ordering ord, Discriminator discriminator) {
+ resetToEmpty();
+ _appendAllElementsForIndexing(obj, ord, discriminator);
+}
+
+// ----------------------------------------------------------------------
+// ----------- APPEND CODE -------------------------------------------
+// ----------------------------------------------------------------------
+
+void KeyString::_appendAllElementsForIndexing(const BSONObj& obj,
+ Ordering ord,
+ Discriminator discriminator) {
+ int elemCount = 0;
+ BSONObjIterator it(obj);
+ while (auto elem = it.next()) {
+ const int elemIdx = elemCount++;
+ const bool invert = (ord.get(elemIdx) == -1);
+
+ _appendBsonValue(elem, invert, NULL);
+
+ dassert(elem.fieldNameSize() < 3); // fieldNameSize includes the NUL
+
+ // IndexEntryComparison::makeQueryObject() encodes a discriminator in the first byte of
+        // the field name. This discriminator overrides the passed-in one. Normal elements only
+ // have the NUL byte terminator. Entries stored in an index are not allowed to have a
+ // discriminator.
+ if (char ch = *elem.fieldName()) {
+ // l for less / g for greater.
+ invariant(ch == 'l' || ch == 'g');
+ discriminator = ch == 'l' ? kExclusiveBefore : kExclusiveAfter;
+ invariant(!it.more());
+ }
}
- void KeyString::_appendString(StringData val, bool invert) {
- _typeBits.appendString();
- _append(CType::kStringLike, invert);
- _appendStringLike(val, invert);
+ // The discriminator forces this KeyString to compare Less/Greater than any KeyString with
+ // the same prefix of keys. As an example, this can be used to land on the first key in the
+ // index with the value "a" regardless of the RecordId. In compound indexes it can use a
+ // prefix of the full key to ignore the later keys.
+ switch (discriminator) {
+ case kExclusiveBefore:
+ _append(kLess, false);
+ break;
+ case kExclusiveAfter:
+ _append(kGreater, false);
+ break;
+ case kInclusive:
+ break; // No discriminator byte.
}
- void KeyString::_appendSymbol(StringData val, bool invert) {
- _typeBits.appendSymbol();
- _append(CType::kStringLike, invert); // Symbols and Strings compare equally
- _appendStringLike(val, invert);
+ // TODO consider omitting kEnd when using a discriminator byte. It is not a storage format
+ // change since keystrings with discriminators are not allowed to be stored.
+ _append(kEnd, false);
+}
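
The ordering trick only works because the kLess byte sorts below every possible
ctype byte and kGreater above it. A hedged sketch of that property; the byte
values here are hypothetical stand-ins, not the real constants from this file:

    #include <cassert>
    #include <string>

    int main() {
        const char kLess = '\x01', kGreater = '\x7e', someCType = '\x3c';
        const std::string prefix = "ka";                // stand-in for encoded fields
        const std::string before = prefix + kLess;      // kExclusiveBefore key
        const std::string after = prefix + kGreater;    // kExclusiveAfter key
        const std::string longer = prefix + someCType;  // same prefix, more data
        // Lexicographic (memcmp) order brackets every key sharing the prefix.
        assert(before < longer && longer < after);
    }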
+
+void KeyString::appendRecordId(RecordId loc) {
+ // The RecordId encoding must be able to determine the full length starting from the last
+ // byte, without knowing where the first byte is since it is stored at the end of a
+ // KeyString, and we need to be able to read the RecordId without decoding the whole thing.
+ //
+ // This encoding places a number (N) between 0 and 7 in both the high 3 bits of the first
+ // byte and the low 3 bits of the last byte. This is the number of bytes between the first
+ // and last byte (ie total bytes is N + 2). The remaining bits of the first and last bytes
+ // are combined with the bits of the in-between bytes to store the 64-bit RecordId in
+ // big-endian order. This does not encode negative RecordIds to give maximum space to
+ // positive RecordIds which are the only ones that are allowed to be stored in an index.
+
+ int64_t raw = loc.repr();
+ if (raw < 0) {
+ // Note: we encode RecordId::min() and RecordId() the same which is ok, as they are
+ // never stored so they will never be compared to each other.
+ invariant(raw == RecordId::min().repr());
+ raw = 0;
}
-
- void KeyString::_appendCode(StringData val, bool invert) {
- _append(CType::kCode, invert);
- _appendStringLike(val, invert);
+ const uint64_t value = static_cast<uint64_t>(raw);
+ const int bitsNeeded = 64 - countLeadingZeros64(raw);
+ const int extraBytesNeeded =
+ bitsNeeded <= 10 ? 0 : ((bitsNeeded - 10) + 7) / 8; // ceil((bitsNeeded - 10) / 8)
+
+ // extraBytesNeeded must fit in 3 bits.
+ dassert(extraBytesNeeded >= 0 && extraBytesNeeded < 8);
+
+ // firstByte combines highest 5 bits of value with extraBytesNeeded.
+ const uint8_t firstByte =
+ uint8_t((extraBytesNeeded << 5) | (value >> (5 + (extraBytesNeeded * 8))));
+ // lastByte combines lowest 5 bits of value with extraBytesNeeded.
+ const uint8_t lastByte = uint8_t((value << 3) | extraBytesNeeded);
+
+ // RecordIds are never appended inverted.
+ _append(firstByte, false);
+ if (extraBytesNeeded) {
+ const uint64_t extraBytes = endian::nativeToBig(value >> 5);
+ // Only using the low-order extraBytesNeeded bytes of extraBytes.
+ _appendBytes(reinterpret_cast<const char*>(&extraBytes) + sizeof(extraBytes) -
+ extraBytesNeeded,
+ extraBytesNeeded,
+ false);
}
-
- void KeyString::_appendCodeWString(const BSONCodeWScope& val, bool invert) {
- _append(CType::kCodeWithScope, invert);
- _appendStringLike(val.code, invert);
- _appendBson(val.scope, invert);
+ _append(lastByte, false);
+}
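
As a worked example, here is a standalone sketch that reimplements the layout
described in the comment above (illustrative only, not the real encoder):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // N extra bytes go in the high 3 bits of the first byte and the low 3
    // bits of the last byte; the remaining bits hold the value big-endian.
    std::vector<uint8_t> encodeRid(uint64_t value, int extraBytes) {
        std::vector<uint8_t> out;
        out.push_back(uint8_t((extraBytes << 5) | (value >> (5 + extraBytes * 8))));
        for (int i = extraBytes - 1; i >= 0; i--)
            out.push_back(uint8_t(value >> (5 + i * 8)));
        out.push_back(uint8_t((value << 3) | extraBytes));
        return out;
    }

    int main() {
        // 1 needs a single significant bit (<= 10), so no extra bytes.
        assert((encodeRid(1, 0) == std::vector<uint8_t>{0x00, 0x08}));
        // 0x1234 needs 13 bits, so one extra byte.
        assert((encodeRid(0x1234, 1) == std::vector<uint8_t>{0x20, 0x91, 0xa1}));
    }

The low 3 bits of the trailing byte (0x08, 0xa1 above) repeat the extra-byte
count, which is what later lets decodeRecordIdAtEnd() locate the start of the
RecordId given only the end of the buffer.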
+
+void KeyString::appendTypeBits(const TypeBits& typeBits) {
+ // As an optimization, encode AllZeros as a single 0 byte.
+ if (typeBits.isAllZeros()) {
+ _append(uint8_t(0), false);
+ return;
}
- void KeyString::_appendBinData(const BSONBinData& val, bool invert) {
- _append(CType::kBinData, invert);
- if (val.length < 0xff) {
- // size fits in one byte so use one byte to encode.
- _append(uint8_t(val.length), invert);
- }
- else {
- // Encode 0xff prefix to indicate that the size takes 4 bytes.
- _append(uint8_t(0xff), invert);
- _append(endian::nativeToBig(int32_t(val.length)), invert);
- }
- _append(uint8_t(val.type), invert);
- _appendBytes(val.data, val.length, invert);
+ _appendBytes(typeBits.getBuffer(), typeBits.getSize(), false);
+}
+
+void KeyString::_appendBool(bool val, bool invert) {
+ _append(val ? CType::kBoolTrue : CType::kBoolFalse, invert);
+}
+
+void KeyString::_appendDate(Date_t val, bool invert) {
+ _append(CType::kDate, invert);
+ // see: http://en.wikipedia.org/wiki/Offset_binary
+ uint64_t encoded = static_cast<uint64_t>(val.asInt64());
+ encoded ^= (1LL << 63); // flip highest bit (equivalent to bias encoding)
+ _append(endian::nativeToBig(encoded), invert);
+}
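
The XOR is the standard offset-binary trick: flipping the top bit of a
two's-complement value makes unsigned (and therefore byte-wise, big-endian)
comparison agree with signed comparison. A small self-contained check:

    #include <cassert>
    #include <cstdint>

    uint64_t biasEncode(int64_t v) {
        return static_cast<uint64_t>(v) ^ (1ULL << 63);
    }

    int main() {
        assert(biasEncode(-1) < biasEncode(0));
        assert(biasEncode(0) < biasEncode(1));
        assert(biasEncode(INT64_MIN) == 0);      // smallest date -> all zeros
        assert(biasEncode(INT64_MAX) == ~0ULL);  // largest date -> all ones
    }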
+
+void KeyString::_appendTimestamp(Timestamp val, bool invert) {
+ _append(CType::kTimestamp, invert);
+ _append(endian::nativeToBig(val.asLL()), invert);
+}
+
+void KeyString::_appendOID(OID val, bool invert) {
+ _append(CType::kOID, invert);
+ _appendBytes(val.view().view(), OID::kOIDSize, invert);
+}
+
+void KeyString::_appendString(StringData val, bool invert) {
+ _typeBits.appendString();
+ _append(CType::kStringLike, invert);
+ _appendStringLike(val, invert);
+}
+
+void KeyString::_appendSymbol(StringData val, bool invert) {
+ _typeBits.appendSymbol();
+ _append(CType::kStringLike, invert); // Symbols and Strings compare equally
+ _appendStringLike(val, invert);
+}
+
+void KeyString::_appendCode(StringData val, bool invert) {
+ _append(CType::kCode, invert);
+ _appendStringLike(val, invert);
+}
+
+void KeyString::_appendCodeWString(const BSONCodeWScope& val, bool invert) {
+ _append(CType::kCodeWithScope, invert);
+ _appendStringLike(val.code, invert);
+ _appendBson(val.scope, invert);
+}
+
+void KeyString::_appendBinData(const BSONBinData& val, bool invert) {
+ _append(CType::kBinData, invert);
+ if (val.length < 0xff) {
+ // size fits in one byte so use one byte to encode.
+ _append(uint8_t(val.length), invert);
+ } else {
+ // Encode 0xff prefix to indicate that the size takes 4 bytes.
+ _append(uint8_t(0xff), invert);
+ _append(endian::nativeToBig(int32_t(val.length)), invert);
}
-
- void KeyString::_appendRegex(const BSONRegEx& val, bool invert) {
- _append(CType::kRegEx, invert);
- // note: NULL is not allowed in pattern or flags
- _appendBytes(val.pattern.rawData(), val.pattern.size(), invert);
- _append(int8_t(0), invert);
- _appendBytes(val.flags.rawData(), val.flags.size(), invert);
- _append(int8_t(0), invert);
+ _append(uint8_t(val.type), invert);
+ _appendBytes(val.data, val.length, invert);
+}
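
A short sketch of just the length-prefix rule used above (illustrative helper,
not the mongo API):

    #include <cstdint>
    #include <vector>

    // Lengths below 0xff take one byte; otherwise a 0xff marker is followed
    // by the length as a big-endian 4-byte integer.
    void appendBinDataLength(std::vector<uint8_t>& out, uint32_t len) {
        if (len < 0xff) {
            out.push_back(uint8_t(len));
        } else {
            out.push_back(0xff);
            for (int shift = 24; shift >= 0; shift -= 8)
                out.push_back(uint8_t(len >> shift));
        }
    }
    // e.g. 16 -> {0x10}; 300 -> {0xff, 0x00, 0x00, 0x01, 0x2c}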
+
+void KeyString::_appendRegex(const BSONRegEx& val, bool invert) {
+ _append(CType::kRegEx, invert);
+    // note: NUL is not allowed in pattern or flags
+ _appendBytes(val.pattern.rawData(), val.pattern.size(), invert);
+ _append(int8_t(0), invert);
+ _appendBytes(val.flags.rawData(), val.flags.size(), invert);
+ _append(int8_t(0), invert);
+}
+
+void KeyString::_appendDBRef(const BSONDBRef& val, bool invert) {
+ _append(CType::kDBRef, invert);
+ _append(endian::nativeToBig(int32_t(val.ns.size())), invert);
+ _appendBytes(val.ns.rawData(), val.ns.size(), invert);
+ _appendBytes(val.oid.view().view(), OID::kOIDSize, invert);
+}
+
+void KeyString::_appendArray(const BSONArray& val, bool invert) {
+ _append(CType::kArray, invert);
+ BSONForEach(elem, val) {
+ // No generic ctype byte needed here since no name is encoded.
+ _appendBsonValue(elem, invert, NULL);
}
-
- void KeyString::_appendDBRef(const BSONDBRef& val, bool invert) {
- _append(CType::kDBRef, invert);
- _append(endian::nativeToBig(int32_t(val.ns.size())), invert);
- _appendBytes(val.ns.rawData(), val.ns.size(), invert);
- _appendBytes(val.oid.view().view(), OID::kOIDSize, invert);
+ _append(int8_t(0), invert);
+}
+
+void KeyString::_appendObject(const BSONObj& val, bool invert) {
+ _append(CType::kObject, invert);
+ _appendBson(val, invert);
+}
+
+void KeyString::_appendNumberDouble(const double num, bool invert) {
+ if (num == 0.0 && std::signbit(num)) {
+ _typeBits.appendNegativeZero();
+ } else {
+ _typeBits.appendNumberDouble();
}
- void KeyString::_appendArray(const BSONArray& val, bool invert) {
- _append(CType::kArray, invert);
- BSONForEach(elem, val) {
- // No generic ctype byte needed here since no name is encoded.
- _appendBsonValue(elem, invert, NULL);
- }
- _append(int8_t(0), invert);
+ // no special cases needed for Inf,
+ // see http://en.wikipedia.org/wiki/IEEE_754-1985#Positive_and_negative_infinity
+ if (std::isnan(num)) {
+ _append(CType::kNumericNaN, invert);
+ return;
}
- void KeyString::_appendObject(const BSONObj& val, bool invert) {
- _append(CType::kObject, invert);
- _appendBson(val, invert);
+ if (num == 0.0) {
+ // We are collapsing -0.0 and 0.0 to the same value here.
+ // This is correct as IEEE-754 specifies that they compare as equal,
+ // however this prevents roundtripping -0.0.
+ // So if you put a -0.0 in, you'll get 0.0 out.
+ // We believe this to be ok.
+ _append(CType::kNumericZero, invert);
+ return;
}
- void KeyString::_appendNumberDouble(const double num, bool invert) {
- if (num == 0.0 && std::signbit(num)) {
- _typeBits.appendNegativeZero();
- }
- else {
- _typeBits.appendNumberDouble();
- }
+ const bool isNegative = num < 0.0;
+ const double magnitude = isNegative ? -num : num;
- // no special cases needed for Inf,
- // see http://en.wikipedia.org/wiki/IEEE_754-1985#Positive_and_negative_infinity
- if (std::isnan(num)) {
- _append(CType::kNumericNaN, invert);
- return;
- }
-
- if (num == 0.0) {
- // We are collapsing -0.0 and 0.0 to the same value here.
- // This is correct as IEEE-754 specifies that they compare as equal,
- // however this prevents roundtripping -0.0.
- // So if you put a -0.0 in, you'll get 0.0 out.
- // We believe this to be ok.
- _append(CType::kNumericZero, invert);
- return;
- }
-
- const bool isNegative = num < 0.0;
- const double magnitude = isNegative ? -num : num;
-
- if (magnitude < 1.0) {
- // This includes subnormal numbers.
- _appendSmallDouble(num, invert);
- return;
- }
-
- if (magnitude < kMinLargeDouble) {
- uint64_t integerPart = uint64_t(magnitude);
- if (double(integerPart) == magnitude) {
- // No fractional part
- _appendPreshiftedIntegerPortion(integerPart << 1, isNegative, invert);
- return;
- }
+ if (magnitude < 1.0) {
+ // This includes subnormal numbers.
+ _appendSmallDouble(num, invert);
+ return;
+ }
- // There is a fractional part.
- _appendPreshiftedIntegerPortion((integerPart << 1) | 1, isNegative, invert);
-
- // Append the bytes of the mantissa that include fractional bits.
- const size_t fractionalBits = (53 - (64 - countLeadingZeros64(integerPart)));
- const size_t fractionalBytes = (fractionalBits + 7) / 8;
- dassert(fractionalBytes > 0);
- uint64_t mantissa;
- memcpy(&mantissa, &num, sizeof(mantissa));
- mantissa &= ~(uint64_t(-1) << fractionalBits); // set non-fractional bits to 0;
- mantissa = endian::nativeToBig(mantissa);
-
- const void* firstUsedByte =
- reinterpret_cast<const char*>((&mantissa) + 1) - fractionalBytes;
- _appendBytes(firstUsedByte, fractionalBytes, isNegative ? !invert : invert);
+ if (magnitude < kMinLargeDouble) {
+ uint64_t integerPart = uint64_t(magnitude);
+ if (double(integerPart) == magnitude) {
+ // No fractional part
+ _appendPreshiftedIntegerPortion(integerPart << 1, isNegative, invert);
return;
}
- _appendLargeDouble(num, invert);
+ // There is a fractional part.
+ _appendPreshiftedIntegerPortion((integerPart << 1) | 1, isNegative, invert);
+
+ // Append the bytes of the mantissa that include fractional bits.
+ const size_t fractionalBits = (53 - (64 - countLeadingZeros64(integerPart)));
+ const size_t fractionalBytes = (fractionalBits + 7) / 8;
+ dassert(fractionalBytes > 0);
+ uint64_t mantissa;
+ memcpy(&mantissa, &num, sizeof(mantissa));
+    mantissa &= ~(uint64_t(-1) << fractionalBits);  // set non-fractional bits to 0.
+ mantissa = endian::nativeToBig(mantissa);
+
+ const void* firstUsedByte =
+ reinterpret_cast<const char*>((&mantissa) + 1) - fractionalBytes;
+ _appendBytes(firstUsedByte, fractionalBytes, isNegative ? !invert : invert);
+ return;
}
- void KeyString::_appendNumberLong(const long long num, bool invert) {
- _typeBits.appendNumberLong();
- _appendInteger(num, invert);
- }
+ _appendLargeDouble(num, invert);
+}
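
A worked example of the mantissa bookkeeping in the fractional branch above,
using 3.5 (standalone sketch):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
        const double num = 3.5;             // integer part 3, fraction .5
        const int fractionalBits = 53 - 2;  // integerPart 3 uses 2 of the 53 bits
        const int fractionalBytes = (fractionalBits + 7) / 8;
        uint64_t mantissa;
        std::memcpy(&mantissa, &num, sizeof(mantissa));
        mantissa &= ~(uint64_t(-1) << fractionalBits);  // keep fractional bits
        assert(fractionalBytes == 7);             // 51 bits -> 7 bytes
        assert(mantissa == (uint64_t(1) << 50));  // only the 0.5 bit is set
    }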
- void KeyString::_appendNumberInt(const int num, bool invert) {
- _typeBits.appendNumberInt();
- _appendInteger(num, invert);
- }
+void KeyString::_appendNumberLong(const long long num, bool invert) {
+ _typeBits.appendNumberLong();
+ _appendInteger(num, invert);
+}
- void KeyString::_appendBsonValue(const BSONElement& elem,
- bool invert,
- const StringData* name) {
+void KeyString::_appendNumberInt(const int num, bool invert) {
+ _typeBits.appendNumberInt();
+ _appendInteger(num, invert);
+}
- if (name) {
- _appendBytes(name->rawData(), name->size() + 1, invert); // + 1 for NUL
- }
+void KeyString::_appendBsonValue(const BSONElement& elem, bool invert, const StringData* name) {
+ if (name) {
+ _appendBytes(name->rawData(), name->size() + 1, invert); // + 1 for NUL
+ }
- switch (elem.type()) {
+ switch (elem.type()) {
case MinKey:
case MaxKey:
case EOO:
@@ -550,10 +562,18 @@ namespace mongo {
_append(bsonTypeToGenericKeyStringType(elem.type()), invert);
break;
- case NumberDouble: _appendNumberDouble(elem._numberDouble(), invert); break;
- case String: _appendString(elem.valueStringData(), invert); break;
- case Object: _appendObject(elem.Obj(), invert); break;
- case Array: _appendArray(BSONArray(elem.Obj()), invert); break;
+ case NumberDouble:
+ _appendNumberDouble(elem._numberDouble(), invert);
+ break;
+ case String:
+ _appendString(elem.valueStringData(), invert);
+ break;
+ case Object:
+ _appendObject(elem.Obj(), invert);
+ break;
+ case Array:
+ _appendArray(BSONArray(elem.Obj()), invert);
+ break;
case BinData: {
int len;
const char* data = elem.binData(len);
@@ -561,606 +581,635 @@ namespace mongo {
break;
}
- case jstOID: _appendOID(elem.__oid(), invert); break;
- case Bool: _appendBool(elem.boolean(), invert); break;
- case Date: _appendDate(elem.date(), invert); break;
+ case jstOID:
+ _appendOID(elem.__oid(), invert);
+ break;
+ case Bool:
+ _appendBool(elem.boolean(), invert);
+ break;
+ case Date:
+ _appendDate(elem.date(), invert);
+ break;
- case RegEx: _appendRegex(BSONRegEx(elem.regex(), elem.regexFlags()), invert); break;
- case DBRef: _appendDBRef(BSONDBRef(elem.dbrefNS(), elem.dbrefOID()), invert); break;
- case Symbol: _appendSymbol(elem.valueStringData(), invert); break;
- case Code: _appendCode(elem.valueStringData(), invert); break;
+ case RegEx:
+ _appendRegex(BSONRegEx(elem.regex(), elem.regexFlags()), invert);
+ break;
+ case DBRef:
+ _appendDBRef(BSONDBRef(elem.dbrefNS(), elem.dbrefOID()), invert);
+ break;
+ case Symbol:
+ _appendSymbol(elem.valueStringData(), invert);
+ break;
+ case Code:
+ _appendCode(elem.valueStringData(), invert);
+ break;
case CodeWScope: {
- _appendCodeWString(BSONCodeWScope(StringData(elem.codeWScopeCode(),
- elem.codeWScopeCodeLen()-1),
- BSONObj(elem.codeWScopeScopeData())),
- invert);
+ _appendCodeWString(
+ BSONCodeWScope(StringData(elem.codeWScopeCode(), elem.codeWScopeCodeLen() - 1),
+ BSONObj(elem.codeWScopeScopeData())),
+ invert);
break;
}
- case NumberInt: _appendNumberInt(elem._numberInt(), invert); break;
- case bsonTimestamp: _appendTimestamp(elem.timestamp(), invert); break;
- case NumberLong: _appendNumberLong(elem._numberLong(), invert); break;
+ case NumberInt:
+ _appendNumberInt(elem._numberInt(), invert);
+ break;
+ case bsonTimestamp:
+ _appendTimestamp(elem.timestamp(), invert);
+ break;
+ case NumberLong:
+ _appendNumberLong(elem._numberLong(), invert);
+ break;
default:
invariant(false);
- }
}
+}
- /// -- lowest level
+/// -- lowest level
- void KeyString::_appendStringLike(StringData str, bool invert) {
- while (true) {
- size_t firstNul = strnlen(str.rawData(), str.size());
- // No NULs in string.
- _appendBytes(str.rawData(), firstNul, invert);
- if (firstNul == str.size() || firstNul == std::string::npos) {
- _append(int8_t(0), invert);
- break;
- }
-
- // replace "\x00" with "\x00\xFF"
- _appendBytes("\x00\xFF", 2, invert);
- str = str.substr(firstNul + 1); // skip over the NUL byte
+void KeyString::_appendStringLike(StringData str, bool invert) {
+ while (true) {
+ size_t firstNul = strnlen(str.rawData(), str.size());
+ // No NULs in string.
+ _appendBytes(str.rawData(), firstNul, invert);
+ if (firstNul == str.size() || firstNul == std::string::npos) {
+ _append(int8_t(0), invert);
+ break;
}
- }
- void KeyString::_appendBson(const BSONObj& obj, bool invert) {
- BSONForEach(elem, obj) {
- // Force the order to be based on (ctype, name, value).
- _append(bsonTypeToGenericKeyStringType(elem.type()), invert);
- StringData name = elem.fieldNameStringData();
- _appendBsonValue(elem, invert, &name);
- }
- _append(int8_t(0), invert);
+ // replace "\x00" with "\x00\xFF"
+ _appendBytes("\x00\xFF", 2, invert);
+ str = str.substr(firstNul + 1); // skip over the NUL byte
}
-
- void KeyString::_appendSmallDouble(double value, bool invert) {
- dassert(!std::isnan(value));
- dassert(value != 0.0);
-
- uint64_t data;
- memcpy(&data, &value, sizeof(data));
-
- if (value > 0) {
- _append(CType::kNumericPositiveSmallDouble, invert);
- _append(endian::nativeToBig(data), invert);
- }
- else {
- _append(CType::kNumericNegativeSmallDouble, invert);
- _append(endian::nativeToBig(data), !invert);
- }
+}
+
+void KeyString::_appendBson(const BSONObj& obj, bool invert) {
+ BSONForEach(elem, obj) {
+ // Force the order to be based on (ctype, name, value).
+ _append(bsonTypeToGenericKeyStringType(elem.type()), invert);
+ StringData name = elem.fieldNameStringData();
+ _appendBsonValue(elem, invert, &name);
+ }
+ _append(int8_t(0), invert);
+}
+
+void KeyString::_appendSmallDouble(double value, bool invert) {
+ dassert(!std::isnan(value));
+ dassert(value != 0.0);
+
+ uint64_t data;
+ memcpy(&data, &value, sizeof(data));
+
+ if (value > 0) {
+ _append(CType::kNumericPositiveSmallDouble, invert);
+ _append(endian::nativeToBig(data), invert);
+ } else {
+ _append(CType::kNumericNegativeSmallDouble, invert);
+ _append(endian::nativeToBig(data), !invert);
}
+}
- void KeyString::_appendLargeDouble(double value, bool invert) {
- dassert(!std::isnan(value));
- dassert(value != 0.0);
+void KeyString::_appendLargeDouble(double value, bool invert) {
+ dassert(!std::isnan(value));
+ dassert(value != 0.0);
- uint64_t data;
- memcpy(&data, &value, sizeof(data));
+ uint64_t data;
+ memcpy(&data, &value, sizeof(data));
- if (value > 0) {
- _append(CType::kNumericPositiveLargeDouble, invert);
- _append(endian::nativeToBig(data), invert);
- }
- else {
- _append(CType::kNumericNegativeLargeDouble, invert);
- _append(endian::nativeToBig(data), !invert);
- }
+ if (value > 0) {
+ _append(CType::kNumericPositiveLargeDouble, invert);
+ _append(endian::nativeToBig(data), invert);
+ } else {
+ _append(CType::kNumericNegativeLargeDouble, invert);
+ _append(endian::nativeToBig(data), !invert);
+ }
+}
+
+// Handles NumberLong and NumberInt which are encoded identically except for the TypeBits.
+void KeyString::_appendInteger(const long long num, bool invert) {
+ if (num == std::numeric_limits<long long>::min()) {
+ // -2**63 is exactly representable as a double and not as a positive int64.
+ // Therefore we encode it as a double.
+ dassert(-double(num) == kMinLargeDouble);
+ _appendLargeDouble(double(num), invert);
+ return;
}
- // Handles NumberLong and NumberInt which are encoded identically except for the TypeBits.
- void KeyString::_appendInteger(const long long num, bool invert) {
- if (num == std::numeric_limits<long long>::min()) {
- // -2**63 is exactly representable as a double and not as a positive int64.
- // Therefore we encode it as a double.
- dassert(-double(num) == kMinLargeDouble);
- _appendLargeDouble(double(num), invert);
- return;
- }
-
- if (num == 0) {
- _append(CType::kNumericZero, invert);
- return;
- }
-
- const bool isNegative = num < 0;
- const uint64_t magnitude = isNegative ? -num : num;
- _appendPreshiftedIntegerPortion(magnitude << 1, isNegative, invert);
+ if (num == 0) {
+ _append(CType::kNumericZero, invert);
+ return;
}
+ const bool isNegative = num < 0;
+ const uint64_t magnitude = isNegative ? -num : num;
+ _appendPreshiftedIntegerPortion(magnitude << 1, isNegative, invert);
+}
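
The special case for the minimum value exists because its magnitude cannot
survive the pre-shift; a small sketch of the overflow being avoided:

    #include <cassert>
    #include <cstdint>
    #include <limits>

    int main() {
        const int64_t num = std::numeric_limits<int64_t>::min();
        // Negating num directly would overflow; its magnitude is 2^63.
        const uint64_t magnitude = static_cast<uint64_t>(num);
        assert(magnitude == uint64_t(1) << 63);
        assert((magnitude << 1) == 0);  // the pre-shift loses the only set bit
        assert(-double(num) == 9223372036854775808.0);  // 2^63 is an exact double
    }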
- void KeyString::_appendPreshiftedIntegerPortion(uint64_t value, bool isNegative, bool invert) {
- dassert(value != 0ull);
- dassert(value != 1ull);
- const size_t bytesNeeded = (64 - countLeadingZeros64(value) + 7) / 8;
+void KeyString::_appendPreshiftedIntegerPortion(uint64_t value, bool isNegative, bool invert) {
+ dassert(value != 0ull);
+ dassert(value != 1ull);
- // Append the low bytes of value in big endian order.
- value = endian::nativeToBig(value);
- const void* firstUsedByte = reinterpret_cast<const char*>((&value) + 1) - bytesNeeded;
+ const size_t bytesNeeded = (64 - countLeadingZeros64(value) + 7) / 8;
- if (isNegative) {
- _append(uint8_t(CType::kNumericNegative1ByteInt - (bytesNeeded - 1)), invert);
- _appendBytes(firstUsedByte, bytesNeeded, !invert);
- }
- else {
- _append(uint8_t(CType::kNumericPositive1ByteInt + (bytesNeeded - 1)), invert);
- _appendBytes(firstUsedByte, bytesNeeded, invert);
- }
- }
+ // Append the low bytes of value in big endian order.
+ value = endian::nativeToBig(value);
+ const void* firstUsedByte = reinterpret_cast<const char*>((&value) + 1) - bytesNeeded;
- template <typename T>
- void KeyString::_append(const T& thing, bool invert) {
- _appendBytes(&thing, sizeof(thing), invert);
+ if (isNegative) {
+ _append(uint8_t(CType::kNumericNegative1ByteInt - (bytesNeeded - 1)), invert);
+ _appendBytes(firstUsedByte, bytesNeeded, !invert);
+ } else {
+ _append(uint8_t(CType::kNumericPositive1ByteInt + (bytesNeeded - 1)), invert);
+ _appendBytes(firstUsedByte, bytesNeeded, invert);
}
+}
- void KeyString::_appendBytes(const void* source, size_t bytes, bool invert) {
- char* const base = _buffer.skip(bytes);
+template <typename T>
+void KeyString::_append(const T& thing, bool invert) {
+ _appendBytes(&thing, sizeof(thing), invert);
+}
- if (invert) {
- memcpy_flipBits(base, source, bytes);
+void KeyString::_appendBytes(const void* source, size_t bytes, bool invert) {
+ char* const base = _buffer.skip(bytes);
+
+ if (invert) {
+ memcpy_flipBits(base, source, bytes);
+ } else {
+ memcpy(base, source, bytes);
+ }
+}
+
+
+// ----------------------------------------------------------------------
+// ----------- DECODING CODE --------------------------------------------
+// ----------------------------------------------------------------------
+
+namespace {
+void toBsonValue(uint8_t ctype,
+ BufReader* reader,
+ TypeBits::Reader* typeBits,
+ bool inverted,
+ BSONObjBuilderValueStream* stream);
+
+void toBson(BufReader* reader, TypeBits::Reader* typeBits, bool inverted, BSONObjBuilder* builder) {
+ while (readType<uint8_t>(reader, inverted) != 0) {
+ if (inverted) {
+ std::string name = readInvertedCString(reader);
+ BSONObjBuilderValueStream& stream = *builder << name;
+ toBsonValue(readType<uint8_t>(reader, inverted), reader, typeBits, inverted, &stream);
} else {
- memcpy(base, source, bytes);
+ StringData name = readCString(reader);
+ BSONObjBuilderValueStream& stream = *builder << name;
+ toBsonValue(readType<uint8_t>(reader, inverted), reader, typeBits, inverted, &stream);
}
}
+}
+
+void toBsonValue(uint8_t ctype,
+ BufReader* reader,
+ TypeBits::Reader* typeBits,
+ bool inverted,
+ BSONObjBuilderValueStream* stream) {
+ // This is only used by the kNumeric.*ByteInt types, but needs to be declared up here
+ // since it is used across a fallthrough.
+ bool isNegative = false;
+
+ switch (ctype) {
+ case CType::kMinKey:
+ *stream << MINKEY;
+ break;
+ case CType::kMaxKey:
+ *stream << MAXKEY;
+ break;
+ case CType::kNullish:
+ *stream << BSONNULL;
+ break;
+ case CType::kUndefined:
+ *stream << BSONUndefined;
+ break;
+ case CType::kBoolTrue:
+ *stream << true;
+ break;
+ case CType::kBoolFalse:
+ *stream << false;
+ break;
- // ----------------------------------------------------------------------
- // ----------- DECODING CODE --------------------------------------------
- // ----------------------------------------------------------------------
-
- namespace {
- void toBsonValue(uint8_t ctype,
- BufReader* reader,
- TypeBits::Reader* typeBits,
- bool inverted,
- BSONObjBuilderValueStream* stream);
-
- void toBson(BufReader* reader, TypeBits::Reader* typeBits,
- bool inverted, BSONObjBuilder* builder) {
- while (readType<uint8_t>(reader, inverted) != 0) {
- if (inverted) {
- std::string name = readInvertedCString(reader);
- BSONObjBuilderValueStream& stream = *builder << name;
- toBsonValue(readType<uint8_t>(reader, inverted), reader, typeBits, inverted,
- &stream);
- }
- else {
- StringData name = readCString(reader);
- BSONObjBuilderValueStream& stream = *builder << name;
- toBsonValue(readType<uint8_t>(reader, inverted), reader, typeBits, inverted,
- &stream);
- }
- }
- }
+ case CType::kDate:
+ *stream << Date_t::fromMillisSinceEpoch(
+ endian::bigToNative(readType<uint64_t>(reader, inverted)) ^ (1LL << 63));
+ break;
- void toBsonValue(uint8_t ctype,
- BufReader* reader,
- TypeBits::Reader* typeBits,
- bool inverted,
- BSONObjBuilderValueStream* stream) {
-
- // This is only used by the kNumeric.*ByteInt types, but needs to be declared up here
- // since it is used across a fallthrough.
- bool isNegative = false;
-
- switch (ctype) {
- case CType::kMinKey: *stream << MINKEY; break;
- case CType::kMaxKey: *stream << MAXKEY; break;
- case CType::kNullish: *stream << BSONNULL; break;
- case CType::kUndefined: *stream << BSONUndefined; break;
-
- case CType::kBoolTrue: *stream << true; break;
- case CType::kBoolFalse: *stream << false; break;
-
- case CType::kDate:
- *stream << Date_t::fromMillisSinceEpoch(
- endian::bigToNative(readType<uint64_t>(reader, inverted)) ^ (1LL << 63));
- break;
-
- case CType::kTimestamp:
- *stream << Timestamp(endian::bigToNative(readType<uint64_t>(reader, inverted)));
- break;
-
- case CType::kOID:
- if (inverted) {
- char buf[OID::kOIDSize];
- memcpy_flipBits(buf, reader->skip(OID::kOIDSize), OID::kOIDSize);
- *stream << OID::from(buf);
- }
- else {
- *stream << OID::from(reader->skip(OID::kOIDSize));
- }
- break;
-
- case CType::kStringLike: {
- const uint8_t originalType = typeBits->readStringLike();
- if (inverted) {
- if (originalType == TypeBits::kString) {
- *stream << readInvertedCStringWithNuls(reader);
- }
- else {
- dassert(originalType == TypeBits::kSymbol);
- *stream << BSONSymbol(readInvertedCStringWithNuls(reader));
- }
-
- }
- else {
- std::string scratch;
- if (originalType == TypeBits::kString) {
- *stream << readCStringWithNuls(reader, &scratch);
- }
- else {
- dassert(originalType == TypeBits::kSymbol);
- *stream << BSONSymbol(readCStringWithNuls(reader, &scratch));
- }
- }
- break;
+ case CType::kTimestamp:
+ *stream << Timestamp(endian::bigToNative(readType<uint64_t>(reader, inverted)));
+ break;
+
+ case CType::kOID:
+ if (inverted) {
+ char buf[OID::kOIDSize];
+ memcpy_flipBits(buf, reader->skip(OID::kOIDSize), OID::kOIDSize);
+ *stream << OID::from(buf);
+ } else {
+ *stream << OID::from(reader->skip(OID::kOIDSize));
}
+ break;
- case CType::kCode: {
- if (inverted) {
- *stream << BSONCode(readInvertedCStringWithNuls(reader));
+ case CType::kStringLike: {
+ const uint8_t originalType = typeBits->readStringLike();
+ if (inverted) {
+ if (originalType == TypeBits::kString) {
+ *stream << readInvertedCStringWithNuls(reader);
+ } else {
+ dassert(originalType == TypeBits::kSymbol);
+ *stream << BSONSymbol(readInvertedCStringWithNuls(reader));
}
- else {
- std::string scratch;
- *stream << BSONCode(readCStringWithNuls(reader, &scratch));
+
+ } else {
+ std::string scratch;
+ if (originalType == TypeBits::kString) {
+ *stream << readCStringWithNuls(reader, &scratch);
+ } else {
+ dassert(originalType == TypeBits::kSymbol);
+ *stream << BSONSymbol(readCStringWithNuls(reader, &scratch));
}
- break;
}
+ break;
+ }
- case CType::kCodeWithScope: {
+ case CType::kCode: {
+ if (inverted) {
+ *stream << BSONCode(readInvertedCStringWithNuls(reader));
+ } else {
std::string scratch;
- StringData code; // will point to either scratch or the raw encoded bytes.
- if (inverted) {
- scratch = readInvertedCStringWithNuls(reader);
- code = scratch;
- }
- else {
- code = readCStringWithNuls(reader, &scratch);
- }
- // Not going to optimize CodeWScope.
- BSONObjBuilder scope;
- toBson(reader, typeBits, inverted, &scope);
- *stream << BSONCodeWScope(code, scope.done());
- break;
+ *stream << BSONCode(readCStringWithNuls(reader, &scratch));
}
+ break;
+ }
- case CType::kBinData: {
- size_t size = readType<uint8_t>(reader, inverted);
- if (size == 0xff) {
- // size was stored in 4 bytes.
- size = endian::bigToNative(readType<uint32_t>(reader, inverted));
- }
- BinDataType subType = BinDataType(readType<uint8_t>(reader, inverted));
- const void* ptr = reader->skip(size);
- if (!inverted) {
- *stream << BSONBinData(ptr, size, subType);
- }
- else {
- std::unique_ptr<char[]> flipped(new char[size]);
- memcpy_flipBits(flipped.get(), ptr, size);
- *stream << BSONBinData(flipped.get(), size, subType);
- }
- break;
+ case CType::kCodeWithScope: {
+ std::string scratch;
+ StringData code; // will point to either scratch or the raw encoded bytes.
+ if (inverted) {
+ scratch = readInvertedCStringWithNuls(reader);
+ code = scratch;
+ } else {
+ code = readCStringWithNuls(reader, &scratch);
}
+ // Not going to optimize CodeWScope.
+ BSONObjBuilder scope;
+ toBson(reader, typeBits, inverted, &scope);
+ *stream << BSONCodeWScope(code, scope.done());
+ break;
+ }
- case CType::kRegEx: {
- if (inverted) {
- string pattern = readInvertedCString(reader);
- string flags = readInvertedCString(reader);
- *stream << BSONRegEx(pattern, flags);
- }
- else {
- StringData pattern = readCString(reader);
- StringData flags = readCString(reader);
- *stream << BSONRegEx(pattern, flags);
- }
- break;
+ case CType::kBinData: {
+ size_t size = readType<uint8_t>(reader, inverted);
+ if (size == 0xff) {
+ // size was stored in 4 bytes.
+ size = endian::bigToNative(readType<uint32_t>(reader, inverted));
+ }
+ BinDataType subType = BinDataType(readType<uint8_t>(reader, inverted));
+ const void* ptr = reader->skip(size);
+ if (!inverted) {
+ *stream << BSONBinData(ptr, size, subType);
+ } else {
+ std::unique_ptr<char[]> flipped(new char[size]);
+ memcpy_flipBits(flipped.get(), ptr, size);
+ *stream << BSONBinData(flipped.get(), size, subType);
}
+ break;
+ }
- case CType::kDBRef: {
- size_t size = endian::bigToNative(readType<uint32_t>(reader, inverted));
- if (inverted) {
- std::unique_ptr<char[]> ns(new char[size]);
- memcpy_flipBits(ns.get(), reader->skip(size), size);
- char oidBytes[OID::kOIDSize];
- memcpy_flipBits(oidBytes, reader->skip(OID::kOIDSize), OID::kOIDSize);
- OID oid = OID::from(oidBytes);
- *stream << BSONDBRef(StringData(ns.get(), size), oid);
- }
- else {
- const char* ns = static_cast<const char*>(reader->skip(size));
- OID oid = OID::from(reader->skip(OID::kOIDSize));
- *stream << BSONDBRef(StringData(ns, size), oid);
- }
- break;
+ case CType::kRegEx: {
+ if (inverted) {
+ string pattern = readInvertedCString(reader);
+ string flags = readInvertedCString(reader);
+ *stream << BSONRegEx(pattern, flags);
+ } else {
+ StringData pattern = readCString(reader);
+ StringData flags = readCString(reader);
+ *stream << BSONRegEx(pattern, flags);
}
+ break;
+ }
- case CType::kObject: {
- BSONObjBuilder subObj(stream->subobjStart());
- toBson(reader, typeBits, inverted, &subObj);
- break;
+ case CType::kDBRef: {
+ size_t size = endian::bigToNative(readType<uint32_t>(reader, inverted));
+ if (inverted) {
+ std::unique_ptr<char[]> ns(new char[size]);
+ memcpy_flipBits(ns.get(), reader->skip(size), size);
+ char oidBytes[OID::kOIDSize];
+ memcpy_flipBits(oidBytes, reader->skip(OID::kOIDSize), OID::kOIDSize);
+ OID oid = OID::from(oidBytes);
+ *stream << BSONDBRef(StringData(ns.get(), size), oid);
+ } else {
+ const char* ns = static_cast<const char*>(reader->skip(size));
+ OID oid = OID::from(reader->skip(OID::kOIDSize));
+ *stream << BSONDBRef(StringData(ns, size), oid);
}
+ break;
+ }
- case CType::kArray: {
- BSONObjBuilder subArr(stream->subarrayStart());
- int index = 0;
- uint8_t elemType;
- while ((elemType = readType<uint8_t>(reader, inverted)) != 0) {
- toBsonValue(elemType,
- reader,
- typeBits,
- inverted,
- &(subArr << BSONObjBuilder::numStr(index++)));
- }
- break;
+ case CType::kObject: {
+ BSONObjBuilder subObj(stream->subobjStart());
+ toBson(reader, typeBits, inverted, &subObj);
+ break;
+ }
+
+ case CType::kArray: {
+ BSONObjBuilder subArr(stream->subarrayStart());
+ int index = 0;
+ uint8_t elemType;
+ while ((elemType = readType<uint8_t>(reader, inverted)) != 0) {
+ toBsonValue(elemType,
+ reader,
+ typeBits,
+ inverted,
+ &(subArr << BSONObjBuilder::numStr(index++)));
}
+ break;
+ }
- //
- // Numerics
- //
-
- case CType::kNumericNaN:
- invariant(typeBits->readNumeric() == TypeBits::kDouble);
- *stream << std::numeric_limits<double>::quiet_NaN();
- break;
-
- case CType::kNumericZero:
- switch(typeBits->readNumeric()) {
- case TypeBits::kDouble: *stream << 0.0; break;
- case TypeBits::kInt: *stream << 0; break;
- case TypeBits::kLong: *stream << 0ll; break;
- case TypeBits::kNegativeZero: *stream << -0.0; break;
- }
- break;
-
- case CType::kNumericNegativeLargeDouble:
- case CType::kNumericNegativeSmallDouble:
- inverted = !inverted;
- // fallthrough (format is the same as positive, but inverted)
-
- case CType::kNumericPositiveLargeDouble:
- case CType::kNumericPositiveSmallDouble: {
- // for these, the raw double was stored intact, including sign bit.
- const uint8_t originalType = typeBits->readNumeric();
- uint64_t encoded = readType<uint64_t>(reader, inverted);
- encoded = endian::bigToNative(encoded);
- double d;
- memcpy(&d, &encoded, sizeof(d));
-
- if (originalType == TypeBits::kDouble) {
- *stream << d;
- }
- else {
- // This can only happen for a single number.
- invariant(originalType == TypeBits::kLong);
- invariant(d == double(std::numeric_limits<long long>::min()));
- *stream << std::numeric_limits<long long>::min();
- }
+ //
+ // Numerics
+ //
- break;
+ case CType::kNumericNaN:
+ invariant(typeBits->readNumeric() == TypeBits::kDouble);
+ *stream << std::numeric_limits<double>::quiet_NaN();
+ break;
+
+ case CType::kNumericZero:
+ switch (typeBits->readNumeric()) {
+ case TypeBits::kDouble:
+ *stream << 0.0;
+ break;
+ case TypeBits::kInt:
+ *stream << 0;
+ break;
+ case TypeBits::kLong:
+ *stream << 0ll;
+ break;
+ case TypeBits::kNegativeZero:
+ *stream << -0.0;
+ break;
}
+ break;
- case CType::kNumericNegative8ByteInt:
- case CType::kNumericNegative7ByteInt:
- case CType::kNumericNegative6ByteInt:
- case CType::kNumericNegative5ByteInt:
- case CType::kNumericNegative4ByteInt:
- case CType::kNumericNegative3ByteInt:
- case CType::kNumericNegative2ByteInt:
- case CType::kNumericNegative1ByteInt:
- inverted = !inverted;
- isNegative = true;
- // fallthrough (format is the same as positive, but inverted)
-
- case CType::kNumericPositive1ByteInt:
- case CType::kNumericPositive2ByteInt:
- case CType::kNumericPositive3ByteInt:
- case CType::kNumericPositive4ByteInt:
- case CType::kNumericPositive5ByteInt:
- case CType::kNumericPositive6ByteInt:
- case CType::kNumericPositive7ByteInt:
- case CType::kNumericPositive8ByteInt: {
- const uint8_t originalType = typeBits->readNumeric();
-
- uint64_t encodedIntegerPart = 0;
- {
- size_t intBytesRemaining = CType::numBytesForInt(ctype);
- while (intBytesRemaining--) {
- encodedIntegerPart = (encodedIntegerPart << 8)
- | readType<uint8_t>(reader, inverted);
- }
- }
+ case CType::kNumericNegativeLargeDouble:
+ case CType::kNumericNegativeSmallDouble:
+ inverted = !inverted;
+ // fallthrough (format is the same as positive, but inverted)
+
+ case CType::kNumericPositiveLargeDouble:
+ case CType::kNumericPositiveSmallDouble: {
+ // for these, the raw double was stored intact, including sign bit.
+ const uint8_t originalType = typeBits->readNumeric();
+ uint64_t encoded = readType<uint64_t>(reader, inverted);
+ encoded = endian::bigToNative(encoded);
+ double d;
+ memcpy(&d, &encoded, sizeof(d));
+
+ if (originalType == TypeBits::kDouble) {
+ *stream << d;
+ } else {
+ // This can only happen for a single number.
+ invariant(originalType == TypeBits::kLong);
+ invariant(d == double(std::numeric_limits<long long>::min()));
+ *stream << std::numeric_limits<long long>::min();
+ }
- const bool haveFractionalPart = (encodedIntegerPart & 1);
- long long integerPart = encodedIntegerPart >> 1;
+ break;
+ }
- if (!haveFractionalPart) {
- if (isNegative)
- integerPart = -integerPart;
+ case CType::kNumericNegative8ByteInt:
+ case CType::kNumericNegative7ByteInt:
+ case CType::kNumericNegative6ByteInt:
+ case CType::kNumericNegative5ByteInt:
+ case CType::kNumericNegative4ByteInt:
+ case CType::kNumericNegative3ByteInt:
+ case CType::kNumericNegative2ByteInt:
+ case CType::kNumericNegative1ByteInt:
+ inverted = !inverted;
+ isNegative = true;
+ // fallthrough (format is the same as positive, but inverted)
+
+ case CType::kNumericPositive1ByteInt:
+ case CType::kNumericPositive2ByteInt:
+ case CType::kNumericPositive3ByteInt:
+ case CType::kNumericPositive4ByteInt:
+ case CType::kNumericPositive5ByteInt:
+ case CType::kNumericPositive6ByteInt:
+ case CType::kNumericPositive7ByteInt:
+ case CType::kNumericPositive8ByteInt: {
+ const uint8_t originalType = typeBits->readNumeric();
+
+ uint64_t encodedIntegerPart = 0;
+ {
+ size_t intBytesRemaining = CType::numBytesForInt(ctype);
+ while (intBytesRemaining--) {
+ encodedIntegerPart =
+ (encodedIntegerPart << 8) | readType<uint8_t>(reader, inverted);
+ }
+ }
- switch(originalType) {
- case TypeBits::kDouble: *stream << double(integerPart); break;
- case TypeBits::kInt: *stream << int(integerPart); break;
- case TypeBits::kLong: *stream << integerPart; break;
- case TypeBits::kNegativeZero: invariant(false);
- }
+ const bool haveFractionalPart = (encodedIntegerPart & 1);
+ long long integerPart = encodedIntegerPart >> 1;
+
+ if (!haveFractionalPart) {
+ if (isNegative)
+ integerPart = -integerPart;
+
+ switch (originalType) {
+ case TypeBits::kDouble:
+ *stream << double(integerPart);
+ break;
+ case TypeBits::kInt:
+ *stream << int(integerPart);
+ break;
+ case TypeBits::kLong:
+ *stream << integerPart;
+ break;
+ case TypeBits::kNegativeZero:
+ invariant(false);
+ }
+ } else {
+ // Nothing else can have a fractional part.
+ invariant(originalType == TypeBits::kDouble);
+
+ const uint64_t exponent = (64 - countLeadingZeros64(integerPart)) - 1;
+ const size_t fractionalBits = (52 - exponent);
+ const size_t fractionalBytes = (fractionalBits + 7) / 8;
+
+ // build up the bits of a double here.
+ uint64_t doubleBits = integerPart << fractionalBits;
+ doubleBits &= ~(1ull << 52); // clear implicit leading 1
+ doubleBits |= (exponent + 1023 /*bias*/) << 52;
+ if (isNegative) {
+ doubleBits |= (1ull << 63); // sign bit
}
- else {
- // Nothing else can have a fractional part.
- invariant(originalType == TypeBits::kDouble);
-
- const uint64_t exponent = (64 - countLeadingZeros64(integerPart)) - 1;
- const size_t fractionalBits = (52 - exponent);
- const size_t fractionalBytes = (fractionalBits + 7) / 8;
-
- // build up the bits of a double here.
- uint64_t doubleBits = integerPart << fractionalBits;
- doubleBits &= ~(1ull << 52); // clear implicit leading 1
- doubleBits |= (exponent + 1023/*bias*/) << 52;
- if (isNegative) {
- doubleBits |= (1ull << 63); // sign bit
- }
- for (size_t i = 0; i < fractionalBytes; i++) {
- // fold in the fractional bytes
- const uint64_t byte = readType<uint8_t>(reader, inverted);
- doubleBits |= (byte << ((fractionalBytes - i - 1) * 8));
- }
-
- double number;
- memcpy(&number, &doubleBits, sizeof(number));
- *stream << number;
+ for (size_t i = 0; i < fractionalBytes; i++) {
+ // fold in the fractional bytes
+ const uint64_t byte = readType<uint8_t>(reader, inverted);
+ doubleBits |= (byte << ((fractionalBytes - i - 1) * 8));
}
- break;
- }
- default: invariant(false);
- }
- }
- } // namespace
-
- BSONObj KeyString::toBson(const char* buffer, size_t len, Ordering ord,
- const TypeBits& typeBits) {
- BSONObjBuilder builder;
- BufReader reader(buffer, len);
- TypeBits::Reader typeBitsReader(typeBits);
- for (int i = 0; reader.remaining(); i++) {
- const bool invert = (ord.get(i) == -1);
- uint8_t ctype = readType<uint8_t>(&reader, invert);
- if (ctype == kLess || ctype == kGreater) {
- // This was just a discriminator which is logically part of the previous field. This
- // will only be encountered on queries, not in the keys stored in an index.
- // Note: this should probably affect the BSON key name of the last field, but it
- // must be read *after* the value so it isn't possible.
- ctype = readType<uint8_t>(&reader, invert);
+ double number;
+ memcpy(&number, &doubleBits, sizeof(number));
+ *stream << number;
}
- if (ctype == kEnd)
- break;
- toBsonValue(ctype, &reader, &typeBitsReader, invert, &(builder << ""));
+ break;
}
- return builder.obj();
+ default:
+ invariant(false);
}
+}
+} // namespace
+
+BSONObj KeyString::toBson(const char* buffer, size_t len, Ordering ord, const TypeBits& typeBits) {
+ BSONObjBuilder builder;
+ BufReader reader(buffer, len);
+ TypeBits::Reader typeBitsReader(typeBits);
+ for (int i = 0; reader.remaining(); i++) {
+ const bool invert = (ord.get(i) == -1);
+ uint8_t ctype = readType<uint8_t>(&reader, invert);
+ if (ctype == kLess || ctype == kGreater) {
+ // This was just a discriminator which is logically part of the previous field. This
+ // will only be encountered on queries, not in the keys stored in an index.
+            // Note: this should arguably affect the BSON key name of the last field, but
+            // that isn't possible since the discriminator is read *after* the value.
+ ctype = readType<uint8_t>(&reader, invert);
+ }
- BSONObj KeyString::toBson(StringData data, Ordering ord, const TypeBits& typeBits) {
- return toBson(data.rawData(), data.size(), ord, typeBits);
+ if (ctype == kEnd)
+ break;
+ toBsonValue(ctype, &reader, &typeBitsReader, invert, &(builder << ""));
}
-
- RecordId KeyString::decodeRecordIdAtEnd(const void* bufferRaw, size_t bufSize) {
- invariant(bufSize >= 2); // smallest possible encoding of a RecordId.
- const unsigned char* buffer = static_cast<const unsigned char*>(bufferRaw);
- const unsigned char lastByte = *(buffer + bufSize - 1);
- const size_t ridSize = 2 + (lastByte & 0x7); // stored in low 3 bits.
- invariant(bufSize >= ridSize);
- const unsigned char* firstBytePtr = buffer + bufSize - ridSize;
- BufReader reader(firstBytePtr, ridSize);
- return decodeRecordId(&reader);
+ return builder.obj();
+}
+
+BSONObj KeyString::toBson(StringData data, Ordering ord, const TypeBits& typeBits) {
+ return toBson(data.rawData(), data.size(), ord, typeBits);
+}
+
+RecordId KeyString::decodeRecordIdAtEnd(const void* bufferRaw, size_t bufSize) {
+ invariant(bufSize >= 2); // smallest possible encoding of a RecordId.
+ const unsigned char* buffer = static_cast<const unsigned char*>(bufferRaw);
+ const unsigned char lastByte = *(buffer + bufSize - 1);
+ const size_t ridSize = 2 + (lastByte & 0x7); // stored in low 3 bits.
+ invariant(bufSize >= ridSize);
+ const unsigned char* firstBytePtr = buffer + bufSize - ridSize;
+ BufReader reader(firstBytePtr, ridSize);
+ return decodeRecordId(&reader);
+}
+
+RecordId KeyString::decodeRecordId(BufReader* reader) {
+ const uint8_t firstByte = readType<uint8_t>(reader, false);
+ const uint8_t numExtraBytes = firstByte >> 5; // high 3 bits in firstByte
+ uint64_t repr = firstByte & 0x1f; // low 5 bits in firstByte
+ for (int i = 0; i < numExtraBytes; i++) {
+ repr = (repr << 8) | readType<uint8_t>(reader, false);
}
- RecordId KeyString::decodeRecordId(BufReader* reader) {
- const uint8_t firstByte = readType<uint8_t>(reader, false);
- const uint8_t numExtraBytes = firstByte >> 5; // high 3 bits in firstByte
- uint64_t repr = firstByte & 0x1f; // low 5 bits in firstByte
- for (int i = 0; i < numExtraBytes; i++) {
- repr = (repr << 8) | readType<uint8_t>(reader, false);
- }
+ const uint8_t lastByte = readType<uint8_t>(reader, false);
+ invariant((lastByte & 0x7) == numExtraBytes);
+ repr = (repr << 5) | (lastByte >> 3); // fold in high 5 bits of last byte
+ return RecordId(repr);
+}
- const uint8_t lastByte = readType<uint8_t>(reader, false);
- invariant((lastByte & 0x7) == numExtraBytes);
- repr = (repr << 5) | (lastByte >> 3); // fold in high 5 bits of last byte
- return RecordId(repr);
- }
+// ----------------------------------------------------------------------
+// --------- MISC class utils --------
+// ----------------------------------------------------------------------
- // ----------------------------------------------------------------------
- // --------- MISC class utils --------
- // ----------------------------------------------------------------------
+std::string KeyString::toString() const {
+ return toHex(getBuffer(), getSize());
+}
- std::string KeyString::toString() const {
- return toHex(getBuffer(), getSize());
- }
+int KeyString::compare(const KeyString& other) const {
+ int a = getSize();
+ int b = other.getSize();
- int KeyString::compare(const KeyString& other) const {
- int a = getSize();
- int b = other.getSize();
+ int min = std::min(a, b);
- int min = std::min(a, b);
+ int cmp = memcmp(getBuffer(), other.getBuffer(), min);
- int cmp = memcmp(getBuffer(), other.getBuffer(), min);
+ if (cmp) {
+ if (cmp < 0)
+ return -1;
+ return 1;
+ }
- if (cmp) {
- if (cmp < 0)
- return -1;
- return 1;
- }
+ // keys match
- // keys match
+ if (a == b)
+ return 0;
- if (a == b)
- return 0;
+ return a < b ? -1 : 1;
+}
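
A hedged usage sketch (assumes mongo's BSON macro and Ordering::make from the
surrounding codebase): per the TypeBits rationale, 1 and 1.0 must produce
identical key bytes, so only the type bits tell them apart.

    KeyString a(BSON("" << 1), Ordering::make(BSONObj()));
    KeyString b(BSON("" << 1.0), Ordering::make(BSONObj()));
    invariant(a.compare(b) == 0);  // equal as keys; only their TypeBits differ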
- return a < b ? -1 : 1;
+void KeyString::TypeBits::resetFromBuffer(BufReader* reader) {
+ if (!reader->remaining()) {
+ // This means AllZeros state was encoded as an empty buffer.
+ reset();
+ return;
}
-
- void KeyString::TypeBits::resetFromBuffer(BufReader* reader) {
- if (!reader->remaining()) {
- // This means AllZeros state was encoded as an empty buffer.
- reset();
- return;
- }
- const uint8_t firstByte = readType<uint8_t>(reader, false);
- if (firstByte & 0x80) {
- // firstByte is the size byte.
- _isAllZeros = false; // it wouldn't be encoded like this if it was.
+ const uint8_t firstByte = readType<uint8_t>(reader, false);
+ if (firstByte & 0x80) {
+ // firstByte is the size byte.
+ _isAllZeros = false; // it wouldn't be encoded like this if it was.
- _buf[0] = firstByte;
- const uint8_t remainingBytes = getSizeByte();
- memcpy(_buf + 1, reader->skip(remainingBytes), remainingBytes);
- return;
- }
+ _buf[0] = firstByte;
+ const uint8_t remainingBytes = getSizeByte();
+ memcpy(_buf + 1, reader->skip(remainingBytes), remainingBytes);
+ return;
+ }
- // In remaining cases, firstByte is the only byte.
+ // In remaining cases, firstByte is the only byte.
- if (firstByte == 0) {
- // This means AllZeros state was encoded as a single 0 byte.
- reset();
- return;
- }
-
- _isAllZeros = false;
- setSizeByte(1);
- _buf[1] = firstByte;
+ if (firstByte == 0) {
+ // This means AllZeros state was encoded as a single 0 byte.
+ reset();
+ return;
}
- void KeyString::TypeBits::appendBit(uint8_t oneOrZero) {
- dassert(oneOrZero == 0 || oneOrZero == 1);
+ _isAllZeros = false;
+ setSizeByte(1);
+ _buf[1] = firstByte;
+}
- if (oneOrZero == 1) _isAllZeros = false;
+void KeyString::TypeBits::appendBit(uint8_t oneOrZero) {
+ dassert(oneOrZero == 0 || oneOrZero == 1);
- const uint8_t byte = (_curBit / 8) + 1;
- const uint8_t offsetInByte = _curBit % 8;
- if (offsetInByte == 0) {
- setSizeByte(byte);
- _buf[byte] = oneOrZero; // zeros bits 1-7
- }
- else {
- _buf[byte] |= (oneOrZero << offsetInByte);
- }
-
- _curBit++;
+ if (oneOrZero == 1)
+ _isAllZeros = false;
+
+ const uint8_t byte = (_curBit / 8) + 1;
+ const uint8_t offsetInByte = _curBit % 8;
+ if (offsetInByte == 0) {
+ setSizeByte(byte);
+ _buf[byte] = oneOrZero; // zeros bits 1-7
+ } else {
+ _buf[byte] |= (oneOrZero << offsetInByte);
}
- uint8_t KeyString::TypeBits::Reader::readBit() {
- if (_typeBits._isAllZeros) return 0;
+ _curBit++;
+}
- const uint8_t byte = (_curBit / 8) + 1;
- const uint8_t offsetInByte = _curBit % 8;
- _curBit++;
+uint8_t KeyString::TypeBits::Reader::readBit() {
+ if (_typeBits._isAllZeros)
+ return 0;
- dassert(byte <= _typeBits.getSizeByte());
+ const uint8_t byte = (_curBit / 8) + 1;
+ const uint8_t offsetInByte = _curBit % 8;
+ _curBit++;
- return (_typeBits._buf[byte] & (1 << offsetInByte)) ? 1 : 0;
- }
+ dassert(byte <= _typeBits.getSizeByte());
+
+ return (_typeBits._buf[byte] & (1 << offsetInByte)) ? 1 : 0;
+}
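
A standalone sketch of the low-to-high packing that appendBit()/readBit()
implement over _buf (plain array, no size byte, illustrative only):

    #include <cassert>
    #include <cstdint>

    struct Bits {
        uint8_t buf[16] = {};
        int curBit = 0;
        void appendBit(uint8_t b) {
            buf[curBit / 8] |= uint8_t(b << (curBit % 8));
            curBit++;
        }
        uint8_t readBit(int bit) const {
            return (buf[bit / 8] >> (bit % 8)) & 1;
        }
    };

    int main() {
        Bits t;
        t.appendBit(0x1 & 1);   // appendNumberDouble(): kDouble's low bit...
        t.appendBit(0x1 >> 1);  // ...then its high bit
        assert(t.buf[0] == 0x01 && t.readBit(0) == 1 && t.readBit(1) == 0);
    }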
-} // namespace mongo
+} // namespace mongo
diff --git a/src/mongo/db/storage/key_string.h b/src/mongo/db/storage/key_string.h
index 04b341521bf..7ef8ee0e723 100644
--- a/src/mongo/db/storage/key_string.h
+++ b/src/mongo/db/storage/key_string.h
@@ -39,287 +39,315 @@
namespace mongo {
- class KeyString {
+class KeyString {
+public:
+ /**
+ * Encodes info needed to restore the original BSONTypes from a KeyString. They cannot be
+ * stored in place since we don't want them to affect the ordering (1 and 1.0 compare as
+ * equal).
+ */
+ class TypeBits {
public:
+ // Sufficient bytes to encode extra type information for any BSON key that fits in 1KB.
+ // The encoding format will need to change if we raise this limit.
+ static const uint8_t kMaxBytesNeeded = 127;
+
+ TypeBits() {
+ reset();
+ }
/**
- * Encodes info needed to restore the original BSONTypes from a KeyString. They cannot be
- * stored in place since we don't want them to affect the ordering (1 and 1.0 compare as
- * equal).
+ * If there are no bytes remaining, assumes AllZeros. Otherwise, reads bytes out of the
+     * BufReader in the format described in the getBuffer() documentation.
*/
- class TypeBits {
- public:
- // Sufficient bytes to encode extra type information for any BSON key that fits in 1KB.
- // The encoding format will need to change if we raise this limit.
- static const uint8_t kMaxBytesNeeded = 127;
-
- TypeBits() { reset(); }
-
- /**
- * If there are no bytes remaining, assumes AllZeros. Otherwise, reads bytes out of the
- * BufReader in the format described on the getBuffer() method.
- */
- void resetFromBuffer(BufReader* reader);
- static TypeBits fromBuffer(BufReader* reader) {
- TypeBits out;
- out.resetFromBuffer(reader);
- return out;
- }
-
- /**
- * If true, no bits have been set to one. This is true if no bits have been set at all.
- */
- bool isAllZeros() const { return _isAllZeros; }
-
- /**
- * These methods return a buffer and size which encodes all of the type bits in this
- * instance.
- *
- * Encoded format:
- * Case 1 (first byte has high bit set to 1):
- * Remaining bits of first byte encode number of follow-up bytes that are data
- * bytes. Note that _buf is always maintained in this format but these methods may
- * return one of the other formats, if possible, by skipping over the first byte.
- *
- * Case 2 (first byte is 0x0):
- * This encodes the "AllZeros" state which represents an infinite stream of bits set
- * to 0. Callers may optionally encode this case as an empty buffer if they have
- * another way to mark the end of the buffer. There are no follow-up bytes.
- *
- * Case 3 (first byte isn't 0x0 but has high bit set to 0):
- * The first byte is the only data byte. This can represent any 7-bit sequence or an
- * 8-bit sequence if the 8th bit is 0, since the 8th bit is the same as the bit that
- * is 1 if the first byte is the size byte. There are no follow-up bytes.
- *
- * Within data bytes (ie everything excluding the size byte if there is one), bits are
- * packed in from low to high.
- */
- const uint8_t* getBuffer() const { return getSize() == 1 ? _buf + 1 : _buf; }
- size_t getSize() const {
- if (_isAllZeros) { // Case 2
- dassert(_buf[1] == 0);
- return 1;
- }
-
- uint8_t rawSize = getSizeByte();
- dassert(rawSize >= 1); // 0 should be handled as isAllZeros.
- if (rawSize == 1 && !(_buf[1] & 0x80)) { // Case 3
- return 1;
- }
-
- return rawSize + 1; // Case 1
- }
-
- //
- // Everything below is only for use by KeyString.
- //
-
- // Note: No space is used if all bits are 0 so the most common cases should be 0x0.
- static const uint8_t kString = 0x0;
- static const uint8_t kSymbol = 0x1;
-
- static const uint8_t kInt = 0x0;
- static const uint8_t kDouble = 0x1;
- static const uint8_t kLong = 0x2;
- static const uint8_t kNegativeZero = 0x3; // decodes as a double
-
- void reset() {
- _curBit = 0;
- _isAllZeros = true;
- setSizeByte(0);
- _buf[1] = 0;
- }
+ void resetFromBuffer(BufReader* reader);
+ static TypeBits fromBuffer(BufReader* reader) {
+ TypeBits out;
+ out.resetFromBuffer(reader);
+ return out;
+ }
- void appendString() { appendBit(kString); }
- void appendSymbol() { appendBit(kSymbol); }
+ /**
+ * If true, no bits have been set to one. This is true if no bits have been set at all.
+ */
+ bool isAllZeros() const {
+ return _isAllZeros;
+ }
- void appendNumberDouble() { appendBit(kDouble & 1); appendBit(kDouble >> 1); }
- void appendNumberInt() { appendBit(kInt & 1); appendBit(kInt >> 1); }
- void appendNumberLong() { appendBit(kLong & 1); appendBit(kLong >> 1); }
- void appendNegativeZero() {
- appendBit(kNegativeZero & 1);
- appendBit(kNegativeZero >> 1);
+ /**
+ * These methods return a buffer and size which encodes all of the type bits in this
+ * instance.
+ *
+ * Encoded format:
+ * Case 1 (first byte has high bit set to 1):
+ * Remaining bits of first byte encode number of follow-up bytes that are data
+ * bytes. Note that _buf is always maintained in this format but these methods may
+ * return one of the other formats, if possible, by skipping over the first byte.
+ *
+ * Case 2 (first byte is 0x0):
+ * This encodes the "AllZeros" state which represents an infinite stream of bits set
+ * to 0. Callers may optionally encode this case as an empty buffer if they have
+ * another way to mark the end of the buffer. There are no follow-up bytes.
+ *
+ * Case 3 (first byte isn't 0x0 but has high bit set to 0):
+ * The first byte is the only data byte. This can represent any 7-bit sequence or an
+ * 8-bit sequence if the 8th bit is 0, since the 8th bit is the same as the bit that
+ * is 1 if the first byte is the size byte. There are no follow-up bytes.
+ *
+     * Within data bytes (i.e. everything excluding the size byte, if there is one), bits are
+ * packed in from low to high.
+ */
+ const uint8_t* getBuffer() const {
+ return getSize() == 1 ? _buf + 1 : _buf;
+ }
+ size_t getSize() const {
+ if (_isAllZeros) { // Case 2
+ dassert(_buf[1] == 0);
+ return 1;
}
- class Reader {
- public:
- /**
- * Passed in TypeBits must outlive this Reader instance.
- */
- explicit Reader(const TypeBits& typeBits) : _curBit(0), _typeBits(typeBits) {}
-
- uint8_t readStringLike() { return readBit(); }
- uint8_t readNumeric() {
- uint8_t lowBit = readBit();
- return lowBit | (readBit() << 1);
- }
-
- private:
- uint8_t readBit();
-
- size_t _curBit;
- const TypeBits& _typeBits;
- };
-
- private:
- /**
- * size only includes data bytes, not the size byte itself.
- */
- uint8_t getSizeByte() const { return _buf[0] & 0x3f; }
- void setSizeByte(uint8_t size) {
- dassert(size < kMaxBytesNeeded);
- _buf[0] = 0x80 | size;
+ uint8_t rawSize = getSizeByte();
+ dassert(rawSize >= 1); // 0 should be handled as isAllZeros.
+ if (rawSize == 1 && !(_buf[1] & 0x80)) { // Case 3
+ return 1;
}
- void appendBit(uint8_t oneOrZero);
-
- size_t _curBit;
- bool _isAllZeros;
+ return rawSize + 1; // Case 1
+ }
- // See getBuffer()/getSize() documentation for a description of how data is encoded.
- // Currently whole buffer is copied in default copy methods. If they ever show up as hot
- // in profiling, we should add copy operations that only copy the parts of _buf that are
- // in use.
- uint8_t _buf[1/*size*/ + kMaxBytesNeeded];
- };
+ //
+ // Everything below is only for use by KeyString.
+ //
- enum Discriminator {
- kInclusive, // Anything to be stored in an index must use this.
- kExclusiveBefore,
- kExclusiveAfter,
- };
+    // Note: no space is used if all bits are 0, so the most common cases should be 0x0.
+ static const uint8_t kString = 0x0;
+ static const uint8_t kSymbol = 0x1;
- KeyString() {}
+ static const uint8_t kInt = 0x0;
+ static const uint8_t kDouble = 0x1;
+ static const uint8_t kLong = 0x2;
+ static const uint8_t kNegativeZero = 0x3; // decodes as a double
- KeyString(const BSONObj& obj, Ordering ord, RecordId recordId) {
- resetToKey(obj, ord, recordId);
+ void reset() {
+ _curBit = 0;
+ _isAllZeros = true;
+ setSizeByte(0);
+ _buf[1] = 0;
}
- KeyString(const BSONObj& obj, Ordering ord, Discriminator discriminator = kInclusive) {
- resetToKey(obj, ord, discriminator);
+ void appendString() {
+ appendBit(kString);
+ }
+ void appendSymbol() {
+ appendBit(kSymbol);
}
- explicit KeyString(RecordId rid) {
- appendRecordId(rid);
+ void appendNumberDouble() {
+ appendBit(kDouble & 1);
+ appendBit(kDouble >> 1);
+ }
+ void appendNumberInt() {
+ appendBit(kInt & 1);
+ appendBit(kInt >> 1);
+ }
+ void appendNumberLong() {
+ appendBit(kLong & 1);
+ appendBit(kLong >> 1);
+ }
+ void appendNegativeZero() {
+ appendBit(kNegativeZero & 1);
+ appendBit(kNegativeZero >> 1);
}
- static BSONObj toBson(StringData data, Ordering ord, const TypeBits& types);
- static BSONObj toBson(const char* buffer, size_t len, Ordering ord,
- const TypeBits& types);
+ class Reader {
+ public:
+ /**
+ * Passed in TypeBits must outlive this Reader instance.
+ */
+ explicit Reader(const TypeBits& typeBits) : _curBit(0), _typeBits(typeBits) {}
- /**
- * Decodes a RecordId from the end of a buffer.
- */
- static RecordId decodeRecordIdAtEnd(const void* buf, size_t size);
+ uint8_t readStringLike() {
+ return readBit();
+ }
+ uint8_t readNumeric() {
+ uint8_t lowBit = readBit();
+ return lowBit | (readBit() << 1);
+ }
- /**
- * Decodes a RecordId, consuming all bytes needed from reader.
- */
- static RecordId decodeRecordId(BufReader* reader);
+ private:
+ uint8_t readBit();
- void appendRecordId(RecordId loc);
- void appendTypeBits(const TypeBits& bits);
+ size_t _curBit;
+ const TypeBits& _typeBits;
+ };
+ private:
/**
- * Resets to an empty state.
- * Equivalent to but faster than *this = KeyString()
+ * size only includes data bytes, not the size byte itself.
*/
- void resetToEmpty() {
- _buffer.reset();
- _typeBits.reset();
+ uint8_t getSizeByte() const {
+ return _buf[0] & 0x3f;
}
-
- void resetToKey(const BSONObj& obj, Ordering ord, RecordId recordId);
- void resetToKey(const BSONObj& obj, Ordering ord, Discriminator discriminator = kInclusive);
- void resetFromBuffer(const void* buffer, size_t size) {
- _buffer.reset();
- memcpy(_buffer.skip(size), buffer, size);
+ void setSizeByte(uint8_t size) {
+ dassert(size < kMaxBytesNeeded);
+ _buf[0] = 0x80 | size;
}
- const char* getBuffer() const { return _buffer.buf(); }
- size_t getSize() const { return _buffer.len(); }
- bool isEmpty() const { return _buffer.len() == 0; }
-
- const TypeBits& getTypeBits() const { return _typeBits; }
+ void appendBit(uint8_t oneOrZero);
- int compare(const KeyString& other) const;
+ size_t _curBit;
+ bool _isAllZeros;
- /**
- * @return a hex encoding of this key
- */
- std::string toString() const;
-
- private:
-
- void _appendAllElementsForIndexing(const BSONObj& obj, Ordering ord,
- Discriminator discriminator);
-
- void _appendBool(bool val, bool invert);
- void _appendDate(Date_t val, bool invert);
- void _appendTimestamp(Timestamp val, bool invert);
- void _appendOID(OID val, bool invert);
- void _appendString(StringData val, bool invert);
- void _appendSymbol(StringData val, bool invert);
- void _appendCode(StringData val, bool invert);
- void _appendCodeWString(const BSONCodeWScope& val, bool invert);
- void _appendBinData(const BSONBinData& val, bool invert);
- void _appendRegex(const BSONRegEx& val, bool invert);
- void _appendDBRef(const BSONDBRef& val, bool invert);
- void _appendArray(const BSONArray& val, bool invert);
- void _appendObject(const BSONObj& val, bool invert);
- void _appendNumberDouble(const double num, bool invert);
- void _appendNumberLong(const long long num, bool invert);
- void _appendNumberInt(const int num, bool invert);
+ // See getBuffer()/getSize() documentation for a description of how data is encoded.
+    // Currently the whole buffer is copied by the default copy methods. If they ever show up
+    // as hot in profiling, we should add copy operations that only copy the parts of _buf
+    // that are in use.
+ uint8_t _buf[1 /*size*/ + kMaxBytesNeeded];
+ };
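A minimal decoding sketch for the three size cases documented on getBuffer()/getSize()
above (illustrative only, not part of this commit; it operates on an external buffer as
returned by getBuffer()):

    #include <cstddef>
    #include <cstdint>

    // Hypothetical standalone helper mirroring the TypeBits::getSize() logic.
    size_t typeBitsEncodedSize(const uint8_t* buf) {
        if (buf[0] == 0x00)  // Case 2: AllZeros marker, no follow-up bytes.
            return 1;
        if (!(buf[0] & 0x80))  // Case 3: a lone data byte, high bit clear.
            return 1;
        // Case 1: size byte plus that many data bytes. Note the class itself masks
        // the size byte with 0x3f; 0x7f matches the "remaining bits" wording above.
        return (buf[0] & 0x7f) + 1;
    }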
- /**
- * @param name - optional, can be NULL
- * if NULL, not included in encoding
- * if not NULL, put in after type, before value
- */
- void _appendBsonValue(const BSONElement& elem,
- bool invert,
- const StringData* name);
-
- void _appendStringLike(StringData str, bool invert);
- void _appendBson(const BSONObj& obj, bool invert);
- void _appendSmallDouble(double value, bool invert);
- void _appendLargeDouble(double value, bool invert);
- void _appendInteger(const long long num, bool invert);
- void _appendPreshiftedIntegerPortion(uint64_t value, bool isNegative, bool invert);
-
- template <typename T> void _append(const T& thing, bool invert);
- void _appendBytes(const void* source, size_t bytes, bool invert);
-
- TypeBits _typeBits;
- StackBufBuilder _buffer;
+ enum Discriminator {
+ kInclusive, // Anything to be stored in an index must use this.
+ kExclusiveBefore,
+ kExclusiveAfter,
};
- inline bool operator<(const KeyString& lhs, const KeyString& rhs) {
- return lhs.compare(rhs) < 0;
+ KeyString() {}
+
+ KeyString(const BSONObj& obj, Ordering ord, RecordId recordId) {
+ resetToKey(obj, ord, recordId);
}
- inline bool operator<=(const KeyString& lhs, const KeyString& rhs) {
- return lhs.compare(rhs) <= 0;
+ KeyString(const BSONObj& obj, Ordering ord, Discriminator discriminator = kInclusive) {
+ resetToKey(obj, ord, discriminator);
}
- inline bool operator==(const KeyString& lhs, const KeyString& rhs) {
- return lhs.compare(rhs) == 0;
+ explicit KeyString(RecordId rid) {
+ appendRecordId(rid);
}
- inline bool operator>(const KeyString& lhs, const KeyString& rhs) {
- return lhs.compare(rhs) > 0;
+ static BSONObj toBson(StringData data, Ordering ord, const TypeBits& types);
+ static BSONObj toBson(const char* buffer, size_t len, Ordering ord, const TypeBits& types);
+
+ /**
+ * Decodes a RecordId from the end of a buffer.
+ */
+ static RecordId decodeRecordIdAtEnd(const void* buf, size_t size);
+
+ /**
+ * Decodes a RecordId, consuming all bytes needed from reader.
+ */
+ static RecordId decodeRecordId(BufReader* reader);
+
+ void appendRecordId(RecordId loc);
+ void appendTypeBits(const TypeBits& bits);
+
+ /**
+ * Resets to an empty state.
+ * Equivalent to but faster than *this = KeyString()
+ */
+ void resetToEmpty() {
+ _buffer.reset();
+ _typeBits.reset();
}
- inline bool operator>=(const KeyString& lhs, const KeyString& rhs) {
- return lhs.compare(rhs) >= 0;
+ void resetToKey(const BSONObj& obj, Ordering ord, RecordId recordId);
+ void resetToKey(const BSONObj& obj, Ordering ord, Discriminator discriminator = kInclusive);
+ void resetFromBuffer(const void* buffer, size_t size) {
+ _buffer.reset();
+ memcpy(_buffer.skip(size), buffer, size);
}
- inline bool operator!=(const KeyString& lhs, const KeyString& rhs) {
- return !(lhs == rhs);
+ const char* getBuffer() const {
+ return _buffer.buf();
+ }
+ size_t getSize() const {
+ return _buffer.len();
+ }
+ bool isEmpty() const {
+ return _buffer.len() == 0;
}
- inline std::ostream& operator<<(std::ostream& stream, const KeyString& value) {
- return stream << value.toString();
+ const TypeBits& getTypeBits() const {
+ return _typeBits;
}
-} // namespace mongo
+ int compare(const KeyString& other) const;
+
+ /**
+ * @return a hex encoding of this key
+ */
+ std::string toString() const;
+
+private:
+ void _appendAllElementsForIndexing(const BSONObj& obj,
+ Ordering ord,
+ Discriminator discriminator);
+
+ void _appendBool(bool val, bool invert);
+ void _appendDate(Date_t val, bool invert);
+ void _appendTimestamp(Timestamp val, bool invert);
+ void _appendOID(OID val, bool invert);
+ void _appendString(StringData val, bool invert);
+ void _appendSymbol(StringData val, bool invert);
+ void _appendCode(StringData val, bool invert);
+ void _appendCodeWString(const BSONCodeWScope& val, bool invert);
+ void _appendBinData(const BSONBinData& val, bool invert);
+ void _appendRegex(const BSONRegEx& val, bool invert);
+ void _appendDBRef(const BSONDBRef& val, bool invert);
+ void _appendArray(const BSONArray& val, bool invert);
+ void _appendObject(const BSONObj& val, bool invert);
+ void _appendNumberDouble(const double num, bool invert);
+ void _appendNumberLong(const long long num, bool invert);
+ void _appendNumberInt(const int num, bool invert);
+
+ /**
+ * @param name - optional, can be NULL
+ * if NULL, not included in encoding
+ * if not NULL, put in after type, before value
+ */
+ void _appendBsonValue(const BSONElement& elem, bool invert, const StringData* name);
+
+ void _appendStringLike(StringData str, bool invert);
+ void _appendBson(const BSONObj& obj, bool invert);
+ void _appendSmallDouble(double value, bool invert);
+ void _appendLargeDouble(double value, bool invert);
+ void _appendInteger(const long long num, bool invert);
+ void _appendPreshiftedIntegerPortion(uint64_t value, bool isNegative, bool invert);
+
+ template <typename T>
+ void _append(const T& thing, bool invert);
+ void _appendBytes(const void* source, size_t bytes, bool invert);
+
+ TypeBits _typeBits;
+ StackBufBuilder _buffer;
+};
+
+inline bool operator<(const KeyString& lhs, const KeyString& rhs) {
+ return lhs.compare(rhs) < 0;
+}
+
+inline bool operator<=(const KeyString& lhs, const KeyString& rhs) {
+ return lhs.compare(rhs) <= 0;
+}
+
+inline bool operator==(const KeyString& lhs, const KeyString& rhs) {
+ return lhs.compare(rhs) == 0;
+}
+
+inline bool operator>(const KeyString& lhs, const KeyString& rhs) {
+ return lhs.compare(rhs) > 0;
+}
+
+inline bool operator>=(const KeyString& lhs, const KeyString& rhs) {
+ return lhs.compare(rhs) >= 0;
+}
+
+inline bool operator!=(const KeyString& lhs, const KeyString& rhs) {
+ return !(lhs == rhs);
+}
+
+inline std::ostream& operator<<(std::ostream& stream, const KeyString& value) {
+ return stream << value.toString();
+}
+
+} // namespace mongo
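For orientation, a minimal usage sketch of the API declared above (illustrative only,
not part of this commit; assumes the usual BSON test helpers are in scope):

    BSONObj a = BSON("" << 5);
    BSONObj b = BSON("" << 5.5);
    Ordering ord = Ordering::make(BSON("a" << 1));

    KeyString ka(a, ord);
    KeyString kb(b, ord);
    invariant(ka.compare(kb) < 0);  // memcmp order matches the BSON order

    // Round-tripping requires the TypeBits saved alongside the key, since the key
    // bytes alone cannot distinguish e.g. int 5 from double 5.0.
    BSONObj decoded = KeyString::toBson(ka.getBuffer(), ka.getSize(), ord, ka.getTypeBits());
    invariant(decoded.binaryEqual(a));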
diff --git a/src/mongo/db/storage/key_string_test.cpp b/src/mongo/db/storage/key_string_test.cpp
index e085be8fbb9..082d01366a9 100644
--- a/src/mongo/db/storage/key_string_test.cpp
+++ b/src/mongo/db/storage/key_string_test.cpp
@@ -61,61 +61,60 @@ TEST(KeyStringTest, Simple1) {
KeyString(b, ALL_ASCENDING, RecordId()));
}
-#define ROUNDTRIP_ORDER(x, order) do { \
- const BSONObj _orig = x; \
- const KeyString _ks(_orig, order); \
- const BSONObj _converted = toBson(_ks, order); \
- ASSERT_EQ(_converted, _orig); \
- ASSERT(_converted.binaryEqual(_orig)); \
+#define ROUNDTRIP_ORDER(x, order) \
+ do { \
+ const BSONObj _orig = x; \
+ const KeyString _ks(_orig, order); \
+ const BSONObj _converted = toBson(_ks, order); \
+ ASSERT_EQ(_converted, _orig); \
+ ASSERT(_converted.binaryEqual(_orig)); \
} while (0)
-#define ROUNDTRIP(x) do { \
- ROUNDTRIP_ORDER(x, ALL_ASCENDING); \
- ROUNDTRIP_ORDER(x, ONE_DESCENDING); \
+#define ROUNDTRIP(x) \
+ do { \
+ ROUNDTRIP_ORDER(x, ALL_ASCENDING); \
+ ROUNDTRIP_ORDER(x, ONE_DESCENDING); \
} while (0)
-#define COMPARES_SAME(_x,_y) do { \
- KeyString _xKS(_x, ONE_ASCENDING); \
- KeyString _yKS(_y, ONE_ASCENDING); \
- if (_x == _y) { \
- ASSERT_EQUALS(_xKS, _yKS); \
- } \
- else if (_x < _y) { \
- ASSERT_LESS_THAN(_xKS, _yKS); \
- } \
- else { \
- ASSERT_LESS_THAN(_yKS, _xKS); \
- } \
- \
- _xKS.resetToKey(_x, ONE_DESCENDING); \
- _yKS.resetToKey(_y, ONE_DESCENDING); \
- if (_x == _y) { \
- ASSERT_EQUALS(_xKS, _yKS); \
- } \
- else if (_x < _y) { \
- ASSERT_GREATER_THAN(_xKS, _yKS); \
- } \
- else { \
- ASSERT_GREATER_THAN(_yKS, _xKS); \
- } \
+#define COMPARES_SAME(_x, _y) \
+ do { \
+ KeyString _xKS(_x, ONE_ASCENDING); \
+ KeyString _yKS(_y, ONE_ASCENDING); \
+ if (_x == _y) { \
+ ASSERT_EQUALS(_xKS, _yKS); \
+ } else if (_x < _y) { \
+ ASSERT_LESS_THAN(_xKS, _yKS); \
+ } else { \
+ ASSERT_LESS_THAN(_yKS, _xKS); \
+ } \
+ \
+ _xKS.resetToKey(_x, ONE_DESCENDING); \
+ _yKS.resetToKey(_y, ONE_DESCENDING); \
+ if (_x == _y) { \
+ ASSERT_EQUALS(_xKS, _yKS); \
+ } else if (_x < _y) { \
+ ASSERT_GREATER_THAN(_xKS, _yKS); \
+ } else { \
+ ASSERT_GREATER_THAN(_yKS, _xKS); \
+ } \
} while (0)
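The do { ... } while (0) wrapper on these macros is what lets a multi-statement
expansion behave as a single statement, so the macros remain safe in unbraced
control flow (sketch; names illustrative):

    void check(bool shouldCheck, const BSONObj& obj) {
        if (shouldCheck)
            ROUNDTRIP(obj);  // expands to do { ... } while (0); -- still one statement
        else
            log() << "skipped";
    }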
TEST(KeyStringTest, ActualBytesDouble) {
// just one test like this for utter sanity
- BSONObj a = BSON("" << 5.5 );
+ BSONObj a = BSON("" << 5.5);
KeyString ks(a, ALL_ASCENDING);
log() << "size: " << ks.getSize() << " hex [" << toHex(ks.getBuffer(), ks.getSize()) << "]";
ASSERT_EQUALS(10U, ks.getSize());
- string hex = "2B" // kNumericPositive1ByteInt
- "0B" // (5 << 1) | 1
- "02000000000000" // fractional bytes of double
- "04"; // kEnd
+ string hex =
+ "2B" // kNumericPositive1ByteInt
+ "0B" // (5 << 1) | 1
+ "02000000000000" // fractional bytes of double
+ "04"; // kEnd
- ASSERT_EQUALS(hex,
- toHex(ks.getBuffer(), ks.getSize()));
+ ASSERT_EQUALS(hex, toHex(ks.getBuffer(), ks.getSize()));
ks.resetToKey(a, Ordering::make(BSON("a" << -1)));
@@ -124,23 +123,23 @@ TEST(KeyStringTest, ActualBytesDouble) {
// last byte (kEnd) doesn't get flipped
string hexFlipped;
- for ( size_t i = 0; i < hex.size()-2; i += 2 ) {
+ for (size_t i = 0; i < hex.size() - 2; i += 2) {
char c = fromHex(hex.c_str() + i);
c = ~c;
hexFlipped += toHex(&c, 1);
}
- hexFlipped += hex.substr(hex.size()-2);
+ hexFlipped += hex.substr(hex.size() - 2);
- ASSERT_EQUALS(hexFlipped,
- toHex(ks.getBuffer(), ks.getSize()));
+ ASSERT_EQUALS(hexFlipped, toHex(ks.getBuffer(), ks.getSize()));
}
TEST(KeyStringTest, AllTypesSimple) {
ROUNDTRIP(BSON("" << 5.5));
- ROUNDTRIP(BSON("" << "abc"));
+ ROUNDTRIP(BSON(""
+ << "abc"));
ROUNDTRIP(BSON("" << BSON("a" << 5)));
ROUNDTRIP(BSON("" << BSON_ARRAY("a" << 5)));
- ROUNDTRIP(BSON("" << BSONBinData( "abc", 3, bdtCustom )));
+ ROUNDTRIP(BSON("" << BSONBinData("abc", 3, bdtCustom)));
ROUNDTRIP(BSON("" << BSONUndefined));
ROUNDTRIP(BSON("" << OID("abcdefabcdefabcdefabcdef")));
ROUNDTRIP(BSON("" << true));
@@ -148,7 +147,9 @@ TEST(KeyStringTest, AllTypesSimple) {
ROUNDTRIP(BSON("" << BSONRegEx("asdf", "x")));
ROUNDTRIP(BSON("" << BSONDBRef("db.c", OID("010203040506070809101112"))));
ROUNDTRIP(BSON("" << BSONCode("abc_code")));
- ROUNDTRIP(BSON("" << BSONCodeWScope("def_code", BSON("x_scope" << "a"))));
+ ROUNDTRIP(BSON("" << BSONCodeWScope("def_code",
+ BSON("x_scope"
+ << "a"))));
ROUNDTRIP(BSON("" << 5));
ROUNDTRIP(BSON("" << Timestamp(123123, 123)));
ROUNDTRIP(BSON("" << 1235123123123LL));
@@ -176,26 +177,27 @@ TEST(KeyStringTest, Array1) {
KeyString b(emptyArray, ALL_ASCENDING, RecordId(5));
ASSERT_LESS_THAN(a, b);
}
-
}
TEST(KeyStringTest, SubDoc1) {
ROUNDTRIP(BSON("" << BSON("foo" << 2)));
- ROUNDTRIP(BSON("" << BSON("foo" << 2 << "bar" << "asd")));
+ ROUNDTRIP(BSON("" << BSON("foo" << 2 << "bar"
+ << "asd")));
ROUNDTRIP(BSON("" << BSON("foo" << BSON_ARRAY(2 << 4))));
}
TEST(KeyStringTest, SubDoc2) {
- BSONObj a = BSON("" << BSON("a" << "foo"));
+ BSONObj a = BSON("" << BSON("a"
+ << "foo"));
BSONObj b = BSON("" << BSON("b" << 5.5));
BSONObj c = BSON("" << BSON("c" << BSON("x" << 5)));
ROUNDTRIP(a);
ROUNDTRIP(b);
ROUNDTRIP(c);
- COMPARES_SAME(a,b);
- COMPARES_SAME(a,c);
- COMPARES_SAME(b,c);
+ COMPARES_SAME(a, b);
+ COMPARES_SAME(a, c);
+ COMPARES_SAME(b, c);
}
@@ -248,7 +250,6 @@ TEST(KeyStringTest, LotsOfNumbers1) {
ROUNDTRIP(BSON("" << -(static_cast<int>(x) + 1)));
ROUNDTRIP(BSON("" << -(static_cast<double>(x) + 1)));
ROUNDTRIP(BSON("" << -(static_cast<double>(x) + 1.1)));
-
}
}
@@ -264,7 +265,6 @@ TEST(KeyStringTest, LotsOfNumbers2) {
}
TEST(KeyStringTest, RecordIdOrder1) {
-
Ordering ordering = Ordering::make(BSON("a" << 1));
KeyString a(BSON("" << 5), ordering, RecordId::min());
@@ -277,11 +277,9 @@ TEST(KeyStringTest, RecordIdOrder1) {
ASSERT_LESS_THAN(b, c);
ASSERT_LESS_THAN(c, d);
ASSERT_LESS_THAN(d, e);
-
}
TEST(KeyStringTest, RecordIdOrder2) {
-
Ordering ordering = Ordering::make(BSON("a" << -1 << "b" << -1));
KeyString a(BSON("" << 5 << "" << 6), ordering, RecordId::min());
@@ -298,7 +296,6 @@ TEST(KeyStringTest, RecordIdOrder2) {
}
TEST(KeyStringTest, RecordIdOrder2Double) {
-
Ordering ordering = Ordering::make(BSON("a" << -1 << "b" << -1));
KeyString a(BSON("" << 5.0 << "" << 6.0), ordering, RecordId::min());
@@ -311,7 +308,6 @@ TEST(KeyStringTest, RecordIdOrder2Double) {
}
TEST(KeyStringTest, Timestamp) {
-
BSONObj a = BSON("" << Timestamp(0, 0));
BSONObj b = BSON("" << Timestamp(1234, 1));
BSONObj c = BSON("" << Timestamp(1234, 2));
@@ -356,20 +352,19 @@ TEST(KeyStringTest, Timestamp) {
ASSERT(kb.compare(kc) > 0);
ASSERT(kc.compare(kd) > 0);
}
-
}
TEST(KeyStringTest, AllTypesRoundtrip) {
- for ( int i = 1; i <= JSTypeMax; i++ ) {
+ for (int i = 1; i <= JSTypeMax; i++) {
{
BSONObjBuilder b;
- b.appendMinForType("", i );
+ b.appendMinForType("", i);
BSONObj o = b.obj();
ROUNDTRIP(o);
}
{
BSONObjBuilder b;
- b.appendMaxForType("", i );
+ b.appendMaxForType("", i);
BSONObj o = b.obj();
ROUNDTRIP(o);
}
@@ -382,7 +377,7 @@ const std::vector<BSONObj>& getInterestingElements() {
if (!elements.empty()) {
return elements;
}
-
+
// These are used to test strings that include NUL bytes.
const StringData ball("ball", StringData::LiteralTag());
const StringData ball00n("ball\0\0n", StringData::LiteralTag());
@@ -407,8 +402,10 @@ const std::vector<BSONObj>& getInterestingElements() {
elements.push_back(BSON("" << -2.2));
elements.push_back(BSON("" << -12312312.2123123123123));
elements.push_back(BSON("" << 12312312.2123123123123));
- elements.push_back(BSON("" << "aaa"));
- elements.push_back(BSON("" << "AAA"));
+ elements.push_back(BSON(""
+ << "aaa"));
+ elements.push_back(BSON(""
+ << "AAA"));
elements.push_back(BSON("" << ball));
elements.push_back(BSON("" << ball00n));
elements.push_back(BSON("" << BSONSymbol(ball)));
@@ -424,22 +421,22 @@ const std::vector<BSONObj>& getInterestingElements() {
elements.push_back(BSON("" << BSONCode("abc_code")));
elements.push_back(BSON("" << BSONCode(ball)));
elements.push_back(BSON("" << BSONCode(ball00n)));
- elements.push_back(BSON("" << BSONCodeWScope("def_code1", BSON("x_scope" << "a"))));
- elements.push_back(BSON("" << BSONCodeWScope("def_code2", BSON("x_scope" << "a"))));
- elements.push_back(BSON("" << BSONCodeWScope("def_code2", BSON("x_scope" << "b"))));
+ elements.push_back(BSON("" << BSONCodeWScope("def_code1",
+ BSON("x_scope"
+ << "a"))));
+ elements.push_back(BSON("" << BSONCodeWScope("def_code2",
+ BSON("x_scope"
+ << "a"))));
+ elements.push_back(BSON("" << BSONCodeWScope("def_code2",
+ BSON("x_scope"
+ << "b"))));
elements.push_back(BSON("" << BSONCodeWScope(ball, BSON("a" << 1))));
elements.push_back(BSON("" << BSONCodeWScope(ball00n, BSON("a" << 1))));
elements.push_back(BSON("" << true));
elements.push_back(BSON("" << false));
// Something that needs multiple bytes of typeBits
- elements.push_back(BSON("" << BSON_ARRAY(""
- << BSONSymbol("")
- << 0
- << 0ll
- << 0.0
- << -0.0
- )));
+ elements.push_back(BSON("" << BSON_ARRAY("" << BSONSymbol("") << 0 << 0ll << 0.0 << -0.0)));
//
// Interesting numeric cases
@@ -488,12 +485,12 @@ const std::vector<BSONObj>& getInterestingElements() {
elements.push_back(BSON("" << (-lNum + 1)));
elements.push_back(BSON("" << (-lNum - 1)));
- if (powerOfTwo <= 52) { // is dNum - 0.5 representable?
+ if (powerOfTwo <= 52) { // is dNum - 0.5 representable?
elements.push_back(BSON("" << (dNum - 0.5)));
elements.push_back(BSON("" << -(dNum - 0.5)));
}
- if (powerOfTwo <= 51) { // is dNum + 0.5 representable?
+ if (powerOfTwo <= 51) { // is dNum + 0.5 representable?
elements.push_back(BSON("" << (dNum + 0.5)));
elements.push_back(BSON("" << -(dNum + 0.5)));
}
@@ -503,8 +500,8 @@ const std::vector<BSONObj>& getInterestingElements() {
// Numbers around +/- numeric_limits<long long>::max() which can't be represented
// precisely as a double.
const long long maxLL = std::numeric_limits<long long>::max();
- const double closestAbove = 9223372036854775808.0; // 2**63
- const double closestBelow = 9223372036854774784.0; // 2**63 - epsilon
+ const double closestAbove = 9223372036854775808.0; // 2**63
+ const double closestBelow = 9223372036854774784.0; // 2**63 - epsilon
elements.push_back(BSON("" << maxLL));
elements.push_back(BSON("" << (maxLL - 1)));
@@ -521,9 +518,9 @@ const std::vector<BSONObj>& getInterestingElements() {
// Numbers around numeric_limits<long long>::min() which can be represented precisely as
// a double, but not as a positive long long.
const long long minLL = std::numeric_limits<long long>::min();
- const double closestBelow = -9223372036854777856.0; // -2**63 - epsilon
- const double equal = -9223372036854775808.0; // 2**63
- const double closestAbove = -9223372036854774784.0; // -2**63 + epsilon
+ const double closestBelow = -9223372036854777856.0; // -2**63 - epsilon
+        const double equal = -9223372036854775808.0;         // -2**63
+ const double closestAbove = -9223372036854774784.0; // -2**63 + epsilon
elements.push_back(BSON("" << minLL));
elements.push_back(BSON("" << equal));
@@ -537,44 +534,45 @@ const std::vector<BSONObj>& getInterestingElements() {
void testPermutation(const std::vector<BSONObj>& elementsOrig,
const std::vector<BSONObj>& orderings,
bool debug) {
-
// Since KeyStrings are compared using memcmp we can assume it provides a total ordering such
// that there won't be cases where (a < b && b < c && !(a < c)). This test still needs to ensure
// that it provides the *correct* total ordering.
for (size_t k = 0; k < orderings.size(); k++) {
BSONObj orderObj = orderings[k];
Ordering ordering = Ordering::make(orderObj);
- if (debug) log() << "ordering: " << orderObj;
+ if (debug)
+ log() << "ordering: " << orderObj;
std::vector<BSONObj> elements = elementsOrig;
std::stable_sort(elements.begin(), elements.end(), BSONObjCmp(orderObj));
for (size_t i = 0; i < elements.size(); i++) {
const BSONObj& o1 = elements[i];
- if (debug) log() << "\to1: " << o1;
+ if (debug)
+ log() << "\to1: " << o1;
ROUNDTRIP_ORDER(o1, ordering);
KeyString k1(o1, ordering);
- KeyString l1(BSON("l" << o1.firstElement()), ordering); // kLess
- KeyString g1(BSON("g" << o1.firstElement()), ordering); // kGreater
+ KeyString l1(BSON("l" << o1.firstElement()), ordering); // kLess
+ KeyString g1(BSON("g" << o1.firstElement()), ordering); // kGreater
ASSERT_LT(l1, k1);
ASSERT_GT(g1, k1);
if (i + 1 < elements.size()) {
const BSONObj& o2 = elements[i + 1];
- if (debug) log() << "\t\t o2: " << o2;
+ if (debug)
+ log() << "\t\t o2: " << o2;
KeyString k2(o2, ordering);
KeyString g2(BSON("g" << o2.firstElement()), ordering);
KeyString l2(BSON("l" << o2.firstElement()), ordering);
int bsonCmp = o1.woCompare(o2, ordering);
- invariant(bsonCmp <= 0); // We should be sorted...
+ invariant(bsonCmp <= 0); // We should be sorted...
if (bsonCmp == 0) {
ASSERT_EQ(k1, k2);
- }
- else {
+ } else {
ASSERT_LT(k1, k2);
}
@@ -592,8 +590,7 @@ void testPermutation(const std::vector<BSONObj>& elementsOrig,
ASSERT_EQ(g1, g2);
ASSERT_LT(l1, k2);
ASSERT_GT(g1, k2);
- }
- else {
+ } else {
// k1 is less than k2. Less(k2) and Greater(k1) should be between them.
ASSERT_LT(g1, k2);
ASSERT_GT(l2, k1);
@@ -619,7 +616,7 @@ TEST(KeyStringTest, AllPermCompare) {
}
TEST(KeyStringTest, AllPerm2Compare) {
- // This test can take over a minute without optimizations. Re-enable if you need to debug it.
+// This test can take over a minute without optimizations. Re-enable if you need to debug it.
#if !defined(MONGO_CONFIG_OPTIMIZED_BUILD)
log() << "\t\t\tskipping test on non-optimized build";
return;
@@ -654,13 +651,12 @@ TEST(KeyStringTest, AllPerm2Compare) {
testPermutation(elements, orderings, false);
}
-#define COMPARE_HELPER(LHS, RHS) \
- (((LHS) < (RHS)) ? -1 : (((LHS) == (RHS)) ? 0 : 1))
+#define COMPARE_HELPER(LHS, RHS) (((LHS) < (RHS)) ? -1 : (((LHS) == (RHS)) ? 0 : 1))
int compareLongToDouble(long long lhs, double rhs) {
if (rhs >= std::numeric_limits<long long>::max())
return -1;
- if (rhs < std::numeric_limits<long long>::min() )
+ if (rhs < std::numeric_limits<long long>::min())
return 1;
if (fabs(rhs) >= (1LL << 52)) {
@@ -670,7 +666,7 @@ int compareLongToDouble(long long lhs, double rhs) {
return COMPARE_HELPER(static_cast<double>(lhs), rhs);
}
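The (1LL << 52) guard above (a conservative bound) exists because a double has a
53-bit significand, so casting a large long long to double can conflate neighboring
integers (sketch, not part of this commit):

    long long a = (1LL << 53) + 1;
    double d = static_cast<double>(a);               // rounds to exactly 2^53
    invariant(d == static_cast<double>(1LL << 53));  // a and a - 1 compare equal as doubles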
-int compareNumbers(const BSONElement& lhs, const BSONElement& rhs ) {
+int compareNumbers(const BSONElement& lhs, const BSONElement& rhs) {
invariant(lhs.isNumber());
invariant(rhs.isNumber());
@@ -679,8 +675,7 @@ int compareNumbers(const BSONElement& lhs, const BSONElement& rhs ) {
return COMPARE_HELPER(lhs.numberLong(), rhs.numberLong());
}
return compareLongToDouble(lhs.numberLong(), rhs.Double());
- }
- else { // double
+ } else { // double
if (rhs.type() == NumberDouble) {
return COMPARE_HELPER(lhs.Double(), rhs.Double());
}
@@ -769,15 +764,13 @@ TEST(KeyStringTest, NumberOrderLots) {
const KeyString& b = *keyStrings[j];
ASSERT_EQUALS(a.compare(b), -b.compare(a));
- if (a.compare(b) != compareNumbers(numbers[i].firstElement(),
- numbers[j].firstElement())) {
+ if (a.compare(b) !=
+ compareNumbers(numbers[i].firstElement(), numbers[j].firstElement())) {
log() << numbers[i] << " " << numbers[j];
}
ASSERT_EQUALS(a.compare(b),
- compareNumbers(numbers[i].firstElement(),
- numbers[j].firstElement()));
-
+ compareNumbers(numbers[i].firstElement(), numbers[j].firstElement()));
}
}
}
@@ -786,7 +779,7 @@ TEST(KeyStringTest, RecordIds) {
for (int i = 0; i < 63; i++) {
const RecordId rid = RecordId(1ll << i);
- { // Test encoding / decoding of single RecordIds
+ { // Test encoding / decoding of single RecordIds
const KeyString ks(rid);
ASSERT_GTE(ks.getSize(), 2u);
ASSERT_LTE(ks.getSize(), 10u);
@@ -812,18 +805,21 @@ TEST(KeyStringTest, RecordIds) {
for (int j = 0; j < 63; j++) {
RecordId other = RecordId(1ll << j);
- if (rid == other) ASSERT_EQ(KeyString(rid), KeyString(other));
- if (rid < other) ASSERT_LT(KeyString(rid), KeyString(other));
- if (rid > other) ASSERT_GT(KeyString(rid), KeyString(other));
+ if (rid == other)
+ ASSERT_EQ(KeyString(rid), KeyString(other));
+ if (rid < other)
+ ASSERT_LT(KeyString(rid), KeyString(other));
+ if (rid > other)
+ ASSERT_GT(KeyString(rid), KeyString(other));
{
// Test concatenating RecordIds like in a unique index.
KeyString ks;
- ks.appendRecordId(RecordId::max()); // uses all bytes
+ ks.appendRecordId(RecordId::max()); // uses all bytes
ks.appendRecordId(rid);
- ks.appendRecordId(RecordId(0xDEADBEEF)); // uses some extra bytes
+ ks.appendRecordId(RecordId(0xDEADBEEF)); // uses some extra bytes
ks.appendRecordId(rid);
- ks.appendRecordId(RecordId(1)); // uses no extra bytes
+ ks.appendRecordId(RecordId(1)); // uses no extra bytes
ks.appendRecordId(rid);
ks.appendRecordId(other);
@@ -843,4 +839,3 @@ TEST(KeyStringTest, RecordIds) {
}
}
}
-
diff --git a/src/mongo/db/storage/kv/kv_catalog.cpp b/src/mongo/db/storage/kv/kv_catalog.cpp
index b24cc705226..df0a39faeee 100644
--- a/src/mongo/db/storage/kv/kv_catalog.cpp
+++ b/src/mongo/db/storage/kv/kv_catalog.cpp
@@ -45,398 +45,367 @@
namespace mongo {
namespace {
- // This is a global resource, which protects accesses to the catalog metadata (instance-wide).
- // It is never used with KVEngines that support doc-level locking so this should never conflict
- // with anything else.
- //
- // NOTE: Must be locked *before* _identLock.
- const ResourceId resourceIdCatalogMetadata(RESOURCE_METADATA, 1ULL);
+// This is a global resource, which protects accesses to the catalog metadata (instance-wide).
+// It is never used with KVEngines that support doc-level locking, so this should never conflict
+// with anything else.
+//
+// NOTE: Must be locked *before* _identLock.
+const ResourceId resourceIdCatalogMetadata(RESOURCE_METADATA, 1ULL);
}
- using std::unique_ptr;
- using std::string;
+using std::unique_ptr;
+using std::string;
- class KVCatalog::AddIdentChange : public RecoveryUnit::Change {
- public:
- AddIdentChange(KVCatalog* catalog, StringData ident)
- :_catalog(catalog), _ident(ident.toString())
- {}
+class KVCatalog::AddIdentChange : public RecoveryUnit::Change {
+public:
+ AddIdentChange(KVCatalog* catalog, StringData ident)
+ : _catalog(catalog), _ident(ident.toString()) {}
- virtual void commit() {}
- virtual void rollback() {
- stdx::lock_guard<stdx::mutex> lk(_catalog->_identsLock);
- _catalog->_idents.erase(_ident);
- }
+ virtual void commit() {}
+ virtual void rollback() {
+ stdx::lock_guard<stdx::mutex> lk(_catalog->_identsLock);
+ _catalog->_idents.erase(_ident);
+ }
- KVCatalog* const _catalog;
- const std::string _ident;
- };
+ KVCatalog* const _catalog;
+ const std::string _ident;
+};
- class KVCatalog::RemoveIdentChange : public RecoveryUnit::Change {
- public:
- RemoveIdentChange(KVCatalog* catalog, StringData ident, const Entry& entry)
- :_catalog(catalog), _ident(ident.toString()), _entry(entry)
- {}
+class KVCatalog::RemoveIdentChange : public RecoveryUnit::Change {
+public:
+ RemoveIdentChange(KVCatalog* catalog, StringData ident, const Entry& entry)
+ : _catalog(catalog), _ident(ident.toString()), _entry(entry) {}
- virtual void commit() {}
- virtual void rollback() {
- stdx::lock_guard<stdx::mutex> lk(_catalog->_identsLock);
- _catalog->_idents[_ident] = _entry;
- }
+ virtual void commit() {}
+ virtual void rollback() {
+ stdx::lock_guard<stdx::mutex> lk(_catalog->_identsLock);
+ _catalog->_idents[_ident] = _entry;
+ }
+
+ KVCatalog* const _catalog;
+ const std::string _ident;
+ const Entry _entry;
+};
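Both Change subclasses above follow the usual RecoveryUnit pattern: mutate the
in-memory map eagerly, then register a Change whose rollback() undoes the mutation
if the enclosing unit of work aborts (sketch only; names as in this file):

    stdx::lock_guard<stdx::mutex> lk(_identsLock);
    _idents[ns.toString()] = Entry(ident, loc);
    opCtx->recoveryUnit()->registerChange(new AddIdentChange(this, ns));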
+
+KVCatalog::KVCatalog(RecordStore* rs,
+ bool isRsThreadSafe,
+ bool directoryPerDb,
+ bool directoryForIndexes)
+ : _rs(rs),
+ _isRsThreadSafe(isRsThreadSafe),
+ _directoryPerDb(directoryPerDb),
+ _directoryForIndexes(directoryForIndexes),
+ _rand(_newRand()) {}
+
+KVCatalog::~KVCatalog() {
+ _rs = NULL;
+}
+
+std::string KVCatalog::_newRand() {
+ return str::stream() << std::unique_ptr<SecureRandom>(SecureRandom::create())->nextInt64();
+}
- KVCatalog* const _catalog;
- const std::string _ident;
- const Entry _entry;
- };
-
- KVCatalog::KVCatalog( RecordStore* rs,
- bool isRsThreadSafe,
- bool directoryPerDb,
- bool directoryForIndexes )
- : _rs( rs )
- , _isRsThreadSafe(isRsThreadSafe)
- , _directoryPerDb(directoryPerDb)
- , _directoryForIndexes(directoryForIndexes)
- , _rand(_newRand())
- {}
-
- KVCatalog::~KVCatalog() {
- _rs = NULL;
+bool KVCatalog::_hasEntryCollidingWithRand() const {
+ // Only called from init() so don't need to lock.
+ for (NSToIdentMap::const_iterator it = _idents.begin(); it != _idents.end(); ++it) {
+ if (StringData(it->first).endsWith(_rand))
+ return true;
}
+ return false;
+}
- std::string KVCatalog::_newRand() {
- return str::stream()
- << std::unique_ptr<SecureRandom>(SecureRandom::create())->nextInt64();
+std::string KVCatalog::_newUniqueIdent(StringData ns, const char* kind) {
+ // If this changes to not put _rand at the end, _hasEntryCollidingWithRand will need fixing.
+ StringBuilder buf;
+ if (_directoryPerDb) {
+ buf << NamespaceString::escapeDbName(nsToDatabaseSubstring(ns)) << '/';
}
+ buf << kind;
+ buf << (_directoryForIndexes ? '/' : '-');
+ buf << _next.fetchAndAdd(1) << '-' << _rand;
+ return buf.str();
+}
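For concreteness, the idents this produces look like the following (illustrative
values; the actual counter and random suffix vary per process):

    collection-7-1234567890123456789       // default layout
    mydb/index/12-1234567890123456789      // _directoryPerDb + _directoryForIndexes

Keeping _rand as the trailing component is what lets _hasEntryCollidingWithRand()
detect reuse with a simple endsWith(_rand) check.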
- bool KVCatalog::_hasEntryCollidingWithRand() const {
- // Only called from init() so don't need to lock.
- for (NSToIdentMap::const_iterator it = _idents.begin(); it != _idents.end(); ++it) {
- if (StringData(it->first).endsWith(_rand))
- return true;
- }
- return false;
+void KVCatalog::init(OperationContext* opCtx) {
+ // No locking needed since called single threaded.
+ auto cursor = _rs->getCursor(opCtx);
+ while (auto record = cursor->next()) {
+ BSONObj obj = record->data.releaseToBson();
+
+ // No rollback since this is just loading already committed data.
+ string ns = obj["ns"].String();
+ string ident = obj["ident"].String();
+ _idents[ns] = Entry(ident, record->id);
}
- std::string KVCatalog::_newUniqueIdent(StringData ns, const char* kind) {
- // If this changes to not put _rand at the end, _hasEntryCollidingWithRand will need fixing.
- StringBuilder buf;
- if ( _directoryPerDb ) {
- buf << NamespaceString::escapeDbName( nsToDatabaseSubstring( ns ) ) << '/';
- }
- buf << kind;
- buf << ( _directoryForIndexes ? '/' : '-' );
- buf << _next.fetchAndAdd(1) << '-' << _rand;
- return buf.str();
+    // In the unlikely event that we have used this _rand before, generate a new one.
+ while (_hasEntryCollidingWithRand()) {
+ _rand = _newRand();
}
+}
- void KVCatalog::init( OperationContext* opCtx ) {
- // No locking needed since called single threaded.
- auto cursor = _rs->getCursor(opCtx);
- while (auto record = cursor->next()) {
- BSONObj obj = record->data.releaseToBson();
+void KVCatalog::getAllCollections(std::vector<std::string>* out) const {
+ stdx::lock_guard<stdx::mutex> lk(_identsLock);
+ for (NSToIdentMap::const_iterator it = _idents.begin(); it != _idents.end(); ++it) {
+ out->push_back(it->first);
+ }
+}
- // No rollback since this is just loading already committed data.
- string ns = obj["ns"].String();
- string ident = obj["ident"].String();
- _idents[ns] = Entry(ident, record->id);
- }
+Status KVCatalog::newCollection(OperationContext* opCtx,
+ StringData ns,
+ const CollectionOptions& options) {
+ invariant(opCtx->lockState() == NULL ||
+ opCtx->lockState()->isDbLockedForMode(nsToDatabaseSubstring(ns), MODE_X));
- // In the unlikely event that we have used this _rand before generate a new one.
- while (_hasEntryCollidingWithRand()) {
- _rand = _newRand();
- }
+ std::unique_ptr<Lock::ResourceLock> rLk;
+ if (!_isRsThreadSafe && opCtx->lockState()) {
+ rLk.reset(new Lock::ResourceLock(opCtx->lockState(), resourceIdCatalogMetadata, MODE_X));
}
- void KVCatalog::getAllCollections( std::vector<std::string>* out ) const {
- stdx::lock_guard<stdx::mutex> lk( _identsLock );
- for ( NSToIdentMap::const_iterator it = _idents.begin(); it != _idents.end(); ++it ) {
- out->push_back( it->first );
- }
+ const string ident = _newUniqueIdent(ns, "collection");
+
+ stdx::lock_guard<stdx::mutex> lk(_identsLock);
+ Entry& old = _idents[ns.toString()];
+ if (!old.ident.empty()) {
+ return Status(ErrorCodes::NamespaceExists, "collection already exists");
}
- Status KVCatalog::newCollection( OperationContext* opCtx,
- StringData ns,
- const CollectionOptions& options ) {
- invariant( opCtx->lockState() == NULL ||
- opCtx->lockState()->isDbLockedForMode( nsToDatabaseSubstring(ns), MODE_X ) );
-
- std::unique_ptr<Lock::ResourceLock> rLk;
- if (!_isRsThreadSafe && opCtx->lockState()) {
- rLk.reset(new Lock::ResourceLock(opCtx->lockState(),
- resourceIdCatalogMetadata,
- MODE_X));
- }
+ opCtx->recoveryUnit()->registerChange(new AddIdentChange(this, ns));
- const string ident = _newUniqueIdent(ns, "collection");
+ BSONObj obj;
+ {
+ BSONObjBuilder b;
+ b.append("ns", ns);
+ b.append("ident", ident);
+ BSONCollectionCatalogEntry::MetaData md;
+ md.ns = ns.toString();
+ md.options = options;
+ b.append("md", md.toBSON());
+ obj = b.obj();
+ }
- stdx::lock_guard<stdx::mutex> lk( _identsLock );
- Entry& old = _idents[ns.toString()];
- if ( !old.ident.empty() ) {
- return Status( ErrorCodes::NamespaceExists, "collection already exists" );
- }
+ StatusWith<RecordId> res = _rs->insertRecord(opCtx, obj.objdata(), obj.objsize(), false);
+ if (!res.isOK())
+ return res.getStatus();
- opCtx->recoveryUnit()->registerChange(new AddIdentChange(this, ns));
-
- BSONObj obj;
- {
- BSONObjBuilder b;
- b.append( "ns", ns );
- b.append( "ident", ident );
- BSONCollectionCatalogEntry::MetaData md;
- md.ns = ns.toString();
- md.options = options;
- b.append( "md", md.toBSON() );
- obj = b.obj();
- }
+ old = Entry(ident, res.getValue());
+ LOG(1) << "stored meta data for " << ns << " @ " << res.getValue();
+ return Status::OK();
+}
- StatusWith<RecordId> res = _rs->insertRecord( opCtx, obj.objdata(), obj.objsize(), false );
- if ( !res.isOK() )
- return res.getStatus();
+std::string KVCatalog::getCollectionIdent(StringData ns) const {
+ stdx::lock_guard<stdx::mutex> lk(_identsLock);
+ NSToIdentMap::const_iterator it = _idents.find(ns.toString());
+ invariant(it != _idents.end());
+ return it->second.ident;
+}
- old = Entry( ident, res.getValue() );
- LOG(1) << "stored meta data for " << ns << " @ " << res.getValue();
- return Status::OK();
- }
+std::string KVCatalog::getIndexIdent(OperationContext* opCtx,
+ StringData ns,
+ StringData idxName) const {
+ BSONObj obj = _findEntry(opCtx, ns);
+ BSONObj idxIdent = obj["idxIdent"].Obj();
+ return idxIdent[idxName].String();
+}
- std::string KVCatalog::getCollectionIdent( StringData ns ) const {
- stdx::lock_guard<stdx::mutex> lk( _identsLock );
- NSToIdentMap::const_iterator it = _idents.find( ns.toString() );
- invariant( it != _idents.end() );
- return it->second.ident;
+BSONObj KVCatalog::_findEntry(OperationContext* opCtx, StringData ns, RecordId* out) const {
+ std::unique_ptr<Lock::ResourceLock> rLk;
+ if (!_isRsThreadSafe && opCtx->lockState()) {
+ rLk.reset(new Lock::ResourceLock(opCtx->lockState(), resourceIdCatalogMetadata, MODE_S));
}
- std::string KVCatalog::getIndexIdent( OperationContext* opCtx,
- StringData ns,
- StringData idxName ) const {
- BSONObj obj = _findEntry( opCtx, ns );
- BSONObj idxIdent = obj["idxIdent"].Obj();
- return idxIdent[idxName].String();
+ RecordId dl;
+ {
+ stdx::lock_guard<stdx::mutex> lk(_identsLock);
+ NSToIdentMap::const_iterator it = _idents.find(ns.toString());
+ invariant(it != _idents.end());
+ dl = it->second.storedLoc;
}
- BSONObj KVCatalog::_findEntry( OperationContext* opCtx,
- StringData ns,
- RecordId* out ) const {
-
- std::unique_ptr<Lock::ResourceLock> rLk;
- if (!_isRsThreadSafe && opCtx->lockState()) {
- rLk.reset(new Lock::ResourceLock(opCtx->lockState(),
- resourceIdCatalogMetadata,
- MODE_S));
- }
+ LOG(1) << "looking up metadata for: " << ns << " @ " << dl;
+ RecordData data;
+ if (!_rs->findRecord(opCtx, dl, &data)) {
+        // Since the in-memory metadata isn't managed with MVCC,
+        // it's possible for different transactions to see slightly
+        // different things, which is OK given the locking above.
+ return BSONObj();
+ }
- RecordId dl;
- {
- stdx::lock_guard<stdx::mutex> lk( _identsLock );
- NSToIdentMap::const_iterator it = _idents.find( ns.toString() );
- invariant( it != _idents.end() );
- dl = it->second.storedLoc;
- }
+ if (out)
+ *out = dl;
- LOG(1) << "looking up metadata for: " << ns << " @ " << dl;
- RecordData data;
- if ( !_rs->findRecord( opCtx, dl, &data ) ) {
- // since the in memory meta data isn't managed with mvcc
- // its possible for different transactions to see slightly
- // different things, which is ok via the locking above.
- return BSONObj();
- }
-
- if (out)
- *out = dl;
+ return data.releaseToBson().getOwned();
+}
- return data.releaseToBson().getOwned();
+const BSONCollectionCatalogEntry::MetaData KVCatalog::getMetaData(OperationContext* opCtx,
+ StringData ns) {
+ BSONObj obj = _findEntry(opCtx, ns);
+ LOG(3) << " fetched CCE metadata: " << obj;
+ BSONCollectionCatalogEntry::MetaData md;
+ const BSONElement mdElement = obj["md"];
+ if (mdElement.isABSONObj()) {
+ LOG(3) << "returning metadata: " << mdElement;
+ md.parse(mdElement.Obj());
}
+ return md;
+}
- const BSONCollectionCatalogEntry::MetaData KVCatalog::getMetaData( OperationContext* opCtx,
- StringData ns ) {
- BSONObj obj = _findEntry( opCtx, ns );
- LOG(3) << " fetched CCE metadata: " << obj;
- BSONCollectionCatalogEntry::MetaData md;
- const BSONElement mdElement = obj["md"];
- if ( mdElement.isABSONObj() ) {
- LOG(3) << "returning metadata: " << mdElement;
- md.parse( mdElement.Obj() );
- }
- return md;
+void KVCatalog::putMetaData(OperationContext* opCtx,
+ StringData ns,
+ BSONCollectionCatalogEntry::MetaData& md) {
+ std::unique_ptr<Lock::ResourceLock> rLk;
+ if (!_isRsThreadSafe && opCtx->lockState()) {
+ rLk.reset(new Lock::ResourceLock(opCtx->lockState(), resourceIdCatalogMetadata, MODE_X));
}
- void KVCatalog::putMetaData( OperationContext* opCtx,
- StringData ns,
- BSONCollectionCatalogEntry::MetaData& md ) {
-
- std::unique_ptr<Lock::ResourceLock> rLk;
- if (!_isRsThreadSafe && opCtx->lockState()) {
- rLk.reset(new Lock::ResourceLock(opCtx->lockState(),
- resourceIdCatalogMetadata,
- MODE_X));
+ RecordId loc;
+ BSONObj obj = _findEntry(opCtx, ns, &loc);
+
+ {
+ // rebuilt doc
+ BSONObjBuilder b;
+ b.append("md", md.toBSON());
+
+ BSONObjBuilder newIdentMap;
+ BSONObj oldIdentMap;
+ if (obj["idxIdent"].isABSONObj())
+ oldIdentMap = obj["idxIdent"].Obj();
+
+ // fix ident map
+ for (size_t i = 0; i < md.indexes.size(); i++) {
+ string name = md.indexes[i].name();
+ BSONElement e = oldIdentMap[name];
+ if (e.type() == String) {
+ newIdentMap.append(e);
+ continue;
+ }
+ // missing, create new
+ newIdentMap.append(name, _newUniqueIdent(ns, "index"));
}
+ b.append("idxIdent", newIdentMap.obj());
- RecordId loc;
- BSONObj obj = _findEntry( opCtx, ns, &loc );
-
- {
- // rebuilt doc
- BSONObjBuilder b;
- b.append( "md", md.toBSON() );
-
- BSONObjBuilder newIdentMap;
- BSONObj oldIdentMap;
- if ( obj["idxIdent"].isABSONObj() )
- oldIdentMap = obj["idxIdent"].Obj();
-
- // fix ident map
- for ( size_t i = 0; i < md.indexes.size(); i++ ) {
- string name = md.indexes[i].name();
- BSONElement e = oldIdentMap[name];
- if ( e.type() == String ) {
- newIdentMap.append( e );
- continue;
- }
- // missing, create new
- newIdentMap.append( name, _newUniqueIdent(ns, "index") );
- }
- b.append( "idxIdent", newIdentMap.obj() );
+ // add whatever is left
+ b.appendElementsUnique(obj);
+ obj = b.obj();
+ }
- // add whatever is left
- b.appendElementsUnique( obj );
- obj = b.obj();
- }
+ LOG(3) << "recording new metadata: " << obj;
+ StatusWith<RecordId> status =
+ _rs->updateRecord(opCtx, loc, obj.objdata(), obj.objsize(), false, NULL);
+ fassert(28521, status.getStatus());
+ invariant(status.getValue() == loc);
+}
- LOG(3) << "recording new metadata: " << obj;
- StatusWith<RecordId> status = _rs->updateRecord( opCtx,
- loc,
- obj.objdata(),
- obj.objsize(),
- false,
- NULL );
- fassert( 28521, status.getStatus() );
- invariant( status.getValue() == loc );
+Status KVCatalog::renameCollection(OperationContext* opCtx,
+ StringData fromNS,
+ StringData toNS,
+ bool stayTemp) {
+ std::unique_ptr<Lock::ResourceLock> rLk;
+ if (!_isRsThreadSafe && opCtx->lockState()) {
+ rLk.reset(new Lock::ResourceLock(opCtx->lockState(), resourceIdCatalogMetadata, MODE_X));
}
- Status KVCatalog::renameCollection( OperationContext* opCtx,
- StringData fromNS,
- StringData toNS,
- bool stayTemp ) {
+ RecordId loc;
+ BSONObj old = _findEntry(opCtx, fromNS, &loc).getOwned();
+ {
+ BSONObjBuilder b;
- std::unique_ptr<Lock::ResourceLock> rLk;
- if (!_isRsThreadSafe && opCtx->lockState()) {
- rLk.reset(new Lock::ResourceLock(opCtx->lockState(),
- resourceIdCatalogMetadata,
- MODE_X));
- }
+ b.append("ns", toNS);
- RecordId loc;
- BSONObj old = _findEntry( opCtx, fromNS, &loc ).getOwned();
- {
- BSONObjBuilder b;
-
- b.append( "ns", toNS );
-
- BSONCollectionCatalogEntry::MetaData md;
- md.parse( old["md"].Obj() );
- md.rename( toNS );
- if ( !stayTemp )
- md.options.temp = false;
- b.append( "md", md.toBSON() );
-
- b.appendElementsUnique( old );
-
- BSONObj obj = b.obj();
- StatusWith<RecordId> status = _rs->updateRecord( opCtx,
- loc,
- obj.objdata(),
- obj.objsize(),
- false,
- NULL );
- fassert( 28522, status.getStatus() );
- invariant( status.getValue() == loc );
- }
+ BSONCollectionCatalogEntry::MetaData md;
+ md.parse(old["md"].Obj());
+ md.rename(toNS);
+ if (!stayTemp)
+ md.options.temp = false;
+ b.append("md", md.toBSON());
+
+ b.appendElementsUnique(old);
+
+ BSONObj obj = b.obj();
+ StatusWith<RecordId> status =
+ _rs->updateRecord(opCtx, loc, obj.objdata(), obj.objsize(), false, NULL);
+ fassert(28522, status.getStatus());
+ invariant(status.getValue() == loc);
+ }
- stdx::lock_guard<stdx::mutex> lk( _identsLock );
- const NSToIdentMap::iterator fromIt = _idents.find(fromNS.toString());
- invariant(fromIt != _idents.end());
+ stdx::lock_guard<stdx::mutex> lk(_identsLock);
+ const NSToIdentMap::iterator fromIt = _idents.find(fromNS.toString());
+ invariant(fromIt != _idents.end());
- opCtx->recoveryUnit()->registerChange(new RemoveIdentChange(this, fromNS, fromIt->second));
- opCtx->recoveryUnit()->registerChange(new AddIdentChange(this, toNS));
+ opCtx->recoveryUnit()->registerChange(new RemoveIdentChange(this, fromNS, fromIt->second));
+ opCtx->recoveryUnit()->registerChange(new AddIdentChange(this, toNS));
- _idents.erase(fromIt);
- _idents[toNS.toString()] = Entry( old["ident"].String(), loc );
+ _idents.erase(fromIt);
+ _idents[toNS.toString()] = Entry(old["ident"].String(), loc);
- return Status::OK();
- }
+ return Status::OK();
+}
- Status KVCatalog::dropCollection( OperationContext* opCtx,
- StringData ns ) {
- invariant( opCtx->lockState() == NULL ||
- opCtx->lockState()->isDbLockedForMode( nsToDatabaseSubstring(ns), MODE_X ) );
- std::unique_ptr<Lock::ResourceLock> rLk;
- if (!_isRsThreadSafe && opCtx->lockState()) {
- rLk.reset(new Lock::ResourceLock(opCtx->lockState(),
- resourceIdCatalogMetadata,
- MODE_X));
- }
+Status KVCatalog::dropCollection(OperationContext* opCtx, StringData ns) {
+ invariant(opCtx->lockState() == NULL ||
+ opCtx->lockState()->isDbLockedForMode(nsToDatabaseSubstring(ns), MODE_X));
+ std::unique_ptr<Lock::ResourceLock> rLk;
+ if (!_isRsThreadSafe && opCtx->lockState()) {
+ rLk.reset(new Lock::ResourceLock(opCtx->lockState(), resourceIdCatalogMetadata, MODE_X));
+ }
- stdx::lock_guard<stdx::mutex> lk( _identsLock );
- const NSToIdentMap::iterator it = _idents.find(ns.toString());
- if (it == _idents.end()) {
- return Status( ErrorCodes::NamespaceNotFound, "collection not found" );
- }
+ stdx::lock_guard<stdx::mutex> lk(_identsLock);
+ const NSToIdentMap::iterator it = _idents.find(ns.toString());
+ if (it == _idents.end()) {
+ return Status(ErrorCodes::NamespaceNotFound, "collection not found");
+ }
- opCtx->recoveryUnit()->registerChange(new RemoveIdentChange(this, ns, it->second));
+ opCtx->recoveryUnit()->registerChange(new RemoveIdentChange(this, ns, it->second));
- LOG(1) << "deleting metadata for " << ns << " @ " << it->second.storedLoc;
- _rs->deleteRecord( opCtx, it->second.storedLoc );
- _idents.erase(it);
+ LOG(1) << "deleting metadata for " << ns << " @ " << it->second.storedLoc;
+ _rs->deleteRecord(opCtx, it->second.storedLoc);
+ _idents.erase(it);
- return Status::OK();
- }
+ return Status::OK();
+}
- std::vector<std::string> KVCatalog::getAllIdentsForDB( StringData db ) const {
- std::vector<std::string> v;
+std::vector<std::string> KVCatalog::getAllIdentsForDB(StringData db) const {
+ std::vector<std::string> v;
- {
- stdx::lock_guard<stdx::mutex> lk( _identsLock );
- for ( NSToIdentMap::const_iterator it = _idents.begin(); it != _idents.end(); ++it ) {
- NamespaceString ns( it->first );
- if ( ns.db() != db )
- continue;
- v.push_back( it->second.ident );
- }
+ {
+ stdx::lock_guard<stdx::mutex> lk(_identsLock);
+ for (NSToIdentMap::const_iterator it = _idents.begin(); it != _idents.end(); ++it) {
+ NamespaceString ns(it->first);
+ if (ns.db() != db)
+ continue;
+ v.push_back(it->second.ident);
}
-
- return v;
}
- std::vector<std::string> KVCatalog::getAllIdents( OperationContext* opCtx ) const {
- std::vector<std::string> v;
+ return v;
+}
- auto cursor = _rs->getCursor(opCtx);
- while (auto record = cursor->next()) {
- BSONObj obj = record->data.releaseToBson();
- v.push_back( obj["ident"].String() );
+std::vector<std::string> KVCatalog::getAllIdents(OperationContext* opCtx) const {
+ std::vector<std::string> v;
- BSONElement e = obj["idxIdent"];
- if ( !e.isABSONObj() )
- continue;
- BSONObj idxIdent = e.Obj();
+ auto cursor = _rs->getCursor(opCtx);
+ while (auto record = cursor->next()) {
+ BSONObj obj = record->data.releaseToBson();
+ v.push_back(obj["ident"].String());
- BSONObjIterator sub( idxIdent );
- while ( sub.more() ) {
- BSONElement e = sub.next();
- v.push_back( e.String() );
- }
- }
+ BSONElement e = obj["idxIdent"];
+ if (!e.isABSONObj())
+ continue;
+ BSONObj idxIdent = e.Obj();
- return v;
+ BSONObjIterator sub(idxIdent);
+ while (sub.more()) {
+ BSONElement e = sub.next();
+ v.push_back(e.String());
+ }
}
- bool KVCatalog::isUserDataIdent( StringData ident ) const {
- return
- ident.find( "index-" ) != std::string::npos ||
- ident.find( "index/" ) != std::string::npos ||
- ident.find( "collection-" ) != std::string::npos ||
- ident.find( "collection/" ) != std::string::npos;
- }
+ return v;
+}
+bool KVCatalog::isUserDataIdent(StringData ident) const {
+ return ident.find("index-") != std::string::npos || ident.find("index/") != std::string::npos ||
+ ident.find("collection-") != std::string::npos ||
+ ident.find("collection/") != std::string::npos;
+}
}
diff --git a/src/mongo/db/storage/kv/kv_catalog.h b/src/mongo/db/storage/kv/kv_catalog.h
index d253b9a1828..577fdba1faf 100644
--- a/src/mongo/db/storage/kv/kv_catalog.h
+++ b/src/mongo/db/storage/kv/kv_catalog.h
@@ -41,93 +41,81 @@
namespace mongo {
- class OperationContext;
- class RecordStore;
-
- class KVCatalog {
- public:
- /**
- * @param rs - does NOT take ownership
- */
- KVCatalog( RecordStore* rs,
- bool isRsThreadSafe,
- bool directoryPerDb,
- bool directoryForIndexes );
- ~KVCatalog();
-
- void init( OperationContext* opCtx );
-
- void getAllCollections( std::vector<std::string>* out ) const;
-
- /**
- * @return error or ident for instance
- */
- Status newCollection( OperationContext* opCtx,
- StringData ns,
- const CollectionOptions& options );
-
- std::string getCollectionIdent( StringData ns ) const;
-
- std::string getIndexIdent( OperationContext* opCtx,
- StringData ns,
- StringData idName ) const;
-
- const BSONCollectionCatalogEntry::MetaData getMetaData( OperationContext* opCtx,
- StringData ns );
- void putMetaData( OperationContext* opCtx,
- StringData ns,
- BSONCollectionCatalogEntry::MetaData& md );
-
- Status renameCollection( OperationContext* opCtx,
- StringData fromNS,
- StringData toNS,
- bool stayTemp );
-
- Status dropCollection( OperationContext* opCtx,
- StringData ns );
-
- std::vector<std::string> getAllIdentsForDB( StringData db ) const;
- std::vector<std::string> getAllIdents( OperationContext* opCtx ) const;
-
- bool isUserDataIdent( StringData ident ) const;
- private:
- class AddIdentChange;
- class RemoveIdentChange;
-
- BSONObj _findEntry( OperationContext* opCtx,
- StringData ns,
- RecordId* out=NULL ) const;
-
- /**
- * Generates a new unique identifier for a new "thing".
- * @param ns - the containing ns
- * @param kind - what this "thing" is, likely collection or index
- */
- std::string _newUniqueIdent(StringData ns, const char* kind);
-
- // Helpers only used by constructor and init(). Don't call from elsewhere.
- static std::string _newRand();
- bool _hasEntryCollidingWithRand() const;
-
- RecordStore* _rs; // not owned
- const bool _isRsThreadSafe;
- const bool _directoryPerDb;
- const bool _directoryForIndexes;
-
- // These two are only used for ident generation inside _newUniqueIdent.
- std::string _rand; // effectively const after init() returns
- AtomicUInt64 _next;
-
- struct Entry {
- Entry(){}
- Entry( std::string i, RecordId l )
- : ident(i), storedLoc( l ) {}
- std::string ident;
- RecordId storedLoc;
- };
- typedef std::map<std::string,Entry> NSToIdentMap;
- NSToIdentMap _idents;
- mutable stdx::mutex _identsLock;
- };
+class OperationContext;
+class RecordStore;
+
+class KVCatalog {
+public:
+ /**
+ * @param rs - does NOT take ownership
+ */
+ KVCatalog(RecordStore* rs, bool isRsThreadSafe, bool directoryPerDb, bool directoryForIndexes);
+ ~KVCatalog();
+
+ void init(OperationContext* opCtx);
+
+ void getAllCollections(std::vector<std::string>* out) const;
+
+ /**
+ * @return error or ident for instance
+ */
+ Status newCollection(OperationContext* opCtx, StringData ns, const CollectionOptions& options);
+
+ std::string getCollectionIdent(StringData ns) const;
+
+ std::string getIndexIdent(OperationContext* opCtx, StringData ns, StringData idName) const;
+
+ const BSONCollectionCatalogEntry::MetaData getMetaData(OperationContext* opCtx, StringData ns);
+ void putMetaData(OperationContext* opCtx,
+ StringData ns,
+ BSONCollectionCatalogEntry::MetaData& md);
+
+ Status renameCollection(OperationContext* opCtx,
+ StringData fromNS,
+ StringData toNS,
+ bool stayTemp);
+
+ Status dropCollection(OperationContext* opCtx, StringData ns);
+ std::vector<std::string> getAllIdentsForDB(StringData db) const;
+ std::vector<std::string> getAllIdents(OperationContext* opCtx) const;
+
+ bool isUserDataIdent(StringData ident) const;
+
+private:
+ class AddIdentChange;
+ class RemoveIdentChange;
+
+ BSONObj _findEntry(OperationContext* opCtx, StringData ns, RecordId* out = NULL) const;
+
+ /**
+ * Generates a new unique identifier for a new "thing".
+ * @param ns - the containing ns
+ * @param kind - what this "thing" is, likely collection or index
+ */
+ std::string _newUniqueIdent(StringData ns, const char* kind);
+
+ // Helpers only used by constructor and init(). Don't call from elsewhere.
+ static std::string _newRand();
+ bool _hasEntryCollidingWithRand() const;
+
+ RecordStore* _rs; // not owned
+ const bool _isRsThreadSafe;
+ const bool _directoryPerDb;
+ const bool _directoryForIndexes;
+
+ // These two are only used for ident generation inside _newUniqueIdent.
+ std::string _rand; // effectively const after init() returns
+ AtomicUInt64 _next;
+
+ struct Entry {
+ Entry() {}
+ Entry(std::string i, RecordId l) : ident(i), storedLoc(l) {}
+ std::string ident;
+ RecordId storedLoc;
+ };
+ typedef std::map<std::string, Entry> NSToIdentMap;
+ NSToIdentMap _idents;
+ mutable stdx::mutex _identsLock;
+};
}
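A minimal call sequence against this interface (illustrative only; assumes rs and
opCtx are provided by the surrounding storage engine code):

    KVCatalog catalog(rs,
                      false,   // isRsThreadSafe
                      false,   // directoryPerDb
                      false);  // directoryForIndexes
    catalog.init(opCtx);
    uassertStatusOK(catalog.newCollection(opCtx, "test.foo", CollectionOptions()));
    std::string ident = catalog.getCollectionIdent("test.foo");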
diff --git a/src/mongo/db/storage/kv/kv_collection_catalog_entry.cpp b/src/mongo/db/storage/kv/kv_collection_catalog_entry.cpp
index 48a310a576e..ed92ccecc5f 100644
--- a/src/mongo/db/storage/kv/kv_collection_catalog_entry.cpp
+++ b/src/mongo/db/storage/kv/kv_collection_catalog_entry.cpp
@@ -36,157 +36,141 @@
namespace mongo {
- using std::string;
-
- class KVCollectionCatalogEntry::AddIndexChange : public RecoveryUnit::Change {
- public:
- AddIndexChange(OperationContext* opCtx, KVCollectionCatalogEntry* cce,
- StringData ident)
- : _opCtx(opCtx)
- , _cce(cce)
- , _ident(ident.toString())
- {}
-
- virtual void commit() {}
- virtual void rollback() {
- // Intentionally ignoring failure.
- _cce->_engine->dropIdent(_opCtx, _ident);
- }
-
- OperationContext* const _opCtx;
- KVCollectionCatalogEntry* const _cce;
- const std::string _ident;
- };
-
- class KVCollectionCatalogEntry::RemoveIndexChange : public RecoveryUnit::Change {
- public:
- RemoveIndexChange(OperationContext* opCtx, KVCollectionCatalogEntry* cce,
- StringData ident)
- : _opCtx(opCtx)
- , _cce(cce)
- , _ident(ident.toString())
- {}
-
- virtual void rollback() {}
- virtual void commit() {
- // Intentionally ignoring failure here. Since we've removed the metadata pointing to the
- // index, we should never see it again anyway.
- _cce->_engine->dropIdent(_opCtx, _ident);
- }
-
- OperationContext* const _opCtx;
- KVCollectionCatalogEntry* const _cce;
- const std::string _ident;
- };
-
-
- KVCollectionCatalogEntry::KVCollectionCatalogEntry( KVEngine* engine,
- KVCatalog* catalog,
- StringData ns,
- StringData ident,
- RecordStore* rs)
- : BSONCollectionCatalogEntry( ns ),
- _engine( engine ),
- _catalog( catalog ),
- _ident( ident.toString() ),
- _recordStore( rs ) {
- }
+using std::string;
- KVCollectionCatalogEntry::~KVCollectionCatalogEntry() {
- }
+class KVCollectionCatalogEntry::AddIndexChange : public RecoveryUnit::Change {
+public:
+ AddIndexChange(OperationContext* opCtx, KVCollectionCatalogEntry* cce, StringData ident)
+ : _opCtx(opCtx), _cce(cce), _ident(ident.toString()) {}
- bool KVCollectionCatalogEntry::setIndexIsMultikey(OperationContext* txn,
- StringData indexName,
- bool multikey ) {
- MetaData md = _getMetaData(txn);
-
- int offset = md.findIndexOffset( indexName );
- invariant( offset >= 0 );
- if ( md.indexes[offset].multikey == multikey )
- return false;
- md.indexes[offset].multikey = multikey;
- _catalog->putMetaData( txn, ns().toString(), md );
- return true;
+ virtual void commit() {}
+ virtual void rollback() {
+ // Intentionally ignoring failure.
+ _cce->_engine->dropIdent(_opCtx, _ident);
}
- void KVCollectionCatalogEntry::setIndexHead( OperationContext* txn,
- StringData indexName,
- const RecordId& newHead ) {
- MetaData md = _getMetaData( txn );
- int offset = md.findIndexOffset( indexName );
- invariant( offset >= 0 );
- md.indexes[offset].head = newHead;
- _catalog->putMetaData( txn, ns().toString(), md );
+ OperationContext* const _opCtx;
+ KVCollectionCatalogEntry* const _cce;
+ const std::string _ident;
+};
+
+class KVCollectionCatalogEntry::RemoveIndexChange : public RecoveryUnit::Change {
+public:
+ RemoveIndexChange(OperationContext* opCtx, KVCollectionCatalogEntry* cce, StringData ident)
+ : _opCtx(opCtx), _cce(cce), _ident(ident.toString()) {}
+
+ virtual void rollback() {}
+ virtual void commit() {
+ // Intentionally ignoring failure here. Since we've removed the metadata pointing to the
+ // index, we should never see it again anyway.
+ _cce->_engine->dropIdent(_opCtx, _ident);
}
- Status KVCollectionCatalogEntry::removeIndex( OperationContext* txn,
- StringData indexName ) {
- MetaData md = _getMetaData( txn );
-
- if (md.findIndexOffset(indexName) < 0)
- return Status::OK(); // never had the index so nothing to do.
+ OperationContext* const _opCtx;
+ KVCollectionCatalogEntry* const _cce;
+ const std::string _ident;
+};
+
+
+KVCollectionCatalogEntry::KVCollectionCatalogEntry(
+ KVEngine* engine, KVCatalog* catalog, StringData ns, StringData ident, RecordStore* rs)
+ : BSONCollectionCatalogEntry(ns),
+ _engine(engine),
+ _catalog(catalog),
+ _ident(ident.toString()),
+ _recordStore(rs) {}
+
+KVCollectionCatalogEntry::~KVCollectionCatalogEntry() {}
+
+bool KVCollectionCatalogEntry::setIndexIsMultikey(OperationContext* txn,
+ StringData indexName,
+ bool multikey) {
+ MetaData md = _getMetaData(txn);
+
+ int offset = md.findIndexOffset(indexName);
+ invariant(offset >= 0);
+ if (md.indexes[offset].multikey == multikey)
+ return false;
+ md.indexes[offset].multikey = multikey;
+ _catalog->putMetaData(txn, ns().toString(), md);
+ return true;
+}
- const string ident = _catalog->getIndexIdent( txn, ns().ns(), indexName );
+void KVCollectionCatalogEntry::setIndexHead(OperationContext* txn,
+ StringData indexName,
+ const RecordId& newHead) {
+ MetaData md = _getMetaData(txn);
+ int offset = md.findIndexOffset(indexName);
+ invariant(offset >= 0);
+ md.indexes[offset].head = newHead;
+ _catalog->putMetaData(txn, ns().toString(), md);
+}
- md.eraseIndex( indexName );
- _catalog->putMetaData( txn, ns().toString(), md );
+Status KVCollectionCatalogEntry::removeIndex(OperationContext* txn, StringData indexName) {
+ MetaData md = _getMetaData(txn);
- // Lazily remove to isolate underlying engine from rollback.
- txn->recoveryUnit()->registerChange(new RemoveIndexChange(txn, this, ident));
- return Status::OK();
- }
+ if (md.findIndexOffset(indexName) < 0)
+ return Status::OK(); // never had the index so nothing to do.
- Status KVCollectionCatalogEntry::prepareForIndexBuild( OperationContext* txn,
- const IndexDescriptor* spec ) {
- MetaData md = _getMetaData( txn );
- md.indexes.push_back( IndexMetaData( spec->infoObj(), false, RecordId(), false ) );
- _catalog->putMetaData( txn, ns().toString(), md );
+ const string ident = _catalog->getIndexIdent(txn, ns().ns(), indexName);
- string ident = _catalog->getIndexIdent( txn, ns().ns(), spec->indexName() );
+ md.eraseIndex(indexName);
+ _catalog->putMetaData(txn, ns().toString(), md);
- const Status status = _engine->createSortedDataInterface( txn, ident, spec );
- if (status.isOK()) {
- txn->recoveryUnit()->registerChange(new AddIndexChange(txn, this, ident));
- }
+ // Lazily remove to isolate underlying engine from rollback.
+ txn->recoveryUnit()->registerChange(new RemoveIndexChange(txn, this, ident));
+ return Status::OK();
+}
- return status;
- }
+Status KVCollectionCatalogEntry::prepareForIndexBuild(OperationContext* txn,
+ const IndexDescriptor* spec) {
+ MetaData md = _getMetaData(txn);
+ md.indexes.push_back(IndexMetaData(spec->infoObj(), false, RecordId(), false));
+ _catalog->putMetaData(txn, ns().toString(), md);
- void KVCollectionCatalogEntry::indexBuildSuccess( OperationContext* txn,
- StringData indexName ) {
- MetaData md = _getMetaData( txn );
- int offset = md.findIndexOffset( indexName );
- invariant( offset >= 0 );
- md.indexes[offset].ready = true;
- _catalog->putMetaData( txn, ns().toString(), md );
- }
+ string ident = _catalog->getIndexIdent(txn, ns().ns(), spec->indexName());
- void KVCollectionCatalogEntry::updateTTLSetting( OperationContext* txn,
- StringData idxName,
- long long newExpireSeconds ) {
- MetaData md = _getMetaData( txn );
- int offset = md.findIndexOffset( idxName );
- invariant( offset >= 0 );
- md.indexes[offset].updateTTLSetting( newExpireSeconds );
- _catalog->putMetaData( txn, ns().toString(), md );
+ const Status status = _engine->createSortedDataInterface(txn, ident, spec);
+ if (status.isOK()) {
+ txn->recoveryUnit()->registerChange(new AddIndexChange(txn, this, ident));
}
- void KVCollectionCatalogEntry::updateFlags(OperationContext* txn, int newValue) {
- MetaData md = _getMetaData( txn );
- md.options.flags = newValue;
- md.options.flagsSet = true;
- _catalog->putMetaData( txn, ns().toString(), md );
- }
+ return status;
+}
- void KVCollectionCatalogEntry::updateValidator(OperationContext* txn,
- const BSONObj& validator) {
- MetaData md = _getMetaData(txn);
- md.options.validator = validator;
- _catalog->putMetaData(txn, ns().toString(), md);
- }
+void KVCollectionCatalogEntry::indexBuildSuccess(OperationContext* txn, StringData indexName) {
+ MetaData md = _getMetaData(txn);
+ int offset = md.findIndexOffset(indexName);
+ invariant(offset >= 0);
+ md.indexes[offset].ready = true;
+ _catalog->putMetaData(txn, ns().toString(), md);
+}
- BSONCollectionCatalogEntry::MetaData KVCollectionCatalogEntry::_getMetaData( OperationContext* txn ) const {
- return _catalog->getMetaData( txn, ns().toString() );
- }
+void KVCollectionCatalogEntry::updateTTLSetting(OperationContext* txn,
+ StringData idxName,
+ long long newExpireSeconds) {
+ MetaData md = _getMetaData(txn);
+ int offset = md.findIndexOffset(idxName);
+ invariant(offset >= 0);
+ md.indexes[offset].updateTTLSetting(newExpireSeconds);
+ _catalog->putMetaData(txn, ns().toString(), md);
+}
+void KVCollectionCatalogEntry::updateFlags(OperationContext* txn, int newValue) {
+ MetaData md = _getMetaData(txn);
+ md.options.flags = newValue;
+ md.options.flagsSet = true;
+ _catalog->putMetaData(txn, ns().toString(), md);
+}
+
+void KVCollectionCatalogEntry::updateValidator(OperationContext* txn, const BSONObj& validator) {
+ MetaData md = _getMetaData(txn);
+ md.options.validator = validator;
+ _catalog->putMetaData(txn, ns().toString(), md);
+}
+
+BSONCollectionCatalogEntry::MetaData KVCollectionCatalogEntry::_getMetaData(
+ OperationContext* txn) const {
+ return _catalog->getMetaData(txn, ns().toString());
+}
}
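
AddIndexChange and RemoveIndexChange above are the two halves of the RecoveryUnit::Change pattern: creating an ident registers a compensating drop that fires only on rollback, and removing one defers the destructive drop until commit, so an uncommitted transaction never performs an irreversible engine operation. A minimal standalone sketch of the mechanism, using simplified stand-in types rather than the real RecoveryUnit:

    #include <iostream>
    #include <memory>
    #include <string>
    #include <vector>

    struct Change {
        virtual ~Change() {}
        virtual void commit() = 0;
        virtual void rollback() = 0;
    };

    // Destruction side: do nothing until commit, then reclaim storage.
    struct RemoveIdentChange : Change {
        explicit RemoveIdentChange(std::string i) : ident(std::move(i)) {}
        void commit() override { std::cout << "dropIdent(" << ident << ")\n"; }
        void rollback() override {}  // metadata was restored; keep the ident
        std::string ident;
    };

    // Creation side: keep on commit, reclaim on rollback.
    struct AddIdentChange : Change {
        explicit AddIdentChange(std::string i) : ident(std::move(i)) {}
        void commit() override {}  // ident now referenced by committed metadata
        void rollback() override { std::cout << "dropIdent(" << ident << ")\n"; }
        std::string ident;
    };

    class UnitOfWork {
    public:
        void registerChange(Change* c) { _changes.emplace_back(c); }  // takes ownership
        void commit() {
            for (auto& c : _changes)
                c->commit();
            _changes.clear();
        }
        ~UnitOfWork() {  // destroyed without commit(): roll back, newest first
            for (auto it = _changes.rbegin(); it != _changes.rend(); ++it)
                (*it)->rollback();
        }

    private:
        std::vector<std::unique_ptr<Change>> _changes;
    };

This is what the "Lazily remove to isolate underlying engine from rollback" comment is getting at: until the unit of work commits, the only mutated state is catalog metadata, which the transaction machinery can undo.
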
diff --git a/src/mongo/db/storage/kv/kv_collection_catalog_entry.h b/src/mongo/db/storage/kv/kv_collection_catalog_entry.h
index 963c9d623f9..285db3754f7 100644
--- a/src/mongo/db/storage/kv/kv_collection_catalog_entry.h
+++ b/src/mongo/db/storage/kv/kv_collection_catalog_entry.h
@@ -36,60 +36,57 @@
namespace mongo {
- class KVCatalog;
- class KVEngine;
+class KVCatalog;
+class KVEngine;
- class KVCollectionCatalogEntry final : public BSONCollectionCatalogEntry {
- public:
- KVCollectionCatalogEntry( KVEngine* engine,
- KVCatalog* catalog,
- StringData ns,
- StringData ident,
- RecordStore* rs );
+class KVCollectionCatalogEntry final : public BSONCollectionCatalogEntry {
+public:
+ KVCollectionCatalogEntry(
+ KVEngine* engine, KVCatalog* catalog, StringData ns, StringData ident, RecordStore* rs);
- ~KVCollectionCatalogEntry() final;
+ ~KVCollectionCatalogEntry() final;
- int getMaxAllowedIndexes() const final { return 64; };
-
- bool setIndexIsMultikey(OperationContext* txn,
- StringData indexName,
- bool multikey = true) final;
+ int getMaxAllowedIndexes() const final {
+ return 64;
+ };
- void setIndexHead( OperationContext* txn,
- StringData indexName,
- const RecordId& newHead ) final;
+ bool setIndexIsMultikey(OperationContext* txn,
+ StringData indexName,
+ bool multikey = true) final;
- Status removeIndex( OperationContext* txn,
- StringData indexName ) final;
+ void setIndexHead(OperationContext* txn, StringData indexName, const RecordId& newHead) final;
- Status prepareForIndexBuild( OperationContext* txn,
- const IndexDescriptor* spec ) final;
+ Status removeIndex(OperationContext* txn, StringData indexName) final;
- void indexBuildSuccess( OperationContext* txn,
- StringData indexName ) final;
+ Status prepareForIndexBuild(OperationContext* txn, const IndexDescriptor* spec) final;
- void updateTTLSetting( OperationContext* txn,
- StringData idxName,
- long long newExpireSeconds ) final;
+ void indexBuildSuccess(OperationContext* txn, StringData indexName) final;
- void updateFlags(OperationContext* txn, int newValue) final;
+ void updateTTLSetting(OperationContext* txn,
+ StringData idxName,
+ long long newExpireSeconds) final;
- void updateValidator(OperationContext* txn, const BSONObj& validator) final;
+ void updateFlags(OperationContext* txn, int newValue) final;
- RecordStore* getRecordStore() { return _recordStore.get(); }
- const RecordStore* getRecordStore() const { return _recordStore.get(); }
+ void updateValidator(OperationContext* txn, const BSONObj& validator) final;
- protected:
- MetaData _getMetaData( OperationContext* txn ) const final;
+ RecordStore* getRecordStore() {
+ return _recordStore.get();
+ }
+ const RecordStore* getRecordStore() const {
+ return _recordStore.get();
+ }
- private:
- class AddIndexChange;
- class RemoveIndexChange;
+protected:
+ MetaData _getMetaData(OperationContext* txn) const final;
- KVEngine* _engine; // not owned
- KVCatalog* _catalog; // not owned
- std::string _ident;
- std::unique_ptr<RecordStore> _recordStore; // owned
- };
+private:
+ class AddIndexChange;
+ class RemoveIndexChange;
+ KVEngine* _engine; // not owned
+ KVCatalog* _catalog; // not owned
+ std::string _ident;
+ std::unique_ptr<RecordStore> _recordStore; // owned
+};
}
diff --git a/src/mongo/db/storage/kv/kv_database_catalog_entry.cpp b/src/mongo/db/storage/kv/kv_database_catalog_entry.cpp
index a72aa8a12b6..b88eeb6bc20 100644
--- a/src/mongo/db/storage/kv/kv_database_catalog_entry.cpp
+++ b/src/mongo/db/storage/kv/kv_database_catalog_entry.cpp
@@ -38,333 +38,313 @@
namespace mongo {
- using std::string;
- using std::vector;
-
- class KVDatabaseCatalogEntry::AddCollectionChange : public RecoveryUnit::Change {
- public:
- AddCollectionChange(OperationContext* opCtx, KVDatabaseCatalogEntry* dce,
- StringData collection, StringData ident,
- bool dropOnRollback)
- : _opCtx(opCtx)
- , _dce(dce)
- , _collection(collection.toString())
- , _ident(ident.toString())
- , _dropOnRollback(dropOnRollback)
- {}
-
- virtual void commit() {}
- virtual void rollback() {
- if (_dropOnRollback) {
- // Intentionally ignoring failure
- _dce->_engine->getEngine()->dropIdent(_opCtx, _ident);
- }
-
- const CollectionMap::iterator it = _dce->_collections.find(_collection);
- if (it != _dce->_collections.end()) {
- delete it->second;
- _dce->_collections.erase(it);
- }
+using std::string;
+using std::vector;
+
+class KVDatabaseCatalogEntry::AddCollectionChange : public RecoveryUnit::Change {
+public:
+ AddCollectionChange(OperationContext* opCtx,
+ KVDatabaseCatalogEntry* dce,
+ StringData collection,
+ StringData ident,
+ bool dropOnRollback)
+ : _opCtx(opCtx),
+ _dce(dce),
+ _collection(collection.toString()),
+ _ident(ident.toString()),
+ _dropOnRollback(dropOnRollback) {}
+
+ virtual void commit() {}
+ virtual void rollback() {
+ if (_dropOnRollback) {
+ // Intentionally ignoring failure
+ _dce->_engine->getEngine()->dropIdent(_opCtx, _ident);
}
- OperationContext* const _opCtx;
- KVDatabaseCatalogEntry* const _dce;
- const std::string _collection;
- const std::string _ident;
- const bool _dropOnRollback;
- };
-
- class KVDatabaseCatalogEntry::RemoveCollectionChange : public RecoveryUnit::Change {
- public:
- RemoveCollectionChange(OperationContext* opCtx, KVDatabaseCatalogEntry* dce,
- StringData collection, StringData ident,
- KVCollectionCatalogEntry* entry, bool dropOnCommit)
- : _opCtx(opCtx)
- , _dce(dce)
- , _collection(collection.toString())
- , _ident(ident.toString())
- , _entry(entry)
- , _dropOnCommit(dropOnCommit)
- {}
-
- virtual void commit() {
- delete _entry;
-
- // Intentionally ignoring failure here. Since we've removed the metadata pointing to the
- // collection, we should never see it again anyway.
- if (_dropOnCommit)
- _dce->_engine->getEngine()->dropIdent( _opCtx, _ident );
- }
-
- virtual void rollback() {
- _dce->_collections[_collection] = _entry;
- }
-
- OperationContext* const _opCtx;
- KVDatabaseCatalogEntry* const _dce;
- const std::string _collection;
- const std::string _ident;
- KVCollectionCatalogEntry* const _entry;
- const bool _dropOnCommit;
- };
-
- KVDatabaseCatalogEntry::KVDatabaseCatalogEntry( StringData db, KVStorageEngine* engine )
- : DatabaseCatalogEntry( db ), _engine( engine ) {
-
- }
-
- KVDatabaseCatalogEntry::~KVDatabaseCatalogEntry() {
- for ( CollectionMap::const_iterator it = _collections.begin(); it != _collections.end(); ++it ) {
+ const CollectionMap::iterator it = _dce->_collections.find(_collection);
+ if (it != _dce->_collections.end()) {
delete it->second;
+ _dce->_collections.erase(it);
}
- _collections.clear();
}
- bool KVDatabaseCatalogEntry::exists() const {
- return !isEmpty();
+ OperationContext* const _opCtx;
+ KVDatabaseCatalogEntry* const _dce;
+ const std::string _collection;
+ const std::string _ident;
+ const bool _dropOnRollback;
+};
+
+class KVDatabaseCatalogEntry::RemoveCollectionChange : public RecoveryUnit::Change {
+public:
+ RemoveCollectionChange(OperationContext* opCtx,
+ KVDatabaseCatalogEntry* dce,
+ StringData collection,
+ StringData ident,
+ KVCollectionCatalogEntry* entry,
+ bool dropOnCommit)
+ : _opCtx(opCtx),
+ _dce(dce),
+ _collection(collection.toString()),
+ _ident(ident.toString()),
+ _entry(entry),
+ _dropOnCommit(dropOnCommit) {}
+
+ virtual void commit() {
+ delete _entry;
+
+ // Intentionally ignoring failure here. Since we've removed the metadata pointing to the
+ // collection, we should never see it again anyway.
+ if (_dropOnCommit)
+ _dce->_engine->getEngine()->dropIdent(_opCtx, _ident);
}
- bool KVDatabaseCatalogEntry::isEmpty() const {
- return _collections.empty();
+ virtual void rollback() {
+ _dce->_collections[_collection] = _entry;
}
- bool KVDatabaseCatalogEntry::hasUserData() const {
- return !isEmpty();
- }
+ OperationContext* const _opCtx;
+ KVDatabaseCatalogEntry* const _dce;
+ const std::string _collection;
+ const std::string _ident;
+ KVCollectionCatalogEntry* const _entry;
+ const bool _dropOnCommit;
+};
- int64_t KVDatabaseCatalogEntry::sizeOnDisk( OperationContext* opCtx ) const {
- int64_t size = 0;
+KVDatabaseCatalogEntry::KVDatabaseCatalogEntry(StringData db, KVStorageEngine* engine)
+ : DatabaseCatalogEntry(db), _engine(engine) {}
- for ( CollectionMap::const_iterator it = _collections.begin(); it != _collections.end(); ++it ) {
- const KVCollectionCatalogEntry* coll = it->second;
- if ( !coll )
- continue;
- size += coll->getRecordStore()->storageSize( opCtx );
+KVDatabaseCatalogEntry::~KVDatabaseCatalogEntry() {
+ for (CollectionMap::const_iterator it = _collections.begin(); it != _collections.end(); ++it) {
+ delete it->second;
+ }
+ _collections.clear();
+}
- vector<string> indexNames;
- coll->getAllIndexes( opCtx, &indexNames );
+bool KVDatabaseCatalogEntry::exists() const {
+ return !isEmpty();
+}
- for ( size_t i = 0; i < indexNames.size(); i++ ) {
- string ident = _engine->getCatalog()->getIndexIdent( opCtx,
- coll->ns().ns(),
- indexNames[i] );
- size += _engine->getEngine()->getIdentSize( opCtx, ident );
- }
- }
+bool KVDatabaseCatalogEntry::isEmpty() const {
+ return _collections.empty();
+}
- return size;
- }
+bool KVDatabaseCatalogEntry::hasUserData() const {
+ return !isEmpty();
+}
- void KVDatabaseCatalogEntry::appendExtraStats( OperationContext* opCtx,
- BSONObjBuilder* out,
- double scale ) const {
- }
+int64_t KVDatabaseCatalogEntry::sizeOnDisk(OperationContext* opCtx) const {
+ int64_t size = 0;
- bool KVDatabaseCatalogEntry::currentFilesCompatible( OperationContext* opCtx ) const {
- // todo
- return true;
- }
+ for (CollectionMap::const_iterator it = _collections.begin(); it != _collections.end(); ++it) {
+ const KVCollectionCatalogEntry* coll = it->second;
+ if (!coll)
+ continue;
+ size += coll->getRecordStore()->storageSize(opCtx);
- void KVDatabaseCatalogEntry::getCollectionNamespaces( std::list<std::string>* out ) const {
- for (CollectionMap::const_iterator it = _collections.begin();
- it != _collections.end();
- ++it) {
+ vector<string> indexNames;
+ coll->getAllIndexes(opCtx, &indexNames);
- out->push_back( it->first );
+ for (size_t i = 0; i < indexNames.size(); i++) {
+ string ident =
+ _engine->getCatalog()->getIndexIdent(opCtx, coll->ns().ns(), indexNames[i]);
+ size += _engine->getEngine()->getIdentSize(opCtx, ident);
}
}
- CollectionCatalogEntry* KVDatabaseCatalogEntry::getCollectionCatalogEntry(
- StringData ns ) const {
-
- CollectionMap::const_iterator it = _collections.find( ns.toString() );
- if (it == _collections.end()) {
- return NULL;
- }
+ return size;
+}
- return it->second;
- }
+void KVDatabaseCatalogEntry::appendExtraStats(OperationContext* opCtx,
+ BSONObjBuilder* out,
+ double scale) const {}
- RecordStore* KVDatabaseCatalogEntry::getRecordStore( StringData ns ) const {
- CollectionMap::const_iterator it = _collections.find( ns.toString() );
- if (it == _collections.end()) {
- return NULL;
- }
+bool KVDatabaseCatalogEntry::currentFilesCompatible(OperationContext* opCtx) const {
+    // TODO: actually verify on-disk file compatibility instead of assuming it.
+ return true;
+}
- return it->second->getRecordStore();
+void KVDatabaseCatalogEntry::getCollectionNamespaces(std::list<std::string>* out) const {
+ for (CollectionMap::const_iterator it = _collections.begin(); it != _collections.end(); ++it) {
+ out->push_back(it->first);
}
+}
- Status KVDatabaseCatalogEntry::createCollection( OperationContext* txn,
- StringData ns,
- const CollectionOptions& options,
- bool allocateDefaultSpace ) {
+CollectionCatalogEntry* KVDatabaseCatalogEntry::getCollectionCatalogEntry(StringData ns) const {
+ CollectionMap::const_iterator it = _collections.find(ns.toString());
+ if (it == _collections.end()) {
+ return NULL;
+ }
- invariant(txn->lockState()->isDbLockedForMode(name(), MODE_X));
+ return it->second;
+}
- if (ns.empty()) {
- return Status(ErrorCodes::BadValue, "Collection namespace cannot be empty");
- }
+RecordStore* KVDatabaseCatalogEntry::getRecordStore(StringData ns) const {
+ CollectionMap::const_iterator it = _collections.find(ns.toString());
+ if (it == _collections.end()) {
+ return NULL;
+ }
- if (_collections.count(ns.toString())) {
- invariant(_collections[ns.toString()]);
- return Status(ErrorCodes::NamespaceExists, "collection already exists");
- }
+ return it->second->getRecordStore();
+}
- // need to create it
- Status status = _engine->getCatalog()->newCollection( txn, ns, options );
- if ( !status.isOK() )
- return status;
+Status KVDatabaseCatalogEntry::createCollection(OperationContext* txn,
+ StringData ns,
+ const CollectionOptions& options,
+ bool allocateDefaultSpace) {
+ invariant(txn->lockState()->isDbLockedForMode(name(), MODE_X));
- string ident = _engine->getCatalog()->getCollectionIdent( ns );
+ if (ns.empty()) {
+ return Status(ErrorCodes::BadValue, "Collection namespace cannot be empty");
+ }
- status = _engine->getEngine()->createRecordStore( txn, ns, ident, options );
- if ( !status.isOK() )
- return status;
+ if (_collections.count(ns.toString())) {
+ invariant(_collections[ns.toString()]);
+ return Status(ErrorCodes::NamespaceExists, "collection already exists");
+ }
- RecordStore* rs = _engine->getEngine()->getRecordStore( txn, ns, ident, options );
- invariant( rs );
+ // need to create it
+ Status status = _engine->getCatalog()->newCollection(txn, ns, options);
+ if (!status.isOK())
+ return status;
- txn->recoveryUnit()->registerChange(new AddCollectionChange(txn, this, ns, ident, true));
- _collections[ns.toString()] =
- new KVCollectionCatalogEntry( _engine->getEngine(), _engine->getCatalog(),
- ns, ident, rs );
+ string ident = _engine->getCatalog()->getCollectionIdent(ns);
- return Status::OK();
- }
+ status = _engine->getEngine()->createRecordStore(txn, ns, ident, options);
+ if (!status.isOK())
+ return status;
- void KVDatabaseCatalogEntry::initCollection( OperationContext* opCtx,
- const std::string& ns,
- bool forRepair ) {
- invariant(!_collections.count(ns));
+ RecordStore* rs = _engine->getEngine()->getRecordStore(txn, ns, ident, options);
+ invariant(rs);
- const std::string ident = _engine->getCatalog()->getCollectionIdent( ns );
+ txn->recoveryUnit()->registerChange(new AddCollectionChange(txn, this, ns, ident, true));
+ _collections[ns.toString()] =
+ new KVCollectionCatalogEntry(_engine->getEngine(), _engine->getCatalog(), ns, ident, rs);
- RecordStore* rs;
- if (forRepair) {
- // Using a NULL rs since we don't want to open this record store before it has been
- // repaired. This also ensures that if we try to use it, it will blow up.
- rs = NULL;
- }
- else {
- BSONCollectionCatalogEntry::MetaData md = _engine->getCatalog()->getMetaData(opCtx, ns);
- rs = _engine->getEngine()->getRecordStore( opCtx, ns, ident, md.options );
- invariant( rs );
- }
+ return Status::OK();
+}
- // No change registration since this is only for committed collections
- _collections[ns] = new KVCollectionCatalogEntry( _engine->getEngine(),
- _engine->getCatalog(),
- ns,
- ident,
- rs );
+void KVDatabaseCatalogEntry::initCollection(OperationContext* opCtx,
+ const std::string& ns,
+ bool forRepair) {
+ invariant(!_collections.count(ns));
+
+ const std::string ident = _engine->getCatalog()->getCollectionIdent(ns);
+
+ RecordStore* rs;
+ if (forRepair) {
+ // Using a NULL rs since we don't want to open this record store before it has been
+ // repaired. This also ensures that if we try to use it, it will blow up.
+ rs = NULL;
+ } else {
+ BSONCollectionCatalogEntry::MetaData md = _engine->getCatalog()->getMetaData(opCtx, ns);
+ rs = _engine->getEngine()->getRecordStore(opCtx, ns, ident, md.options);
+ invariant(rs);
}
- void KVDatabaseCatalogEntry::reinitCollectionAfterRepair(OperationContext* opCtx,
- const std::string& ns) {
- // Get rid of the old entry.
- CollectionMap::iterator it = _collections.find(ns);
- invariant(it != _collections.end());
- delete it->second;
- _collections.erase(it);
+ // No change registration since this is only for committed collections
+ _collections[ns] =
+ new KVCollectionCatalogEntry(_engine->getEngine(), _engine->getCatalog(), ns, ident, rs);
+}
- // Now reopen fully initialized.
- initCollection(opCtx, ns, false);
- }
+void KVDatabaseCatalogEntry::reinitCollectionAfterRepair(OperationContext* opCtx,
+ const std::string& ns) {
+ // Get rid of the old entry.
+ CollectionMap::iterator it = _collections.find(ns);
+ invariant(it != _collections.end());
+ delete it->second;
+ _collections.erase(it);
- Status KVDatabaseCatalogEntry::renameCollection( OperationContext* txn,
- StringData fromNS,
- StringData toNS,
- bool stayTemp ) {
+ // Now reopen fully initialized.
+ initCollection(opCtx, ns, false);
+}
- invariant(txn->lockState()->isDbLockedForMode(name(), MODE_X));
+Status KVDatabaseCatalogEntry::renameCollection(OperationContext* txn,
+ StringData fromNS,
+ StringData toNS,
+ bool stayTemp) {
+ invariant(txn->lockState()->isDbLockedForMode(name(), MODE_X));
- RecordStore* originalRS = NULL;
+ RecordStore* originalRS = NULL;
- CollectionMap::const_iterator it = _collections.find( fromNS.toString() );
- if (it == _collections.end()) {
- return Status(ErrorCodes::NamespaceNotFound, "rename cannot find collection");
- }
+ CollectionMap::const_iterator it = _collections.find(fromNS.toString());
+ if (it == _collections.end()) {
+ return Status(ErrorCodes::NamespaceNotFound, "rename cannot find collection");
+ }
- originalRS = it->second->getRecordStore();
+ originalRS = it->second->getRecordStore();
- it = _collections.find( toNS.toString() );
- if (it != _collections.end()) {
- return Status(ErrorCodes::NamespaceExists, "for rename to already exists");
- }
+ it = _collections.find(toNS.toString());
+ if (it != _collections.end()) {
+        return Status(ErrorCodes::NamespaceExists, "target namespace for rename already exists");
+ }
- const std::string identFrom = _engine->getCatalog()->getCollectionIdent( fromNS );
+ const std::string identFrom = _engine->getCatalog()->getCollectionIdent(fromNS);
- Status status = _engine->getEngine()->okToRename( txn, fromNS, toNS, identFrom, originalRS );
- if ( !status.isOK() )
- return status;
+ Status status = _engine->getEngine()->okToRename(txn, fromNS, toNS, identFrom, originalRS);
+ if (!status.isOK())
+ return status;
- status = _engine->getCatalog()->renameCollection( txn, fromNS, toNS, stayTemp );
- if ( !status.isOK() )
- return status;
+ status = _engine->getCatalog()->renameCollection(txn, fromNS, toNS, stayTemp);
+ if (!status.isOK())
+ return status;
- const std::string identTo = _engine->getCatalog()->getCollectionIdent( toNS );
+ const std::string identTo = _engine->getCatalog()->getCollectionIdent(toNS);
- invariant( identFrom == identTo );
+ invariant(identFrom == identTo);
- BSONCollectionCatalogEntry::MetaData md = _engine->getCatalog()->getMetaData( txn, toNS );
- RecordStore* rs = _engine->getEngine()->getRecordStore( txn, toNS, identTo, md.options );
+ BSONCollectionCatalogEntry::MetaData md = _engine->getCatalog()->getMetaData(txn, toNS);
+ RecordStore* rs = _engine->getEngine()->getRecordStore(txn, toNS, identTo, md.options);
- const CollectionMap::iterator itFrom = _collections.find(fromNS.toString());
- invariant(itFrom != _collections.end());
- txn->recoveryUnit()->registerChange(new RemoveCollectionChange(txn, this, fromNS, identFrom,
- itFrom->second, false));
- _collections.erase(itFrom);
+ const CollectionMap::iterator itFrom = _collections.find(fromNS.toString());
+ invariant(itFrom != _collections.end());
+ txn->recoveryUnit()->registerChange(
+ new RemoveCollectionChange(txn, this, fromNS, identFrom, itFrom->second, false));
+ _collections.erase(itFrom);
- txn->recoveryUnit()->registerChange(
- new AddCollectionChange(txn, this, toNS, identTo, false));
- _collections[toNS.toString()] =
- new KVCollectionCatalogEntry( _engine->getEngine(), _engine->getCatalog(),
- toNS, identTo, rs );
+ txn->recoveryUnit()->registerChange(new AddCollectionChange(txn, this, toNS, identTo, false));
+ _collections[toNS.toString()] = new KVCollectionCatalogEntry(
+ _engine->getEngine(), _engine->getCatalog(), toNS, identTo, rs);
- return Status::OK();
- }
+ return Status::OK();
+}
- Status KVDatabaseCatalogEntry::dropCollection(OperationContext* opCtx, StringData ns) {
- invariant(opCtx->lockState()->isDbLockedForMode(name(), MODE_X));
+Status KVDatabaseCatalogEntry::dropCollection(OperationContext* opCtx, StringData ns) {
+ invariant(opCtx->lockState()->isDbLockedForMode(name(), MODE_X));
- CollectionMap::const_iterator it = _collections.find( ns.toString() );
- if (it == _collections.end()) {
- return Status(ErrorCodes::NamespaceNotFound, "cannnot find collection to drop");
- }
+ CollectionMap::const_iterator it = _collections.find(ns.toString());
+ if (it == _collections.end()) {
+        return Status(ErrorCodes::NamespaceNotFound, "cannot find collection to drop");
+ }
- KVCollectionCatalogEntry* const entry = it->second;
+ KVCollectionCatalogEntry* const entry = it->second;
- invariant(entry->getTotalIndexCount(opCtx) == entry->getCompletedIndexCount(opCtx));
+ invariant(entry->getTotalIndexCount(opCtx) == entry->getCompletedIndexCount(opCtx));
- {
- std::vector<std::string> indexNames;
- entry->getAllIndexes( opCtx, &indexNames );
- for ( size_t i = 0; i < indexNames.size(); i++ ) {
- entry->removeIndex( opCtx, indexNames[i] );
- }
+ {
+ std::vector<std::string> indexNames;
+ entry->getAllIndexes(opCtx, &indexNames);
+ for (size_t i = 0; i < indexNames.size(); i++) {
+ entry->removeIndex(opCtx, indexNames[i]);
}
+ }
- invariant( entry->getTotalIndexCount( opCtx ) == 0 );
-
- const std::string ident = _engine->getCatalog()->getCollectionIdent(ns);
+ invariant(entry->getTotalIndexCount(opCtx) == 0);
- Status status = _engine->getCatalog()->dropCollection(opCtx, ns);
- if (!status.isOK()) {
- return status;
- }
+ const std::string ident = _engine->getCatalog()->getCollectionIdent(ns);
- // This will lazily delete the KVCollectionCatalogEntry and notify the storageEngine to
- // drop the collection only on WUOW::commit().
- opCtx->recoveryUnit()->registerChange(new RemoveCollectionChange(opCtx,
- this,
- ns,
- ident,
- it->second,
- true));
+ Status status = _engine->getCatalog()->dropCollection(opCtx, ns);
+ if (!status.isOK()) {
+ return status;
+ }
- _collections.erase( ns.toString() );
+ // This will lazily delete the KVCollectionCatalogEntry and notify the storageEngine to
+ // drop the collection only on WUOW::commit().
+ opCtx->recoveryUnit()->registerChange(
+ new RemoveCollectionChange(opCtx, this, ns, ident, it->second, true));
- return Status::OK();
- }
+ _collections.erase(ns.toString());
+ return Status::OK();
+}
}
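
A detail worth noting in renameCollection above: the engine-level ident is stable across the rename (hence invariant(identFrom == identTo)), so the rename is purely a catalog-metadata operation, and the in-memory map is updated through two deferred changes, neither of which drops storage on commit or rollback. A toy sketch of that bookkeeping (map contents are illustrative):

    #include <cassert>
    #include <map>
    #include <string>

    int main() {
        // ns -> ident, analogous to KVCatalog's NSToIdentMap.
        std::map<std::string, std::string> nsToIdent{{"a.b", "collection-7-123"}};

        const std::string identFrom = nsToIdent.at("a.b");
        nsToIdent.erase("a.b");        // RemoveCollectionChange, dropOnCommit = false
        nsToIdent["a.c"] = identFrom;  // AddCollectionChange, dropOnRollback = false

        // The record store's ident never changed; only the name that maps to it.
        assert(nsToIdent.at("a.c") == identFrom);
        return 0;
    }
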
diff --git a/src/mongo/db/storage/kv/kv_database_catalog_entry.h b/src/mongo/db/storage/kv/kv_database_catalog_entry.h
index 2bda9cc9afa..8cfd35f503b 100644
--- a/src/mongo/db/storage/kv/kv_database_catalog_entry.h
+++ b/src/mongo/db/storage/kv/kv_database_catalog_entry.h
@@ -37,69 +37,66 @@
namespace mongo {
- class KVCollectionCatalogEntry;
- class KVStorageEngine;
+class KVCollectionCatalogEntry;
+class KVStorageEngine;
- class KVDatabaseCatalogEntry : public DatabaseCatalogEntry {
- public:
- KVDatabaseCatalogEntry( StringData db, KVStorageEngine* engine );
- virtual ~KVDatabaseCatalogEntry();
+class KVDatabaseCatalogEntry : public DatabaseCatalogEntry {
+public:
+ KVDatabaseCatalogEntry(StringData db, KVStorageEngine* engine);
+ virtual ~KVDatabaseCatalogEntry();
- virtual bool exists() const;
- virtual bool isEmpty() const;
- virtual bool hasUserData() const;
+ virtual bool exists() const;
+ virtual bool isEmpty() const;
+ virtual bool hasUserData() const;
- virtual int64_t sizeOnDisk( OperationContext* opCtx ) const;
+ virtual int64_t sizeOnDisk(OperationContext* opCtx) const;
- virtual void appendExtraStats( OperationContext* opCtx,
- BSONObjBuilder* out,
- double scale ) const;
+ virtual void appendExtraStats(OperationContext* opCtx, BSONObjBuilder* out, double scale) const;
- virtual bool isOlderThan24( OperationContext* opCtx ) const { return false; }
- virtual void markIndexSafe24AndUp( OperationContext* opCtx ) {}
+ virtual bool isOlderThan24(OperationContext* opCtx) const {
+ return false;
+ }
+ virtual void markIndexSafe24AndUp(OperationContext* opCtx) {}
- virtual bool currentFilesCompatible( OperationContext* opCtx ) const;
+ virtual bool currentFilesCompatible(OperationContext* opCtx) const;
- virtual void getCollectionNamespaces( std::list<std::string>* out ) const;
+ virtual void getCollectionNamespaces(std::list<std::string>* out) const;
- virtual CollectionCatalogEntry* getCollectionCatalogEntry( StringData ns ) const;
+ virtual CollectionCatalogEntry* getCollectionCatalogEntry(StringData ns) const;
- virtual RecordStore* getRecordStore( StringData ns ) const;
+ virtual RecordStore* getRecordStore(StringData ns) const;
- virtual IndexAccessMethod* getIndex( OperationContext* txn,
- const CollectionCatalogEntry* collection,
- IndexCatalogEntry* index );
+ virtual IndexAccessMethod* getIndex(OperationContext* txn,
+ const CollectionCatalogEntry* collection,
+ IndexCatalogEntry* index);
- virtual Status createCollection( OperationContext* txn,
- StringData ns,
- const CollectionOptions& options,
- bool allocateDefaultSpace );
+ virtual Status createCollection(OperationContext* txn,
+ StringData ns,
+ const CollectionOptions& options,
+ bool allocateDefaultSpace);
- virtual Status renameCollection( OperationContext* txn,
- StringData fromNS,
- StringData toNS,
- bool stayTemp );
+ virtual Status renameCollection(OperationContext* txn,
+ StringData fromNS,
+ StringData toNS,
+ bool stayTemp);
- virtual Status dropCollection( OperationContext* opCtx,
- StringData ns );
+ virtual Status dropCollection(OperationContext* opCtx, StringData ns);
- // --------------
+ // --------------
- void initCollection( OperationContext* opCtx,
- const std::string& ns,
- bool forRepair );
+ void initCollection(OperationContext* opCtx, const std::string& ns, bool forRepair);
- void initCollectionBeforeRepair(OperationContext* opCtx, const std::string& ns);
- void reinitCollectionAfterRepair(OperationContext* opCtx, const std::string& ns);
+ void initCollectionBeforeRepair(OperationContext* opCtx, const std::string& ns);
+ void reinitCollectionAfterRepair(OperationContext* opCtx, const std::string& ns);
- private:
- class AddCollectionChange;
- class RemoveCollectionChange;
+private:
+ class AddCollectionChange;
+ class RemoveCollectionChange;
- typedef std::map<std::string, KVCollectionCatalogEntry*> CollectionMap;
+ typedef std::map<std::string, KVCollectionCatalogEntry*> CollectionMap;
- KVStorageEngine* const _engine; // not owned here
- CollectionMap _collections;
- };
+ KVStorageEngine* const _engine; // not owned here
+ CollectionMap _collections;
+};
}
diff --git a/src/mongo/db/storage/kv/kv_database_catalog_entry_get_index.cpp b/src/mongo/db/storage/kv/kv_database_catalog_entry_get_index.cpp
index 3ca8f8e5631..4df30c2b98a 100644
--- a/src/mongo/db/storage/kv/kv_database_catalog_entry_get_index.cpp
+++ b/src/mongo/db/storage/kv/kv_database_catalog_entry_get_index.cpp
@@ -49,40 +49,37 @@
namespace mongo {
- IndexAccessMethod* KVDatabaseCatalogEntry::getIndex( OperationContext* txn,
- const CollectionCatalogEntry* collection,
- IndexCatalogEntry* index ) {
- IndexDescriptor* desc = index->descriptor();
+IndexAccessMethod* KVDatabaseCatalogEntry::getIndex(OperationContext* txn,
+ const CollectionCatalogEntry* collection,
+ IndexCatalogEntry* index) {
+ IndexDescriptor* desc = index->descriptor();
- const string& type = desc->getAccessMethodName();
+ const string& type = desc->getAccessMethodName();
- string ident = _engine->getCatalog()->getIndexIdent( txn,
- collection->ns().ns(),
- desc->indexName() );
+ string ident =
+ _engine->getCatalog()->getIndexIdent(txn, collection->ns().ns(), desc->indexName());
- SortedDataInterface* sdi =
- _engine->getEngine()->getSortedDataInterface( txn, ident, desc );
+ SortedDataInterface* sdi = _engine->getEngine()->getSortedDataInterface(txn, ident, desc);
- if ("" == type)
- return new BtreeAccessMethod( index, sdi );
+ if ("" == type)
+ return new BtreeAccessMethod(index, sdi);
- if (IndexNames::HASHED == type)
- return new HashAccessMethod( index, sdi );
+ if (IndexNames::HASHED == type)
+ return new HashAccessMethod(index, sdi);
- if (IndexNames::GEO_2DSPHERE == type)
- return new S2AccessMethod( index, sdi );
+ if (IndexNames::GEO_2DSPHERE == type)
+ return new S2AccessMethod(index, sdi);
- if (IndexNames::TEXT == type)
- return new FTSAccessMethod( index, sdi );
+ if (IndexNames::TEXT == type)
+ return new FTSAccessMethod(index, sdi);
- if (IndexNames::GEO_HAYSTACK == type)
- return new HaystackAccessMethod( index, sdi );
+ if (IndexNames::GEO_HAYSTACK == type)
+ return new HaystackAccessMethod(index, sdi);
- if (IndexNames::GEO_2D == type)
- return new TwoDAccessMethod( index, sdi );
-
- log() << "Can't find index for keyPattern " << desc->keyPattern();
- invariant( false );
- }
+ if (IndexNames::GEO_2D == type)
+ return new TwoDAccessMethod(index, sdi);
+ log() << "Can't find index for keyPattern " << desc->keyPattern();
+ invariant(false);
+}
}
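
getIndex above selects the concrete IndexAccessMethod with a plain if-chain over the access-method name, ending in log-and-invariant for an unknown type. The same dispatch can be written as a factory table; the sketch below is purely illustrative (toy types, and it throws where the real code invariants), with the string keys assumed to mirror the IndexNames constants:

    #include <functional>
    #include <map>
    #include <stdexcept>
    #include <string>

    struct AccessMethod { virtual ~AccessMethod() {} };
    struct BtreeAM : AccessMethod {};
    struct HashedAM : AccessMethod {};
    struct TextAM : AccessMethod {};

    using Factory = std::function<AccessMethod*()>;

    AccessMethod* makeAccessMethod(const std::string& type) {
        // "" is the default (btree); the other keys stand in for IndexNames values.
        static const std::map<std::string, Factory> factories = {
            {"", [] { return new BtreeAM(); }},
            {"hashed", [] { return new HashedAM(); }},
            {"text", [] { return new TextAM(); }},
        };
        const auto it = factories.find(type);
        if (it == factories.end())
            throw std::invalid_argument("no access method for type: " + type);
        return it->second();  // caller takes ownership, as with getIndex
    }
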
diff --git a/src/mongo/db/storage/kv/kv_database_catalog_entry_get_index_mock.cpp b/src/mongo/db/storage/kv/kv_database_catalog_entry_get_index_mock.cpp
index 7ab8760db3a..6b453609a24 100644
--- a/src/mongo/db/storage/kv/kv_database_catalog_entry_get_index_mock.cpp
+++ b/src/mongo/db/storage/kv/kv_database_catalog_entry_get_index_mock.cpp
@@ -34,11 +34,10 @@
namespace mongo {
- // Used to satisfy link dependencies in unit test - not invoked.
- IndexAccessMethod* KVDatabaseCatalogEntry::getIndex(OperationContext* txn,
- const CollectionCatalogEntry* collection,
- IndexCatalogEntry* index) {
- invariant( false );
- }
-
+// Used to satisfy link dependencies in unit test - not invoked.
+IndexAccessMethod* KVDatabaseCatalogEntry::getIndex(OperationContext* txn,
+ const CollectionCatalogEntry* collection,
+ IndexCatalogEntry* index) {
+ invariant(false);
+}
}
diff --git a/src/mongo/db/storage/kv/kv_database_catalog_entry_test.cpp b/src/mongo/db/storage/kv/kv_database_catalog_entry_test.cpp
index 798add81b64..f9027d721c8 100644
--- a/src/mongo/db/storage/kv/kv_database_catalog_entry_test.cpp
+++ b/src/mongo/db/storage/kv/kv_database_catalog_entry_test.cpp
@@ -39,57 +39,57 @@
namespace {
- using namespace mongo;
+using namespace mongo;
- TEST(KVDatabaseCatalogEntryTest, CreateCollectionValidNamespace) {
- KVStorageEngine storageEngine(new DevNullKVEngine());
- storageEngine.finishInit();
- KVDatabaseCatalogEntry dbEntry("mydb", &storageEngine);
- OperationContextNoop ctx;
- ASSERT_OK(dbEntry.createCollection(&ctx, "mydb.mycoll", CollectionOptions(), true));
- std::list<std::string> collectionNamespaces;
- dbEntry.getCollectionNamespaces(&collectionNamespaces);
- ASSERT_FALSE(collectionNamespaces.empty());
- }
+TEST(KVDatabaseCatalogEntryTest, CreateCollectionValidNamespace) {
+ KVStorageEngine storageEngine(new DevNullKVEngine());
+ storageEngine.finishInit();
+ KVDatabaseCatalogEntry dbEntry("mydb", &storageEngine);
+ OperationContextNoop ctx;
+ ASSERT_OK(dbEntry.createCollection(&ctx, "mydb.mycoll", CollectionOptions(), true));
+ std::list<std::string> collectionNamespaces;
+ dbEntry.getCollectionNamespaces(&collectionNamespaces);
+ ASSERT_FALSE(collectionNamespaces.empty());
+}
- TEST(KVDatabaseCatalogEntryTest, CreateCollectionEmptyNamespace) {
- KVStorageEngine storageEngine(new DevNullKVEngine());
- storageEngine.finishInit();
- KVDatabaseCatalogEntry dbEntry("mydb", &storageEngine);
- OperationContextNoop ctx;
- ASSERT_NOT_OK(dbEntry.createCollection(&ctx, "", CollectionOptions(), true));
- std::list<std::string> collectionNamespaces;
- dbEntry.getCollectionNamespaces(&collectionNamespaces);
- ASSERT_TRUE(collectionNamespaces.empty());
- }
+TEST(KVDatabaseCatalogEntryTest, CreateCollectionEmptyNamespace) {
+ KVStorageEngine storageEngine(new DevNullKVEngine());
+ storageEngine.finishInit();
+ KVDatabaseCatalogEntry dbEntry("mydb", &storageEngine);
+ OperationContextNoop ctx;
+ ASSERT_NOT_OK(dbEntry.createCollection(&ctx, "", CollectionOptions(), true));
+ std::list<std::string> collectionNamespaces;
+ dbEntry.getCollectionNamespaces(&collectionNamespaces);
+ ASSERT_TRUE(collectionNamespaces.empty());
+}
- /**
- * Derived class of devnull KV engine where createRecordStore is overridden to fail
- * on an empty namespace (provided by the test).
- */
- class InvalidRecordStoreKVEngine : public DevNullKVEngine {
- public:
- virtual Status createRecordStore( OperationContext* opCtx,
- StringData ns,
- StringData ident,
- const CollectionOptions& options ) {
- if (ns == "fail.me") {
- return Status(ErrorCodes::BadValue, "failed to create record store");
- }
- return DevNullKVEngine::createRecordStore(opCtx, ns, ident, options);
+/**
+ * Derived class of devnull KV engine where createRecordStore is overridden to fail
+ * on a specific namespace ("fail.me", provided by the test).
+ */
+class InvalidRecordStoreKVEngine : public DevNullKVEngine {
+public:
+ virtual Status createRecordStore(OperationContext* opCtx,
+ StringData ns,
+ StringData ident,
+ const CollectionOptions& options) {
+ if (ns == "fail.me") {
+ return Status(ErrorCodes::BadValue, "failed to create record store");
}
- };
-
- // After createCollection fails, collection namespaces should remain empty.
- TEST(KVDatabaseCatalogEntryTest, CreateCollectionInvalidRecordStore) {
- KVStorageEngine storageEngine(new InvalidRecordStoreKVEngine());
- storageEngine.finishInit();
- KVDatabaseCatalogEntry dbEntry("fail", &storageEngine);
- OperationContextNoop ctx;
- ASSERT_NOT_OK(dbEntry.createCollection(&ctx, "fail.me", CollectionOptions(), true));
- std::list<std::string> collectionNamespaces;
- dbEntry.getCollectionNamespaces(&collectionNamespaces);
- ASSERT_TRUE(collectionNamespaces.empty());
+ return DevNullKVEngine::createRecordStore(opCtx, ns, ident, options);
}
+};
+
+// After createCollection fails, collection namespaces should remain empty.
+TEST(KVDatabaseCatalogEntryTest, CreateCollectionInvalidRecordStore) {
+ KVStorageEngine storageEngine(new InvalidRecordStoreKVEngine());
+ storageEngine.finishInit();
+ KVDatabaseCatalogEntry dbEntry("fail", &storageEngine);
+ OperationContextNoop ctx;
+ ASSERT_NOT_OK(dbEntry.createCollection(&ctx, "fail.me", CollectionOptions(), true));
+ std::list<std::string> collectionNamespaces;
+ dbEntry.getCollectionNamespaces(&collectionNamespaces);
+ ASSERT_TRUE(collectionNamespaces.empty());
+}
} // namespace
diff --git a/src/mongo/db/storage/kv/kv_engine.h b/src/mongo/db/storage/kv/kv_engine.h
index ccc127c1abd..bf73380e4e9 100644
--- a/src/mongo/db/storage/kv/kv_engine.h
+++ b/src/mongo/db/storage/kv/kv_engine.h
@@ -39,103 +39,100 @@
namespace mongo {
- class IndexDescriptor;
- class OperationContext;
- class RecordStore;
- class RecoveryUnit;
- class SortedDataInterface;
-
- class KVEngine {
- public:
-
- virtual RecoveryUnit* newRecoveryUnit() = 0;
-
- // ---------
-
- /**
- * Caller takes ownership
- * Having multiple out for the same ns is a rules violation;
- * Calling on a non-created ident is invalid and may crash.
- */
- virtual RecordStore* getRecordStore( OperationContext* opCtx,
- StringData ns,
+class IndexDescriptor;
+class OperationContext;
+class RecordStore;
+class RecoveryUnit;
+class SortedDataInterface;
+
+class KVEngine {
+public:
+ virtual RecoveryUnit* newRecoveryUnit() = 0;
+
+ // ---------
+
+ /**
+     * Caller takes ownership.
+     * Having multiple RecordStores out for the same ns is a rules violation;
+     * calling on a non-created ident is invalid and may crash.
+ */
+ virtual RecordStore* getRecordStore(OperationContext* opCtx,
+ StringData ns,
+ StringData ident,
+ const CollectionOptions& options) = 0;
+
+ virtual SortedDataInterface* getSortedDataInterface(OperationContext* opCtx,
+ StringData ident,
+ const IndexDescriptor* desc) = 0;
+
+ //
+ // The create and drop methods on KVEngine are not transactional. Transactional semantics
+ // are provided by the KVStorageEngine code that calls these. For example, drop will be
+ // called if a create is rolled back. A higher-level drop operation will only propagate to a
+ // drop call on the KVEngine once the WUOW commits. Therefore drops will never be rolled
+ // back and it is safe to immediately reclaim storage.
+ //
+
+ virtual Status createRecordStore(OperationContext* opCtx,
+ StringData ns,
+ StringData ident,
+ const CollectionOptions& options) = 0;
+
+ virtual Status createSortedDataInterface(OperationContext* opCtx,
StringData ident,
- const CollectionOptions& options ) = 0;
-
- virtual SortedDataInterface* getSortedDataInterface( OperationContext* opCtx,
- StringData ident,
- const IndexDescriptor* desc ) = 0;
-
- //
- // The create and drop methods on KVEngine are not transactional. Transactional semantics
- // are provided by the KVStorageEngine code that calls these. For example, drop will be
- // called if a create is rolled back. A higher-level drop operation will only propagate to a
- // drop call on the KVEngine once the WUOW commits. Therefore drops will never be rolled
- // back and it is safe to immediately reclaim storage.
- //
-
- virtual Status createRecordStore( OperationContext* opCtx,
- StringData ns,
- StringData ident,
- const CollectionOptions& options ) = 0;
-
- virtual Status createSortedDataInterface( OperationContext* opCtx,
- StringData ident,
- const IndexDescriptor* desc ) = 0;
-
- virtual int64_t getIdentSize( OperationContext* opCtx,
- StringData ident ) = 0;
-
- virtual Status repairIdent( OperationContext* opCtx,
- StringData ident ) = 0;
-
- virtual Status dropIdent( OperationContext* opCtx,
- StringData ident ) = 0;
-
- // optional
- virtual int flushAllFiles( bool sync ) { return 0; }
-
- virtual bool isDurable() const = 0;
-
- /**
- * This must not change over the lifetime of the engine.
- */
- virtual bool supportsDocLocking() const = 0;
-
- /**
- * Returns true if storage engine supports --directoryperdb.
- * See:
- * http://docs.mongodb.org/manual/reference/program/mongod/#cmdoption--directoryperdb
- */
- virtual bool supportsDirectoryPerDB() const = 0;
-
- virtual Status okToRename( OperationContext* opCtx,
- StringData fromNS,
- StringData toNS,
- StringData ident,
- const RecordStore* originalRecordStore ) const {
- return Status::OK();
- }
-
- virtual bool hasIdent(OperationContext* opCtx, StringData ident) const = 0;
-
- virtual std::vector<std::string> getAllIdents( OperationContext* opCtx ) const = 0;
-
- /**
- * This method will be called before there is a clean shutdown. Storage engines should
- * override this method if they have clean-up to do that is different from unclean shutdown.
- * MongoDB will not call into the storage subsystem after calling this function.
- *
- * There is intentionally no uncleanShutdown().
- */
- virtual void cleanShutdown() = 0;
-
- /**
- * The destructor will never be called from mongod, but may be called from tests.
- * Engines may assume that this will only be called in the case of clean shutdown, even if
- * cleanShutdown() hasn't been called.
- */
- virtual ~KVEngine() {}
- };
-
+ const IndexDescriptor* desc) = 0;
+
+ virtual int64_t getIdentSize(OperationContext* opCtx, StringData ident) = 0;
+
+ virtual Status repairIdent(OperationContext* opCtx, StringData ident) = 0;
+
+ virtual Status dropIdent(OperationContext* opCtx, StringData ident) = 0;
+
+ // optional
+ virtual int flushAllFiles(bool sync) {
+ return 0;
+ }
+
+ virtual bool isDurable() const = 0;
+
+ /**
+ * This must not change over the lifetime of the engine.
+ */
+ virtual bool supportsDocLocking() const = 0;
+
+ /**
+ * Returns true if storage engine supports --directoryperdb.
+ * See:
+ * http://docs.mongodb.org/manual/reference/program/mongod/#cmdoption--directoryperdb
+ */
+ virtual bool supportsDirectoryPerDB() const = 0;
+
+ virtual Status okToRename(OperationContext* opCtx,
+ StringData fromNS,
+ StringData toNS,
+ StringData ident,
+ const RecordStore* originalRecordStore) const {
+ return Status::OK();
+ }
+
+ virtual bool hasIdent(OperationContext* opCtx, StringData ident) const = 0;
+
+ virtual std::vector<std::string> getAllIdents(OperationContext* opCtx) const = 0;
+
+ /**
+ * This method will be called before there is a clean shutdown. Storage engines should
+ * override this method if they have clean-up to do that is different from unclean shutdown.
+ * MongoDB will not call into the storage subsystem after calling this function.
+ *
+ * There is intentionally no uncleanShutdown().
+ */
+ virtual void cleanShutdown() = 0;
+
+ /**
+ * The destructor will never be called from mongod, but may be called from tests.
+ * Engines may assume that this will only be called in the case of clean shutdown, even if
+ * cleanShutdown() hasn't been called.
+ */
+ virtual ~KVEngine() {}
+};
}
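
The comment block inside KVEngine above is the central contract of this layer: create and drop are raw, immediate operations, and the KVStorageEngine code above the engine supplies transactional behavior by pairing each create with a compensating drop on rollback and by withholding requested drops until commit. A standalone sketch of that layering with toy types (nothing here is the real API):

    #include <functional>
    #include <iostream>
    #include <string>
    #include <vector>

    // Stand-in for a KVEngine: every call takes effect immediately.
    struct ToyEngine {
        void createIdent(const std::string& ident) {
            std::cout << "create " << ident << "\n";
        }
        void dropIdent(const std::string& ident) {
            std::cout << "drop " << ident << "\n";  // storage reclaimed right away
        }
    };

    int main() {
        ToyEngine engine;
        std::vector<std::function<void()>> onRollback;
        std::vector<std::function<void()>> onCommit;

        // Layer above the engine: create now, register the compensating drop.
        engine.createIdent("collection-7-99");
        onRollback.push_back([&] { engine.dropIdent("collection-7-99"); });

        // A requested drop is only queued; the engine sees nothing yet.
        onCommit.push_back([&] { engine.dropIdent("index-8-99"); });

        const bool committed = false;  // pretend this unit of work aborted
        if (committed) {
            for (auto& action : onCommit)
                action();
        } else {
            for (auto it = onRollback.rbegin(); it != onRollback.rend(); ++it)
                (*it)();
        }
        // Because drops reach the engine only after commit, a drop is never
        // rolled back and the engine may reclaim storage immediately.
        return 0;
    }
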
diff --git a/src/mongo/db/storage/kv/kv_engine_test_harness.cpp b/src/mongo/db/storage/kv/kv_engine_test_harness.cpp
index 36aeefcf2c4..c225fb7ada3 100644
--- a/src/mongo/db/storage/kv/kv_engine_test_harness.cpp
+++ b/src/mongo/db/storage/kv/kv_engine_test_harness.cpp
@@ -41,372 +41,366 @@
namespace mongo {
- using std::unique_ptr;
- using std::string;
-
- namespace {
- class MyOperationContext : public OperationContextNoop {
- public:
- MyOperationContext( KVEngine* engine )
- : OperationContextNoop( engine->newRecoveryUnit() ) {
- }
- };
- }
-
- TEST( KVEngineTestHarness, SimpleRS1 ) {
- unique_ptr<KVHarnessHelper> helper( KVHarnessHelper::create() );
- KVEngine* engine = helper->getEngine();
- ASSERT( engine );
-
- string ns = "a.b";
- unique_ptr<RecordStore> rs;
- {
- MyOperationContext opCtx( engine );
- ASSERT_OK( engine->createRecordStore( &opCtx, ns, ns, CollectionOptions() ) );
- rs.reset( engine->getRecordStore( &opCtx, ns, ns, CollectionOptions() ) );
- ASSERT( rs );
- }
-
-
- RecordId loc;
- {
- MyOperationContext opCtx( engine );
- WriteUnitOfWork uow( &opCtx );
- StatusWith<RecordId> res = rs->insertRecord( &opCtx, "abc", 4, false );
- ASSERT_OK( res.getStatus() );
- loc = res.getValue();
- uow.commit();
- }
-
- {
- MyOperationContext opCtx( engine );
- ASSERT_EQUALS( string("abc"), rs->dataFor( &opCtx, loc ).data() );
- }
-
- {
- MyOperationContext opCtx( engine );
- std::vector<std::string> all = engine->getAllIdents( &opCtx );
- ASSERT_EQUALS( 1U, all.size() );
- ASSERT_EQUALS( ns, all[0] );
- }
+using std::unique_ptr;
+using std::string;
+
+namespace {
+class MyOperationContext : public OperationContextNoop {
+public:
+ MyOperationContext(KVEngine* engine) : OperationContextNoop(engine->newRecoveryUnit()) {}
+};
+}
+TEST(KVEngineTestHarness, SimpleRS1) {
+ unique_ptr<KVHarnessHelper> helper(KVHarnessHelper::create());
+ KVEngine* engine = helper->getEngine();
+ ASSERT(engine);
+
+ string ns = "a.b";
+ unique_ptr<RecordStore> rs;
+ {
+ MyOperationContext opCtx(engine);
+ ASSERT_OK(engine->createRecordStore(&opCtx, ns, ns, CollectionOptions()));
+ rs.reset(engine->getRecordStore(&opCtx, ns, ns, CollectionOptions()));
+ ASSERT(rs);
}
- TEST( KVEngineTestHarness, Restart1 ) {
- unique_ptr<KVHarnessHelper> helper( KVHarnessHelper::create() );
- KVEngine* engine = helper->getEngine();
- ASSERT( engine );
-
- string ns = "a.b";
-
- // 'loc' holds location of "abc" and is referenced after restarting engine.
- RecordId loc;
- {
- unique_ptr<RecordStore> rs;
- {
- MyOperationContext opCtx( engine );
- ASSERT_OK( engine->createRecordStore( &opCtx, ns, ns, CollectionOptions() ) );
- rs.reset( engine->getRecordStore( &opCtx, ns, ns, CollectionOptions() ) );
- ASSERT( rs );
- }
-
- {
- MyOperationContext opCtx( engine );
- WriteUnitOfWork uow( &opCtx );
- StatusWith<RecordId> res = rs->insertRecord( &opCtx, "abc", 4, false );
- ASSERT_OK( res.getStatus() );
- loc = res.getValue();
- uow.commit();
- }
-
- {
- MyOperationContext opCtx( engine );
- ASSERT_EQUALS( string("abc"), rs->dataFor( &opCtx, loc ).data() );
- }
- }
- engine = helper->restartEngine();
+ RecordId loc;
+ {
+ MyOperationContext opCtx(engine);
+ WriteUnitOfWork uow(&opCtx);
+ StatusWith<RecordId> res = rs->insertRecord(&opCtx, "abc", 4, false);
+ ASSERT_OK(res.getStatus());
+ loc = res.getValue();
+ uow.commit();
+ }
- {
- unique_ptr<RecordStore> rs;
- MyOperationContext opCtx( engine );
- rs.reset( engine->getRecordStore( &opCtx, ns, ns, CollectionOptions() ) );
- ASSERT_EQUALS( string("abc"), rs->dataFor( &opCtx, loc ).data() );
- }
+ {
+ MyOperationContext opCtx(engine);
+ ASSERT_EQUALS(string("abc"), rs->dataFor(&opCtx, loc).data());
+ }
+ {
+ MyOperationContext opCtx(engine);
+ std::vector<std::string> all = engine->getAllIdents(&opCtx);
+ ASSERT_EQUALS(1U, all.size());
+ ASSERT_EQUALS(ns, all[0]);
}
+}
+TEST(KVEngineTestHarness, Restart1) {
+ unique_ptr<KVHarnessHelper> helper(KVHarnessHelper::create());
+ KVEngine* engine = helper->getEngine();
+ ASSERT(engine);
- TEST( KVEngineTestHarness, SimpleSorted1 ) {
- unique_ptr<KVHarnessHelper> helper( KVHarnessHelper::create() );
- KVEngine* engine = helper->getEngine();
- ASSERT( engine );
+ string ns = "a.b";
- string ident = "abc";
- IndexDescriptor desc( NULL, "", BSON( "key" << BSON( "a" << 1 ) ) );
- unique_ptr<SortedDataInterface> sorted;
+ // 'loc' holds location of "abc" and is referenced after restarting engine.
+ RecordId loc;
+ {
+ unique_ptr<RecordStore> rs;
{
- MyOperationContext opCtx( engine );
- ASSERT_OK( engine->createSortedDataInterface( &opCtx, ident, &desc ) );
- sorted.reset( engine->getSortedDataInterface( &opCtx, ident, &desc ) );
- ASSERT( sorted );
+ MyOperationContext opCtx(engine);
+ ASSERT_OK(engine->createRecordStore(&opCtx, ns, ns, CollectionOptions()));
+ rs.reset(engine->getRecordStore(&opCtx, ns, ns, CollectionOptions()));
+ ASSERT(rs);
}
{
- MyOperationContext opCtx( engine );
- WriteUnitOfWork uow( &opCtx );
- ASSERT_OK( sorted->insert( &opCtx, BSON( "" << 5 ), RecordId( 6, 4 ), true ) );
+ MyOperationContext opCtx(engine);
+ WriteUnitOfWork uow(&opCtx);
+ StatusWith<RecordId> res = rs->insertRecord(&opCtx, "abc", 4, false);
+ ASSERT_OK(res.getStatus());
+ loc = res.getValue();
uow.commit();
}
{
- MyOperationContext opCtx( engine );
- ASSERT_EQUALS( 1, sorted->numEntries( &opCtx ) );
+ MyOperationContext opCtx(engine);
+ ASSERT_EQUALS(string("abc"), rs->dataFor(&opCtx, loc).data());
}
-
}
- TEST( KVCatalogTest, Coll1 ) {
- unique_ptr<KVHarnessHelper> helper( KVHarnessHelper::create() );
- KVEngine* engine = helper->getEngine();
+ engine = helper->restartEngine();
+ {
unique_ptr<RecordStore> rs;
- unique_ptr<KVCatalog> catalog;
- {
- MyOperationContext opCtx( engine );
- WriteUnitOfWork uow( &opCtx );
- ASSERT_OK( engine->createRecordStore( &opCtx, "catalog", "catalog", CollectionOptions() ) );
- rs.reset( engine->getRecordStore( &opCtx, "catalog", "catalog", CollectionOptions() ) );
- catalog.reset( new KVCatalog( rs.get(), true, false, false) );
- uow.commit();
- }
+ MyOperationContext opCtx(engine);
+ rs.reset(engine->getRecordStore(&opCtx, ns, ns, CollectionOptions()));
+ ASSERT_EQUALS(string("abc"), rs->dataFor(&opCtx, loc).data());
+ }
+}
- {
- MyOperationContext opCtx( engine );
- WriteUnitOfWork uow( &opCtx );
- ASSERT_OK( catalog->newCollection( &opCtx, "a.b", CollectionOptions() ) );
- ASSERT_NOT_EQUALS( "a.b", catalog->getCollectionIdent( "a.b" ) );
- uow.commit();
- }
- string ident = catalog->getCollectionIdent( "a.b" );
- {
- MyOperationContext opCtx( engine );
- WriteUnitOfWork uow( &opCtx );
- catalog.reset( new KVCatalog( rs.get(), true, false, false) );
- catalog->init( &opCtx );
- uow.commit();
- }
- ASSERT_EQUALS( ident, catalog->getCollectionIdent( "a.b" ) );
+TEST(KVEngineTestHarness, SimpleSorted1) {
+ unique_ptr<KVHarnessHelper> helper(KVHarnessHelper::create());
+ KVEngine* engine = helper->getEngine();
+ ASSERT(engine);
- {
- MyOperationContext opCtx( engine );
- WriteUnitOfWork uow( &opCtx );
- catalog->dropCollection( &opCtx, "a.b" );
- catalog->newCollection( &opCtx, "a.b", CollectionOptions() );
- uow.commit();
- }
- ASSERT_NOT_EQUALS( ident, catalog->getCollectionIdent( "a.b" ) );
+ string ident = "abc";
+ IndexDescriptor desc(NULL, "", BSON("key" << BSON("a" << 1)));
+ unique_ptr<SortedDataInterface> sorted;
+ {
+ MyOperationContext opCtx(engine);
+ ASSERT_OK(engine->createSortedDataInterface(&opCtx, ident, &desc));
+ sorted.reset(engine->getSortedDataInterface(&opCtx, ident, &desc));
+ ASSERT(sorted);
}
+ {
+ MyOperationContext opCtx(engine);
+ WriteUnitOfWork uow(&opCtx);
+ ASSERT_OK(sorted->insert(&opCtx, BSON("" << 5), RecordId(6, 4), true));
+ uow.commit();
+ }
- TEST( KVCatalogTest, Idx1 ) {
- unique_ptr<KVHarnessHelper> helper( KVHarnessHelper::create() );
- KVEngine* engine = helper->getEngine();
-
- unique_ptr<RecordStore> rs;
- unique_ptr<KVCatalog> catalog;
- {
- MyOperationContext opCtx( engine );
- WriteUnitOfWork uow( &opCtx );
- ASSERT_OK( engine->createRecordStore( &opCtx, "catalog", "catalog", CollectionOptions() ) );
- rs.reset( engine->getRecordStore( &opCtx, "catalog", "catalog", CollectionOptions() ) );
- catalog.reset( new KVCatalog( rs.get(), true, false, false) );
- uow.commit();
- }
-
- {
- MyOperationContext opCtx( engine );
- WriteUnitOfWork uow( &opCtx );
- ASSERT_OK( catalog->newCollection( &opCtx, "a.b", CollectionOptions() ) );
- ASSERT_NOT_EQUALS( "a.b", catalog->getCollectionIdent( "a.b" ) );
- ASSERT_TRUE( catalog->isUserDataIdent( catalog->getCollectionIdent( "a.b" ) ) );
- uow.commit();
- }
+ {
+ MyOperationContext opCtx(engine);
+ ASSERT_EQUALS(1, sorted->numEntries(&opCtx));
+ }
+}
- {
- MyOperationContext opCtx( engine );
- WriteUnitOfWork uow( &opCtx );
-
- BSONCollectionCatalogEntry::MetaData md;
- md.ns ="a.b";
- md.indexes.push_back( BSONCollectionCatalogEntry::IndexMetaData( BSON( "name" << "foo" ),
- false,
- RecordId(),
- false ) );
- catalog->putMetaData( &opCtx, "a.b", md );
- uow.commit();
- }
+TEST(KVCatalogTest, Coll1) {
+ unique_ptr<KVHarnessHelper> helper(KVHarnessHelper::create());
+ KVEngine* engine = helper->getEngine();
+
+ unique_ptr<RecordStore> rs;
+ unique_ptr<KVCatalog> catalog;
+ {
+ MyOperationContext opCtx(engine);
+ WriteUnitOfWork uow(&opCtx);
+ ASSERT_OK(engine->createRecordStore(&opCtx, "catalog", "catalog", CollectionOptions()));
+ rs.reset(engine->getRecordStore(&opCtx, "catalog", "catalog", CollectionOptions()));
+ catalog.reset(new KVCatalog(rs.get(), true, false, false));
+ uow.commit();
+ }
- string idxIndent;
- {
- MyOperationContext opCtx( engine );
- idxIndent = catalog->getIndexIdent( &opCtx, "a.b", "foo" );
- }
+ {
+ MyOperationContext opCtx(engine);
+ WriteUnitOfWork uow(&opCtx);
+ ASSERT_OK(catalog->newCollection(&opCtx, "a.b", CollectionOptions()));
+ ASSERT_NOT_EQUALS("a.b", catalog->getCollectionIdent("a.b"));
+ uow.commit();
+ }
- {
- MyOperationContext opCtx( engine );
- ASSERT_EQUALS( idxIndent, catalog->getIndexIdent( &opCtx, "a.b", "foo" ) );
- ASSERT_TRUE( catalog->isUserDataIdent( catalog->getIndexIdent( &opCtx, "a.b", "foo" ) ) );
- }
+ string ident = catalog->getCollectionIdent("a.b");
+ {
+ MyOperationContext opCtx(engine);
+ WriteUnitOfWork uow(&opCtx);
+ catalog.reset(new KVCatalog(rs.get(), true, false, false));
+ catalog->init(&opCtx);
+ uow.commit();
+ }
+ ASSERT_EQUALS(ident, catalog->getCollectionIdent("a.b"));
+
+ {
+ MyOperationContext opCtx(engine);
+ WriteUnitOfWork uow(&opCtx);
+ catalog->dropCollection(&opCtx, "a.b");
+ catalog->newCollection(&opCtx, "a.b", CollectionOptions());
+ uow.commit();
+ }
+ ASSERT_NOT_EQUALS(ident, catalog->getCollectionIdent("a.b"));
+}
- {
- MyOperationContext opCtx( engine );
- WriteUnitOfWork uow( &opCtx );
-
- BSONCollectionCatalogEntry::MetaData md;
- md.ns ="a.b";
- catalog->putMetaData( &opCtx, "a.b", md ); // remove index
- md.indexes.push_back( BSONCollectionCatalogEntry::IndexMetaData( BSON( "name" << "foo" ),
- false,
- RecordId(),
- false ) );
- catalog->putMetaData( &opCtx, "a.b", md );
- uow.commit();
- }
- {
- MyOperationContext opCtx( engine );
- ASSERT_NOT_EQUALS( idxIndent, catalog->getIndexIdent( &opCtx, "a.b", "foo" ) );
- }
+TEST(KVCatalogTest, Idx1) {
+ unique_ptr<KVHarnessHelper> helper(KVHarnessHelper::create());
+ KVEngine* engine = helper->getEngine();
+ unique_ptr<RecordStore> rs;
+ unique_ptr<KVCatalog> catalog;
+ {
+ MyOperationContext opCtx(engine);
+ WriteUnitOfWork uow(&opCtx);
+ ASSERT_OK(engine->createRecordStore(&opCtx, "catalog", "catalog", CollectionOptions()));
+ rs.reset(engine->getRecordStore(&opCtx, "catalog", "catalog", CollectionOptions()));
+ catalog.reset(new KVCatalog(rs.get(), true, false, false));
+ uow.commit();
}
- TEST( KVCatalogTest, DirectoryPerDb1 ) {
- unique_ptr<KVHarnessHelper> helper( KVHarnessHelper::create() );
- KVEngine* engine = helper->getEngine();
-
- unique_ptr<RecordStore> rs;
- unique_ptr<KVCatalog> catalog;
- {
- MyOperationContext opCtx( engine );
- WriteUnitOfWork uow( &opCtx );
- ASSERT_OK( engine->createRecordStore( &opCtx, "catalog", "catalog", CollectionOptions() ) );
- rs.reset( engine->getRecordStore( &opCtx, "catalog", "catalog", CollectionOptions() ) );
- catalog.reset( new KVCatalog( rs.get(), true, true, false) );
- uow.commit();
- }
-
- { // collection
- MyOperationContext opCtx( engine );
- WriteUnitOfWork uow( &opCtx );
- ASSERT_OK( catalog->newCollection( &opCtx, "a.b", CollectionOptions() ) );
- ASSERT_STRING_CONTAINS( catalog->getCollectionIdent( "a.b" ), "a/" );
- ASSERT_TRUE( catalog->isUserDataIdent( catalog->getCollectionIdent( "a.b" ) ) );
- uow.commit();
- }
+ {
+ MyOperationContext opCtx(engine);
+ WriteUnitOfWork uow(&opCtx);
+ ASSERT_OK(catalog->newCollection(&opCtx, "a.b", CollectionOptions()));
+ ASSERT_NOT_EQUALS("a.b", catalog->getCollectionIdent("a.b"));
+ ASSERT_TRUE(catalog->isUserDataIdent(catalog->getCollectionIdent("a.b")));
+ uow.commit();
+ }
- { // index
- MyOperationContext opCtx( engine );
- WriteUnitOfWork uow( &opCtx );
-
- BSONCollectionCatalogEntry::MetaData md;
- md.ns ="a.b";
- md.indexes.push_back( BSONCollectionCatalogEntry::IndexMetaData( BSON( "name" << "foo" ),
- false,
- RecordId(),
- false ) );
- catalog->putMetaData( &opCtx, "a.b", md );
- ASSERT_STRING_CONTAINS( catalog->getIndexIdent( &opCtx, "a.b", "foo" ), "a/" );
- ASSERT_TRUE( catalog->isUserDataIdent( catalog->getIndexIdent( &opCtx, "a.b", "foo" ) ) );
- uow.commit();
- }
+ {
+ MyOperationContext opCtx(engine);
+ WriteUnitOfWork uow(&opCtx);
+
+ BSONCollectionCatalogEntry::MetaData md;
+ md.ns = "a.b";
+ md.indexes.push_back(BSONCollectionCatalogEntry::IndexMetaData(BSON("name"
+ << "foo"),
+ false,
+ RecordId(),
+ false));
+ catalog->putMetaData(&opCtx, "a.b", md);
+ uow.commit();
+ }
+ string idxIndent;
+ {
+ MyOperationContext opCtx(engine);
+ idxIndent = catalog->getIndexIdent(&opCtx, "a.b", "foo");
}
- TEST( KVCatalogTest, Split1 ) {
- unique_ptr<KVHarnessHelper> helper( KVHarnessHelper::create() );
- KVEngine* engine = helper->getEngine();
+ {
+ MyOperationContext opCtx(engine);
+ ASSERT_EQUALS(idxIndent, catalog->getIndexIdent(&opCtx, "a.b", "foo"));
+ ASSERT_TRUE(catalog->isUserDataIdent(catalog->getIndexIdent(&opCtx, "a.b", "foo")));
+ }
- unique_ptr<RecordStore> rs;
- unique_ptr<KVCatalog> catalog;
- {
- MyOperationContext opCtx( engine );
- WriteUnitOfWork uow( &opCtx );
- ASSERT_OK( engine->createRecordStore( &opCtx, "catalog", "catalog", CollectionOptions() ) );
- rs.reset( engine->getRecordStore( &opCtx, "catalog", "catalog", CollectionOptions() ) );
- catalog.reset( new KVCatalog( rs.get(), true, false, true) );
- uow.commit();
- }
+ {
+ MyOperationContext opCtx(engine);
+ WriteUnitOfWork uow(&opCtx);
+
+ BSONCollectionCatalogEntry::MetaData md;
+ md.ns = "a.b";
+ catalog->putMetaData(&opCtx, "a.b", md); // remove index
+ md.indexes.push_back(BSONCollectionCatalogEntry::IndexMetaData(BSON("name"
+ << "foo"),
+ false,
+ RecordId(),
+ false));
+ catalog->putMetaData(&opCtx, "a.b", md);
+ uow.commit();
+ }
- {
- MyOperationContext opCtx( engine );
- WriteUnitOfWork uow( &opCtx );
- ASSERT_OK( catalog->newCollection( &opCtx, "a.b", CollectionOptions() ) );
- ASSERT_STRING_CONTAINS( catalog->getCollectionIdent( "a.b" ), "collection/" );
- ASSERT_TRUE( catalog->isUserDataIdent( catalog->getCollectionIdent( "a.b" ) ) );
- uow.commit();
- }
+ {
+ MyOperationContext opCtx(engine);
+ ASSERT_NOT_EQUALS(idxIndent, catalog->getIndexIdent(&opCtx, "a.b", "foo"));
+ }
+}
- { // index
- MyOperationContext opCtx( engine );
- WriteUnitOfWork uow( &opCtx );
-
- BSONCollectionCatalogEntry::MetaData md;
- md.ns ="a.b";
- md.indexes.push_back( BSONCollectionCatalogEntry::IndexMetaData( BSON( "name" << "foo" ),
- false,
- RecordId(),
- false ) );
- catalog->putMetaData( &opCtx, "a.b", md );
- ASSERT_STRING_CONTAINS( catalog->getIndexIdent( &opCtx, "a.b", "foo" ), "index/" );
- ASSERT_TRUE( catalog->isUserDataIdent( catalog->getIndexIdent( &opCtx, "a.b", "foo" ) ) );
- uow.commit();
- }
+TEST(KVCatalogTest, DirectoryPerDb1) {
+ unique_ptr<KVHarnessHelper> helper(KVHarnessHelper::create());
+ KVEngine* engine = helper->getEngine();
+
+ unique_ptr<RecordStore> rs;
+ unique_ptr<KVCatalog> catalog;
+ {
+ MyOperationContext opCtx(engine);
+ WriteUnitOfWork uow(&opCtx);
+ ASSERT_OK(engine->createRecordStore(&opCtx, "catalog", "catalog", CollectionOptions()));
+ rs.reset(engine->getRecordStore(&opCtx, "catalog", "catalog", CollectionOptions()));
+ catalog.reset(new KVCatalog(rs.get(), true, true, false));
+ uow.commit();
+ }
+ { // collection
+ MyOperationContext opCtx(engine);
+ WriteUnitOfWork uow(&opCtx);
+ ASSERT_OK(catalog->newCollection(&opCtx, "a.b", CollectionOptions()));
+ ASSERT_STRING_CONTAINS(catalog->getCollectionIdent("a.b"), "a/");
+ ASSERT_TRUE(catalog->isUserDataIdent(catalog->getCollectionIdent("a.b")));
+ uow.commit();
}
- TEST( KVCatalogTest, DirectoryPerAndSplit1 ) {
- unique_ptr<KVHarnessHelper> helper( KVHarnessHelper::create() );
- KVEngine* engine = helper->getEngine();
+ { // index
+ MyOperationContext opCtx(engine);
+ WriteUnitOfWork uow(&opCtx);
+
+ BSONCollectionCatalogEntry::MetaData md;
+ md.ns = "a.b";
+ md.indexes.push_back(BSONCollectionCatalogEntry::IndexMetaData(BSON("name"
+ << "foo"),
+ false,
+ RecordId(),
+ false));
+ catalog->putMetaData(&opCtx, "a.b", md);
+ ASSERT_STRING_CONTAINS(catalog->getIndexIdent(&opCtx, "a.b", "foo"), "a/");
+ ASSERT_TRUE(catalog->isUserDataIdent(catalog->getIndexIdent(&opCtx, "a.b", "foo")));
+ uow.commit();
+ }
+}
- unique_ptr<RecordStore> rs;
- unique_ptr<KVCatalog> catalog;
- {
- MyOperationContext opCtx( engine );
- WriteUnitOfWork uow( &opCtx );
- ASSERT_OK( engine->createRecordStore( &opCtx, "catalog", "catalog", CollectionOptions() ) );
- rs.reset( engine->getRecordStore( &opCtx, "catalog", "catalog", CollectionOptions() ) );
- catalog.reset( new KVCatalog( rs.get(), true, true, true) );
- uow.commit();
- }
+TEST(KVCatalogTest, Split1) {
+ unique_ptr<KVHarnessHelper> helper(KVHarnessHelper::create());
+ KVEngine* engine = helper->getEngine();
+
+ unique_ptr<RecordStore> rs;
+ unique_ptr<KVCatalog> catalog;
+ {
+ MyOperationContext opCtx(engine);
+ WriteUnitOfWork uow(&opCtx);
+ ASSERT_OK(engine->createRecordStore(&opCtx, "catalog", "catalog", CollectionOptions()));
+ rs.reset(engine->getRecordStore(&opCtx, "catalog", "catalog", CollectionOptions()));
+ catalog.reset(new KVCatalog(rs.get(), true, false, true));
+ uow.commit();
+ }
- {
- MyOperationContext opCtx( engine );
- WriteUnitOfWork uow( &opCtx );
- ASSERT_OK( catalog->newCollection( &opCtx, "a.b", CollectionOptions() ) );
- ASSERT_STRING_CONTAINS( catalog->getCollectionIdent( "a.b" ), "a/collection/" );
- ASSERT_TRUE( catalog->isUserDataIdent( catalog->getCollectionIdent( "a.b" ) ) );
- uow.commit();
- }
+ {
+ MyOperationContext opCtx(engine);
+ WriteUnitOfWork uow(&opCtx);
+ ASSERT_OK(catalog->newCollection(&opCtx, "a.b", CollectionOptions()));
+ ASSERT_STRING_CONTAINS(catalog->getCollectionIdent("a.b"), "collection/");
+ ASSERT_TRUE(catalog->isUserDataIdent(catalog->getCollectionIdent("a.b")));
+ uow.commit();
+ }
- { // index
- MyOperationContext opCtx( engine );
- WriteUnitOfWork uow( &opCtx );
-
- BSONCollectionCatalogEntry::MetaData md;
- md.ns ="a.b";
- md.indexes.push_back( BSONCollectionCatalogEntry::IndexMetaData( BSON( "name" << "foo" ),
- false,
- RecordId(),
- false ) );
- catalog->putMetaData( &opCtx, "a.b", md );
- ASSERT_STRING_CONTAINS( catalog->getIndexIdent( &opCtx, "a.b", "foo" ), "a/index/" );
- ASSERT_TRUE( catalog->isUserDataIdent( catalog->getIndexIdent( &opCtx, "a.b", "foo" ) ) );
- uow.commit();
- }
+ { // index
+ MyOperationContext opCtx(engine);
+ WriteUnitOfWork uow(&opCtx);
+
+ BSONCollectionCatalogEntry::MetaData md;
+ md.ns = "a.b";
+ md.indexes.push_back(BSONCollectionCatalogEntry::IndexMetaData(BSON("name"
+ << "foo"),
+ false,
+ RecordId(),
+ false));
+ catalog->putMetaData(&opCtx, "a.b", md);
+ ASSERT_STRING_CONTAINS(catalog->getIndexIdent(&opCtx, "a.b", "foo"), "index/");
+ ASSERT_TRUE(catalog->isUserDataIdent(catalog->getIndexIdent(&opCtx, "a.b", "foo")));
+ uow.commit();
+ }
+}
+TEST(KVCatalogTest, DirectoryPerAndSplit1) {
+ unique_ptr<KVHarnessHelper> helper(KVHarnessHelper::create());
+ KVEngine* engine = helper->getEngine();
+
+ unique_ptr<RecordStore> rs;
+ unique_ptr<KVCatalog> catalog;
+ {
+ MyOperationContext opCtx(engine);
+ WriteUnitOfWork uow(&opCtx);
+ ASSERT_OK(engine->createRecordStore(&opCtx, "catalog", "catalog", CollectionOptions()));
+ rs.reset(engine->getRecordStore(&opCtx, "catalog", "catalog", CollectionOptions()));
+ catalog.reset(new KVCatalog(rs.get(), true, true, true));
+ uow.commit();
}
+ {
+ MyOperationContext opCtx(engine);
+ WriteUnitOfWork uow(&opCtx);
+ ASSERT_OK(catalog->newCollection(&opCtx, "a.b", CollectionOptions()));
+ ASSERT_STRING_CONTAINS(catalog->getCollectionIdent("a.b"), "a/collection/");
+ ASSERT_TRUE(catalog->isUserDataIdent(catalog->getCollectionIdent("a.b")));
+ uow.commit();
+ }
+ { // index
+ MyOperationContext opCtx(engine);
+ WriteUnitOfWork uow(&opCtx);
+
+ BSONCollectionCatalogEntry::MetaData md;
+ md.ns = "a.b";
+ md.indexes.push_back(BSONCollectionCatalogEntry::IndexMetaData(BSON("name"
+ << "foo"),
+ false,
+ RecordId(),
+ false));
+ catalog->putMetaData(&opCtx, "a.b", md);
+ ASSERT_STRING_CONTAINS(catalog->getIndexIdent(&opCtx, "a.b", "foo"), "a/index/");
+ ASSERT_TRUE(catalog->isUserDataIdent(catalog->getIndexIdent(&opCtx, "a.b", "foo")));
+ uow.commit();
+ }
+}
}
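
Taken together, the four KVCatalogTest cases above pin down the ident layout implied by the catalog's two directory options: directoryPerDB adds a database prefix ("a/"), directoryForIndexes splits collections from indexes ("collection/", "index/"), and enabling both composes the two ("a/collection/", "a/index/"). A minimal sketch of that composition with an illustrative helper (the real logic lives in kv_catalog.cpp, not in this function):

    // Sketch only: how the prefixes asserted above could be composed.
    // The helper name and signature are illustrative, not KVCatalog's.
    #include <string>

    std::string identPrefix(const std::string& db,
                            bool directoryPerDB,
                            bool directoryForIndexes,
                            bool isIndex) {
        std::string prefix;
        if (directoryPerDB)
            prefix += db + "/";  // DirectoryPerDb1 asserts "a/"
        if (directoryForIndexes)
            prefix += isIndex ? "index/" : "collection/";  // Split1 asserts these
        return prefix;  // DirectoryPerAndSplit1 sees "a/collection/" and "a/index/"
    }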
diff --git a/src/mongo/db/storage/kv/kv_engine_test_harness.h b/src/mongo/db/storage/kv/kv_engine_test_harness.h
index fd828681cc7..15ed43a1249 100644
--- a/src/mongo/db/storage/kv/kv_engine_test_harness.h
+++ b/src/mongo/db/storage/kv/kv_engine_test_harness.h
@@ -33,15 +33,15 @@
#include "mongo/db/storage/kv/kv_engine.h"
namespace mongo {
- class KVHarnessHelper {
- public:
- virtual ~KVHarnessHelper(){}
+class KVHarnessHelper {
+public:
+ virtual ~KVHarnessHelper() {}
- // returns same thing for entire life
- virtual KVEngine* getEngine() = 0;
+ // Returns the same engine for the entire lifetime of the harness.
+ virtual KVEngine* getEngine() = 0;
- virtual KVEngine* restartEngine() = 0;
+ virtual KVEngine* restartEngine() = 0;
- static KVHarnessHelper* create();
- };
+ static KVHarnessHelper* create();
+};
}
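
Each engine under test supplies its own KVHarnessHelper::create(). A minimal sketch of one, assuming a hypothetical MyEngine whose on-disk data survives across instances (which the Restart1 test above depends on):

    // Hypothetical harness wiring; MyEngine stands in for a concrete KVEngine.
    namespace mongo {
    namespace {

    class MyHarnessHelper : public KVHarnessHelper {
    public:
        MyHarnessHelper() : _engine(new MyEngine()) {}

        virtual KVEngine* getEngine() {
            return _engine.get();
        }

        virtual KVEngine* restartEngine() {
            // Tear down and rebuild over the same on-disk data so Restart1
            // can verify durability across a simulated restart.
            _engine.reset(new MyEngine());
            return _engine.get();
        }

    private:
        std::unique_ptr<KVEngine> _engine;
    };

    }  // namespace

    KVHarnessHelper* KVHarnessHelper::create() {
        return new MyHarnessHelper();
    }
    }  // namespace mongo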
diff --git a/src/mongo/db/storage/kv/kv_storage_engine.cpp b/src/mongo/db/storage/kv/kv_storage_engine.cpp
index 15a17987ff7..8e4d63f843d 100644
--- a/src/mongo/db/storage/kv/kv_storage_engine.cpp
+++ b/src/mongo/db/storage/kv/kv_storage_engine.cpp
@@ -41,238 +41,222 @@
namespace mongo {
- using std::string;
- using std::vector;
+using std::string;
+using std::vector;
- namespace {
- const std::string catalogInfo = "_mdb_catalog";
- }
+namespace {
+const std::string catalogInfo = "_mdb_catalog";
+}
- class KVStorageEngine::RemoveDBChange : public RecoveryUnit::Change {
- public:
- RemoveDBChange(KVStorageEngine* engine, StringData db, KVDatabaseCatalogEntry* entry)
- : _engine(engine)
- , _db(db.toString())
- , _entry(entry)
- {}
+class KVStorageEngine::RemoveDBChange : public RecoveryUnit::Change {
+public:
+ RemoveDBChange(KVStorageEngine* engine, StringData db, KVDatabaseCatalogEntry* entry)
+ : _engine(engine), _db(db.toString()), _entry(entry) {}
- virtual void commit() {
- delete _entry;
- }
+ virtual void commit() {
+ delete _entry;
+ }
- virtual void rollback() {
- stdx::lock_guard<stdx::mutex> lk(_engine->_dbsLock);
- _engine->_dbs[_db] = _entry;
- }
+ virtual void rollback() {
+ stdx::lock_guard<stdx::mutex> lk(_engine->_dbsLock);
+ _engine->_dbs[_db] = _entry;
+ }
+
+ KVStorageEngine* const _engine;
+ const std::string _db;
+ KVDatabaseCatalogEntry* const _entry;
+};
- KVStorageEngine* const _engine;
- const std::string _db;
- KVDatabaseCatalogEntry* const _entry;
- };
+KVStorageEngine::KVStorageEngine(KVEngine* engine, const KVStorageEngineOptions& options)
+ : _options(options), _engine(engine), _supportsDocLocking(_engine->supportsDocLocking()) {
+ uassert(28601,
+ "Storage engine does not support --directoryperdb",
+ !(options.directoryPerDB && !engine->supportsDirectoryPerDB()));
- KVStorageEngine::KVStorageEngine( KVEngine* engine,
- const KVStorageEngineOptions& options )
- : _options( options )
- , _engine( engine )
- , _supportsDocLocking(_engine->supportsDocLocking()) {
+ OperationContextNoop opCtx(_engine->newRecoveryUnit());
- uassert(28601, "Storage engine does not support --directoryperdb",
- !(options.directoryPerDB && !engine->supportsDirectoryPerDB()));
+ if (options.forRepair && engine->hasIdent(&opCtx, catalogInfo)) {
+ log() << "Repairing catalog metadata";
+ // TODO should also validate all BSON in the catalog.
+ engine->repairIdent(&opCtx, catalogInfo);
+ }
- OperationContextNoop opCtx( _engine->newRecoveryUnit() );
+ {
+ WriteUnitOfWork uow(&opCtx);
- if (options.forRepair && engine->hasIdent(&opCtx, catalogInfo)) {
- log() << "Repairing catalog metadata";
- // TODO should also validate all BSON in the catalog.
- engine->repairIdent(&opCtx, catalogInfo);
+ Status status =
+ _engine->createRecordStore(&opCtx, catalogInfo, catalogInfo, CollectionOptions());
+ // BadValue is usually caused by an invalid configuration string.
+ // We still fassert() but without a stack trace.
+ if (status.code() == ErrorCodes::BadValue) {
+ fassertFailedNoTrace(28562);
}
-
- {
- WriteUnitOfWork uow( &opCtx );
-
- Status status = _engine->createRecordStore( &opCtx,
- catalogInfo,
- catalogInfo,
- CollectionOptions() );
- // BadValue is usually caused by invalid configuration string.
- // We still fassert() but without a stack trace.
- if (status.code() == ErrorCodes::BadValue) {
- fassertFailedNoTrace(28562);
- }
- fassert( 28520, status );
-
- _catalogRecordStore.reset( _engine->getRecordStore( &opCtx,
- catalogInfo,
- catalogInfo,
- CollectionOptions() ) );
- _catalog.reset( new KVCatalog( _catalogRecordStore.get(),
- _supportsDocLocking,
- _options.directoryPerDB,
- _options.directoryForIndexes) );
- _catalog->init( &opCtx );
-
- std::vector<std::string> collections;
- _catalog->getAllCollections( &collections );
-
- for ( size_t i = 0; i < collections.size(); i++ ) {
- std::string coll = collections[i];
- NamespaceString nss( coll );
- string dbName = nss.db().toString();
-
- // No rollback since this is only for committed dbs.
- KVDatabaseCatalogEntry*& db = _dbs[dbName];
- if ( !db ) {
- db = new KVDatabaseCatalogEntry( dbName, this );
- }
-
- db->initCollection( &opCtx, coll, options.forRepair );
+ fassert(28520, status);
+
+ _catalogRecordStore.reset(
+ _engine->getRecordStore(&opCtx, catalogInfo, catalogInfo, CollectionOptions()));
+ _catalog.reset(new KVCatalog(_catalogRecordStore.get(),
+ _supportsDocLocking,
+ _options.directoryPerDB,
+ _options.directoryForIndexes));
+ _catalog->init(&opCtx);
+
+ std::vector<std::string> collections;
+ _catalog->getAllCollections(&collections);
+
+ for (size_t i = 0; i < collections.size(); i++) {
+ std::string coll = collections[i];
+ NamespaceString nss(coll);
+ string dbName = nss.db().toString();
+
+ // No rollback since this is only for committed dbs.
+ KVDatabaseCatalogEntry*& db = _dbs[dbName];
+ if (!db) {
+ db = new KVDatabaseCatalogEntry(dbName, this);
}
- uow.commit();
+ db->initCollection(&opCtx, coll, options.forRepair);
}
- opCtx.recoveryUnit()->abandonSnapshot();
+ uow.commit();
+ }
+
+ opCtx.recoveryUnit()->abandonSnapshot();
- // now clean up orphaned idents
+ // now clean up orphaned idents
+ {
+ // get all idents
+ std::set<std::string> allIdents;
{
- // get all idents
- std::set<std::string> allIdents;
- {
- std::vector<std::string> v = _engine->getAllIdents( &opCtx );
- allIdents.insert( v.begin(), v.end() );
- allIdents.erase( catalogInfo );
- }
+ std::vector<std::string> v = _engine->getAllIdents(&opCtx);
+ allIdents.insert(v.begin(), v.end());
+ allIdents.erase(catalogInfo);
+ }
- // remove ones still in use
- {
- vector<string> idents = _catalog->getAllIdents( &opCtx );
- for ( size_t i = 0; i < idents.size(); i++ ) {
- allIdents.erase( idents[i] );
- }
+ // remove ones still in use
+ {
+ vector<string> idents = _catalog->getAllIdents(&opCtx);
+ for (size_t i = 0; i < idents.size(); i++) {
+ allIdents.erase(idents[i]);
}
+ }
- for ( std::set<std::string>::const_iterator it = allIdents.begin();
- it != allIdents.end();
- ++it ) {
- const std::string& toRemove = *it;
- if ( !_catalog->isUserDataIdent( toRemove ) )
- continue;
- log() << "dropping unused ident: " << toRemove;
- WriteUnitOfWork wuow( &opCtx );
- _engine->dropIdent( &opCtx, toRemove );
- wuow.commit();
- }
+ for (std::set<std::string>::const_iterator it = allIdents.begin(); it != allIdents.end();
+ ++it) {
+ const std::string& toRemove = *it;
+ if (!_catalog->isUserDataIdent(toRemove))
+ continue;
+ log() << "dropping unused ident: " << toRemove;
+ WriteUnitOfWork wuow(&opCtx);
+ _engine->dropIdent(&opCtx, toRemove);
+ wuow.commit();
}
+ }
+}
+void KVStorageEngine::cleanShutdown() {
+ for (DBMap::const_iterator it = _dbs.begin(); it != _dbs.end(); ++it) {
+ delete it->second;
}
+ _dbs.clear();
- void KVStorageEngine::cleanShutdown() {
+ _catalog.reset(NULL);
+ _catalogRecordStore.reset(NULL);
- for ( DBMap::const_iterator it = _dbs.begin(); it != _dbs.end(); ++it ) {
- delete it->second;
- }
- _dbs.clear();
+ _engine->cleanShutdown();
+ // intentionally not deleting _engine
+}
- _catalog.reset( NULL );
- _catalogRecordStore.reset( NULL );
+KVStorageEngine::~KVStorageEngine() {}
- _engine->cleanShutdown();
- // intentionally not deleting _engine
- }
+void KVStorageEngine::finishInit() {}
- KVStorageEngine::~KVStorageEngine() {
+RecoveryUnit* KVStorageEngine::newRecoveryUnit() {
+ if (!_engine) {
+ // shutdown
+ return NULL;
}
+ return _engine->newRecoveryUnit();
+}
- void KVStorageEngine::finishInit() {
- }
-
- RecoveryUnit* KVStorageEngine::newRecoveryUnit() {
- if ( !_engine ) {
- // shutdown
- return NULL;
- }
- return _engine->newRecoveryUnit();
+void KVStorageEngine::listDatabases(std::vector<std::string>* out) const {
+ stdx::lock_guard<stdx::mutex> lk(_dbsLock);
+ for (DBMap::const_iterator it = _dbs.begin(); it != _dbs.end(); ++it) {
+ if (it->second->isEmpty())
+ continue;
+ out->push_back(it->first);
}
+}
- void KVStorageEngine::listDatabases( std::vector<std::string>* out ) const {
- stdx::lock_guard<stdx::mutex> lk( _dbsLock );
- for ( DBMap::const_iterator it = _dbs.begin(); it != _dbs.end(); ++it ) {
- if ( it->second->isEmpty() )
- continue;
- out->push_back( it->first );
- }
+DatabaseCatalogEntry* KVStorageEngine::getDatabaseCatalogEntry(OperationContext* opCtx,
+ StringData dbName) {
+ stdx::lock_guard<stdx::mutex> lk(_dbsLock);
+ KVDatabaseCatalogEntry*& db = _dbs[dbName.toString()];
+ if (!db) {
+ // Not registering change since db creation is implicit and never rolled back.
+ db = new KVDatabaseCatalogEntry(dbName, this);
}
+ return db;
+}
- DatabaseCatalogEntry* KVStorageEngine::getDatabaseCatalogEntry( OperationContext* opCtx,
- StringData dbName ) {
- stdx::lock_guard<stdx::mutex> lk( _dbsLock );
- KVDatabaseCatalogEntry*& db = _dbs[dbName.toString()];
- if ( !db ) {
- // Not registering change since db creation is implicit and never rolled back.
- db = new KVDatabaseCatalogEntry( dbName, this );
- }
- return db;
- }
+Status KVStorageEngine::closeDatabase(OperationContext* txn, StringData db) {
+ // It is OK for this to be a no-op because there is no database layer in KV.
+ return Status::OK();
+}
- Status KVStorageEngine::closeDatabase( OperationContext* txn, StringData db ) {
- // This is ok to be a no-op as there is no database layer in kv.
- return Status::OK();
+Status KVStorageEngine::dropDatabase(OperationContext* txn, StringData db) {
+ KVDatabaseCatalogEntry* entry;
+ {
+ stdx::lock_guard<stdx::mutex> lk(_dbsLock);
+ DBMap::const_iterator it = _dbs.find(db.toString());
+ if (it == _dbs.end())
+ return Status(ErrorCodes::NamespaceNotFound, "db not found to drop");
+ entry = it->second;
}
- Status KVStorageEngine::dropDatabase( OperationContext* txn, StringData db ) {
-
- KVDatabaseCatalogEntry* entry;
- {
- stdx::lock_guard<stdx::mutex> lk( _dbsLock );
- DBMap::const_iterator it = _dbs.find( db.toString() );
- if ( it == _dbs.end() )
- return Status( ErrorCodes::NamespaceNotFound, "db not found to drop" );
- entry = it->second;
- }
-
- // This is called outside of a WUOW since MMAPv1 has unfortunate behavior around dropping
- // databases. We need to create one here since we want db dropping to all-or-nothing
- // wherever possible. Eventually we want to move this up so that it can include the logOp
- // inside of the WUOW, but that would require making DB dropping happen inside the Dur
- // system for MMAPv1.
- WriteUnitOfWork wuow(txn);
-
- std::list<std::string> toDrop;
- entry->getCollectionNamespaces( &toDrop );
+ // This is called outside of a WUOW since MMAPv1 has unfortunate behavior around dropping
+ // databases. We need to create one here since we want db dropping to be all-or-nothing
+ // wherever possible. Eventually we want to move this up so that it can include the logOp
+ // inside of the WUOW, but that would require making DB dropping happen inside the Dur
+ // system for MMAPv1.
+ WriteUnitOfWork wuow(txn);
- for ( std::list<std::string>::iterator it = toDrop.begin(); it != toDrop.end(); ++it ) {
- string coll = *it;
- entry->dropCollection( txn, coll );
- }
- toDrop.clear();
- entry->getCollectionNamespaces( &toDrop );
- invariant( toDrop.empty() );
-
- {
- stdx::lock_guard<stdx::mutex> lk( _dbsLock );
- txn->recoveryUnit()->registerChange(new RemoveDBChange(this, db, entry));
- _dbs.erase( db.toString() );
- }
+ std::list<std::string> toDrop;
+ entry->getCollectionNamespaces(&toDrop);
- wuow.commit();
- return Status::OK();
+ for (std::list<std::string>::iterator it = toDrop.begin(); it != toDrop.end(); ++it) {
+ string coll = *it;
+ entry->dropCollection(txn, coll);
}
-
- int KVStorageEngine::flushAllFiles( bool sync ) {
- return _engine->flushAllFiles( sync );
+ toDrop.clear();
+ entry->getCollectionNamespaces(&toDrop);
+ invariant(toDrop.empty());
+
+ {
+ stdx::lock_guard<stdx::mutex> lk(_dbsLock);
+ txn->recoveryUnit()->registerChange(new RemoveDBChange(this, db, entry));
+ _dbs.erase(db.toString());
}
- bool KVStorageEngine::isDurable() const {
- return _engine->isDurable();
- }
+ wuow.commit();
+ return Status::OK();
+}
- Status KVStorageEngine::repairRecordStore(OperationContext* txn, const std::string& ns) {
- Status status = _engine->repairIdent(txn, _catalog->getCollectionIdent(ns));
- if (!status.isOK())
- return status;
+int KVStorageEngine::flushAllFiles(bool sync) {
+ return _engine->flushAllFiles(sync);
+}
- _dbs[nsToDatabase(ns)]->reinitCollectionAfterRepair(txn, ns);
- return Status::OK();
- }
+bool KVStorageEngine::isDurable() const {
+ return _engine->isDurable();
+}
+
+Status KVStorageEngine::repairRecordStore(OperationContext* txn, const std::string& ns) {
+ Status status = _engine->repairIdent(txn, _catalog->getCollectionIdent(ns));
+ if (!status.isOK())
+ return status;
+
+ _dbs[nsToDatabase(ns)]->reinitCollectionAfterRepair(txn, ns);
+ return Status::OK();
+}
}
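
Note the pattern in dropDatabase above: the in-memory _dbs entry is erased immediately, while its deletion is deferred through a RecoveryUnit::Change so a rollback can reinstate it. A minimal sketch of that commit/rollback idiom with an illustrative change (the map and types here are examples, not this file's):

    // Illustrative only: the two-phase pattern RemoveDBChange implements.
    // Mutate in-memory state up front; commit() finalizes the side effect
    // and rollback() restores the previous state.
    class EraseFromMapChange : public RecoveryUnit::Change {
    public:
        EraseFromMapChange(std::map<std::string, int>* m, const std::string& key, int old)
            : _m(m), _key(key), _old(old) {}

        virtual void commit() {}  // the erase already happened; nothing to finalize

        virtual void rollback() {
            (*_m)[_key] = _old;  // put the entry back
        }

    private:
        std::map<std::string, int>* _m;
        const std::string _key;
        const int _old;
    };

    void eraseKey(OperationContext* txn, std::map<std::string, int>* m, const std::string& k) {
        int old = (*m)[k];
        m->erase(k);  // speculative in-memory change
        txn->recoveryUnit()->registerChange(new EraseFromMapChange(m, k, old));  // takes ownership
    }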
diff --git a/src/mongo/db/storage/kv/kv_storage_engine.h b/src/mongo/db/storage/kv/kv_storage_engine.h
index 3159180d193..08836f6745b 100644
--- a/src/mongo/db/storage/kv/kv_storage_engine.h
+++ b/src/mongo/db/storage/kv/kv_storage_engine.h
@@ -40,77 +40,83 @@
namespace mongo {
- class KVCatalog;
- class KVEngine;
- class KVDatabaseCatalogEntry;
+class KVCatalog;
+class KVEngine;
+class KVDatabaseCatalogEntry;
- struct KVStorageEngineOptions {
- KVStorageEngineOptions() :
- directoryPerDB(false),
- directoryForIndexes(false),
- forRepair(false) {}
+struct KVStorageEngineOptions {
+ KVStorageEngineOptions()
+ : directoryPerDB(false), directoryForIndexes(false), forRepair(false) {}
- bool directoryPerDB;
- bool directoryForIndexes;
- bool forRepair;
- };
+ bool directoryPerDB;
+ bool directoryForIndexes;
+ bool forRepair;
+};
- class KVStorageEngine : public StorageEngine {
- public:
- /**
- * @param engine - owneership passes to me
- */
- KVStorageEngine( KVEngine* engine,
- const KVStorageEngineOptions& options = KVStorageEngineOptions() );
- virtual ~KVStorageEngine();
+class KVStorageEngine : public StorageEngine {
+public:
+ /**
+ * @param engine - ownership passes to me
+ */
+ KVStorageEngine(KVEngine* engine,
+ const KVStorageEngineOptions& options = KVStorageEngineOptions());
+ virtual ~KVStorageEngine();
- virtual void finishInit();
+ virtual void finishInit();
- virtual RecoveryUnit* newRecoveryUnit();
+ virtual RecoveryUnit* newRecoveryUnit();
- virtual void listDatabases( std::vector<std::string>* out ) const;
+ virtual void listDatabases(std::vector<std::string>* out) const;
- virtual DatabaseCatalogEntry* getDatabaseCatalogEntry( OperationContext* opCtx,
- StringData db );
+ virtual DatabaseCatalogEntry* getDatabaseCatalogEntry(OperationContext* opCtx, StringData db);
- virtual bool supportsDocLocking() const { return _supportsDocLocking; }
+ virtual bool supportsDocLocking() const {
+ return _supportsDocLocking;
+ }
- virtual Status closeDatabase( OperationContext* txn, StringData db );
+ virtual Status closeDatabase(OperationContext* txn, StringData db);
- virtual Status dropDatabase( OperationContext* txn, StringData db );
+ virtual Status dropDatabase(OperationContext* txn, StringData db);
- virtual int flushAllFiles( bool sync );
+ virtual int flushAllFiles(bool sync);
- virtual bool isDurable() const;
+ virtual bool isDurable() const;
- virtual Status repairRecordStore(OperationContext* txn, const std::string& ns);
+ virtual Status repairRecordStore(OperationContext* txn, const std::string& ns);
- virtual void cleanShutdown();
+ virtual void cleanShutdown();
- // ------ kv ------
+ // ------ kv ------
- KVEngine* getEngine() { return _engine.get(); }
- const KVEngine* getEngine() const { return _engine.get(); }
+ KVEngine* getEngine() {
+ return _engine.get();
+ }
+ const KVEngine* getEngine() const {
+ return _engine.get();
+ }
- KVCatalog* getCatalog() { return _catalog.get(); }
- const KVCatalog* getCatalog() const { return _catalog.get(); }
+ KVCatalog* getCatalog() {
+ return _catalog.get();
+ }
+ const KVCatalog* getCatalog() const {
+ return _catalog.get();
+ }
- private:
- class RemoveDBChange;
+private:
+ class RemoveDBChange;
- KVStorageEngineOptions _options;
+ KVStorageEngineOptions _options;
- // This must be the first member so it is destroyed last.
- std::unique_ptr<KVEngine> _engine;
+ // This must be the first member so it is destroyed last.
+ std::unique_ptr<KVEngine> _engine;
- const bool _supportsDocLocking;
+ const bool _supportsDocLocking;
- std::unique_ptr<RecordStore> _catalogRecordStore;
- std::unique_ptr<KVCatalog> _catalog;
-
- typedef std::map<std::string,KVDatabaseCatalogEntry*> DBMap;
- DBMap _dbs;
- mutable stdx::mutex _dbsLock;
- };
+ std::unique_ptr<RecordStore> _catalogRecordStore;
+ std::unique_ptr<KVCatalog> _catalog;
+ typedef std::map<std::string, KVDatabaseCatalogEntry*> DBMap;
+ DBMap _dbs;
+ mutable stdx::mutex _dbsLock;
+};
}
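
For orientation, a minimal sketch of wiring this class up, assuming a hypothetical makeMyKVEngine() factory (not part of this patch):

    // Hypothetical wiring; makeMyKVEngine() is illustrative.
    KVStorageEngineOptions options;
    options.directoryPerDB = true;  // uassert 28601 fires if the engine
                                    // lacks supportsDirectoryPerDB()
    options.forRepair = false;
    KVStorageEngine storageEngine(makeMyKVEngine(), options);  // takes ownership of the engine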
diff --git a/src/mongo/db/storage/mmap_v1/aligned_builder.cpp b/src/mongo/db/storage/mmap_v1/aligned_builder.cpp
index 96b16e59f4a..8742f25e285 100644
--- a/src/mongo/db/storage/mmap_v1/aligned_builder.cpp
+++ b/src/mongo/db/storage/mmap_v1/aligned_builder.cpp
@@ -37,135 +37,136 @@
namespace mongo {
- using std::endl;
+using std::endl;
- AlignedBuilder::AlignedBuilder(unsigned initSize) {
- _len = 0;
- _malloc(initSize);
- uassert(13584, "out of memory AlignedBuilder", _p._allocationAddress);
- }
+AlignedBuilder::AlignedBuilder(unsigned initSize) {
+ _len = 0;
+ _malloc(initSize);
+ uassert(13584, "out of memory AlignedBuilder", _p._allocationAddress);
+}
- BOOST_STATIC_ASSERT(sizeof(void*) == sizeof(size_t));
+BOOST_STATIC_ASSERT(sizeof(void*) == sizeof(size_t));
- /** reset for a re-use. shrinks if > 128MB */
- void AlignedBuilder::reset() {
- _len = 0;
- RARELY {
- const unsigned sizeCap = 128*1024*1024;
- if (_p._size > sizeCap)
- _realloc(sizeCap, _len);
- }
+/** reset for re-use; shrinks the buffer if it exceeds 128MB */
+void AlignedBuilder::reset() {
+ _len = 0;
+ RARELY {
+ const unsigned sizeCap = 128 * 1024 * 1024;
+ if (_p._size > sizeCap)
+ _realloc(sizeCap, _len);
}
+}
- /** reset with a hint as to the upcoming needed size specified */
- void AlignedBuilder::reset(unsigned sz) {
- _len = 0;
- unsigned Q = 32 * 1024 * 1024 - 1;
- unsigned want = (sz+Q) & (~Q);
- if( _p._size == want ) {
+/** reset with a hint of the size that will be needed next */
+void AlignedBuilder::reset(unsigned sz) {
+ _len = 0;
+ unsigned Q = 32 * 1024 * 1024 - 1;
+ unsigned want = (sz + Q) & (~Q);
+ if (_p._size == want) {
+ return;
+ }
+ if (_p._size > want) {
+ if (_p._size <= 64 * 1024 * 1024)
return;
- }
- if( _p._size > want ) {
- if( _p._size <= 64 * 1024 * 1024 )
- return;
- bool downsize = false;
- RARELY { downsize = true; }
- if( !downsize )
- return;
+ bool downsize = false;
+ RARELY {
+ downsize = true;
}
- _realloc(want, _len);
- }
-
- void AlignedBuilder::mallocSelfAligned(unsigned sz) {
- verify( sz == _p._size );
- void *p = malloc(sz + Alignment - 1);
- _p._allocationAddress = p;
- size_t s = (size_t) p;
- size_t sold = s;
- s += Alignment - 1;
- s = (s/Alignment)*Alignment;
- verify( s >= sold ); // beginning
- verify( (s + sz) <= (sold + sz + Alignment - 1) ); //end
- _p._data = (char *) s;
+ if (!downsize)
+ return;
}
+ _realloc(want, _len);
+}
- /* "slow"/infrequent portion of 'grow()' */
- void NOINLINE_DECL AlignedBuilder::growReallocate(unsigned oldLen) {
- const unsigned MB = 1024*1024;
- const unsigned kMaxSize = (sizeof(int*) == 4) ? 512*MB : 2000*MB;
- const unsigned kWarnSize = (sizeof(int*) == 4) ? 256*MB : 512*MB;
+void AlignedBuilder::mallocSelfAligned(unsigned sz) {
+ verify(sz == _p._size);
+ void* p = malloc(sz + Alignment - 1);
+ _p._allocationAddress = p;
+ size_t s = (size_t)p;
+ size_t sold = s;
+ s += Alignment - 1;
+ s = (s / Alignment) * Alignment;
+ verify(s >= sold); // beginning
+ verify((s + sz) <= (sold + sz + Alignment - 1)); // end
+ _p._data = (char*)s;
+}
- const unsigned oldSize = _p._size;
+/* "slow"/infrequent portion of 'grow()' */
+void NOINLINE_DECL AlignedBuilder::growReallocate(unsigned oldLen) {
+ const unsigned MB = 1024 * 1024;
+ const unsigned kMaxSize = (sizeof(int*) == 4) ? 512 * MB : 2000 * MB;
+ const unsigned kWarnSize = (sizeof(int*) == 4) ? 256 * MB : 512 * MB;
- // Warn for unexpectedly large buffer
- wassert(_len <= kWarnSize);
+ const unsigned oldSize = _p._size;
- // Check validity of requested size
- invariant(_len > oldSize);
- if (_len > kMaxSize) {
- log() << "error writing journal: too much uncommitted data (" << _len << " bytes)";
- log() << "shutting down immediately to avoid corruption";
- fassert(28614, _len <= kMaxSize);
- }
+ // Warn for unexpectedly large buffer
+ wassert(_len <= kWarnSize);
- // Use smaller maximum for debug builds, as we should never be close the the maximum
- dassert(_len <= 256*MB);
+ // Check validity of requested size
+ invariant(_len > oldSize);
+ if (_len > kMaxSize) {
+ log() << "error writing journal: too much uncommitted data (" << _len << " bytes)";
+ log() << "shutting down immediately to avoid corruption";
+ fassert(28614, _len <= kMaxSize);
+ }
- // Compute newSize by doubling the existing maximum size until the maximum is reached
- invariant(oldSize > 0);
- uint64_t newSize = oldSize; // use 64 bits to defend against accidental overflow
- while (newSize < _len) {
- newSize *= 2;
- }
+ // Use a smaller maximum for debug builds, as we should never be close to the maximum
+ dassert(_len <= 256 * MB);
- if (newSize > kMaxSize) {
- newSize = kMaxSize;
- }
+ // Compute newSize by doubling the existing maximum size until the maximum is reached
+ invariant(oldSize > 0);
+ uint64_t newSize = oldSize; // use 64 bits to defend against accidental overflow
+ while (newSize < _len) {
+ newSize *= 2;
+ }
- _realloc(newSize, oldLen);
+ if (newSize > kMaxSize) {
+ newSize = kMaxSize;
}
- void AlignedBuilder::_malloc(unsigned sz) {
- _p._size = sz;
+ _realloc(newSize, oldLen);
+}
+
+void AlignedBuilder::_malloc(unsigned sz) {
+ _p._size = sz;
#if defined(_WIN32)
- void *p = VirtualAlloc(0, sz, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
- _p._allocationAddress = p;
- _p._data = (char *) p;
+ void* p = VirtualAlloc(0, sz, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
+ _p._allocationAddress = p;
+ _p._data = (char*)p;
#elif defined(__linux__)
- // in theory #ifdef _POSIX_VERSION should work, but it doesn't on OS X 10.4, and needs to be tested on solaris.
- // so for now, linux only for this.
- void *p = 0;
- int res = posix_memalign(&p, Alignment, sz);
- massert(13524, "out of memory AlignedBuilder", res == 0);
- _p._allocationAddress = p;
- _p._data = (char *) p;
+ // In theory #ifdef _POSIX_VERSION should work, but it doesn't on OS X 10.4 and needs to be
+ // tested on Solaris, so for now this path is Linux-only.
+ void* p = 0;
+ int res = posix_memalign(&p, Alignment, sz);
+ massert(13524, "out of memory AlignedBuilder", res == 0);
+ _p._allocationAddress = p;
+ _p._data = (char*)p;
#else
- mallocSelfAligned(sz);
- verify( ((size_t) _p._data) % Alignment == 0 );
+ mallocSelfAligned(sz);
+ verify(((size_t)_p._data) % Alignment == 0);
#endif
- }
+}
- void AlignedBuilder::_realloc(unsigned newSize, unsigned oldLen) {
- // posix_memalign alignment is not maintained on reallocs, so we can't use realloc().
- AllocationInfo old = _p;
- _malloc(newSize);
- verify( oldLen <= _len );
- memcpy(_p._data, old._data, oldLen);
- _free(old._allocationAddress);
- }
+void AlignedBuilder::_realloc(unsigned newSize, unsigned oldLen) {
+ // posix_memalign alignment is not maintained on reallocs, so we can't use realloc().
+ AllocationInfo old = _p;
+ _malloc(newSize);
+ verify(oldLen <= _len);
+ memcpy(_p._data, old._data, oldLen);
+ _free(old._allocationAddress);
+}
- void AlignedBuilder::_free(void *p) {
+void AlignedBuilder::_free(void* p) {
#if defined(_WIN32)
- VirtualFree(p, 0, MEM_RELEASE);
+ VirtualFree(p, 0, MEM_RELEASE);
#else
- free(p);
+ free(p);
#endif
- }
-
- void AlignedBuilder::kill() {
- _free(_p._allocationAddress);
- _p._allocationAddress = 0;
- _p._data = 0;
- }
+}
+void AlignedBuilder::kill() {
+ _free(_p._allocationAddress);
+ _p._allocationAddress = 0;
+ _p._data = 0;
+}
}
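
The rounding in reset(unsigned sz) and mallocSelfAligned above is ordinary round-up arithmetic against power-of-two boundaries; a small standalone illustration (the values are examples only):

    #include <cassert>
    #include <cstddef>

    int main() {
        // reset(sz): round a size hint up to the next 32MB multiple.
        const unsigned Q = 32 * 1024 * 1024 - 1;
        unsigned sz = 40 * 1024 * 1024;    // 40MB requested
        unsigned want = (sz + Q) & (~Q);
        assert(want == 64 * 1024 * 1024);  // rounded up to 64MB

        // mallocSelfAligned: round a raw address up to an 8192-byte boundary.
        const std::size_t Alignment = 8192;
        std::size_t s = 100000;  // stand-in for a malloc() result
        s = ((s + Alignment - 1) / Alignment) * Alignment;
        assert(s % Alignment == 0 && s >= 100000);
        return 0;
    }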
diff --git a/src/mongo/db/storage/mmap_v1/aligned_builder.h b/src/mongo/db/storage/mmap_v1/aligned_builder.h
index fb184424b66..f43cbee7d5d 100644
--- a/src/mongo/db/storage/mmap_v1/aligned_builder.h
+++ b/src/mongo/db/storage/mmap_v1/aligned_builder.h
@@ -33,104 +33,117 @@
namespace mongo {
- /** a page-aligned BufBuilder. */
- class AlignedBuilder {
- public:
- AlignedBuilder(unsigned init_size);
- ~AlignedBuilder() { kill(); }
-
- /** reset with a hint as to the upcoming needed size specified */
- void reset(unsigned sz);
-
- /** reset for a re-use. shrinks if > 128MB */
- void reset();
-
- /** note this may be deallocated (realloced) if you keep writing or reset(). */
- const char* buf() const { return _p._data; }
-
- /** leave room for some stuff later
- @return offset in the buffer that was our current position
- */
- size_t skip(unsigned n) {
- unsigned l = len();
- grow(n);
- return l;
+/** a page-aligned BufBuilder. */
+class AlignedBuilder {
+public:
+ AlignedBuilder(unsigned init_size);
+ ~AlignedBuilder() {
+ kill();
+ }
+
+ /** reset with a hint of the size that will be needed next */
+ void reset(unsigned sz);
+
+ /** reset for re-use; shrinks the buffer if it exceeds 128MB */
+ void reset();
+
+ /** note the buffer may be deallocated (realloc'd) if you keep writing or call reset(). */
+ const char* buf() const {
+ return _p._data;
+ }
+
+ /** leave room for n bytes to be filled in later
+ @return offset in the buffer that was our current position
+ */
+ size_t skip(unsigned n) {
+ unsigned l = len();
+ grow(n);
+ return l;
+ }
+
+ /** if the buffer grows, previously returned pointers are no longer valid */
+ char* atOfs(unsigned ofs) {
+ return _p._data + ofs;
+ }
+
+ /** if the buffer grows, previously returned pointers are no longer valid */
+ char* cur() {
+ return _p._data + _len;
+ }
+
+ void appendChar(char j) {
+ *((char*)grow(sizeof(char))) = j;
+ }
+ void appendNum(char j) {
+ *((char*)grow(sizeof(char))) = j;
+ }
+ void appendNum(short j) {
+ *((short*)grow(sizeof(short))) = j;
+ }
+ void appendNum(int j) {
+ *((int*)grow(sizeof(int))) = j;
+ }
+ void appendNum(unsigned j) {
+ *((unsigned*)grow(sizeof(unsigned))) = j;
+ }
+ void appendNum(bool j) {
+ *((bool*)grow(sizeof(bool))) = j;
+ }
+ void appendNum(double j) {
+ *((double*)grow(sizeof(double))) = j;
+ }
+ void appendNum(long long j) {
+ *((long long*)grow(sizeof(long long))) = j;
+ }
+ void appendNum(unsigned long long j) {
+ *((unsigned long long*)grow(sizeof(unsigned long long))) = j;
+ }
+
+ void appendBuf(const void* src, size_t len) {
+ memcpy(grow((unsigned)len), src, len);
+ }
+
+ template <class T>
+ void appendStruct(const T& s) {
+ appendBuf(&s, sizeof(T));
+ }
+
+ void appendStr(StringData str, bool includeEOO = true) {
+ const unsigned len = str.size() + (includeEOO ? 1 : 0);
+ verify(len < (unsigned)BSONObjMaxUserSize);
+ str.copyTo(grow(len), includeEOO);
+ }
+
+ /** @return the in-use length */
+ unsigned len() const {
+ return _len;
+ }
+
+private:
+ static const unsigned Alignment = 8192;
+
+ /** returns the pre-grow write position */
+ inline char* grow(unsigned by) {
+ unsigned oldlen = _len;
+ _len += by;
+ if (MONGO_unlikely(_len > _p._size)) {
+ growReallocate(oldlen);
}
-
- /** if buffer grows pointer no longer valid */
- char* atOfs(unsigned ofs) { return _p._data + ofs; }
-
- /** if buffer grows pointer no longer valid */
- char* cur() { return _p._data + _len; }
-
- void appendChar(char j) {
- *((char*)grow(sizeof(char))) = j;
- }
- void appendNum(char j) {
- *((char*)grow(sizeof(char))) = j;
- }
- void appendNum(short j) {
- *((short*)grow(sizeof(short))) = j;
- }
- void appendNum(int j) {
- *((int*)grow(sizeof(int))) = j;
- }
- void appendNum(unsigned j) {
- *((unsigned*)grow(sizeof(unsigned))) = j;
- }
- void appendNum(bool j) {
- *((bool*)grow(sizeof(bool))) = j;
- }
- void appendNum(double j) {
- *((double*)grow(sizeof(double))) = j;
- }
- void appendNum(long long j) {
- *((long long*)grow(sizeof(long long))) = j;
- }
- void appendNum(unsigned long long j) {
- *((unsigned long long*)grow(sizeof(unsigned long long))) = j;
- }
-
- void appendBuf(const void *src, size_t len) { memcpy(grow((unsigned) len), src, len); }
-
- template<class T>
- void appendStruct(const T& s) { appendBuf(&s, sizeof(T)); }
-
- void appendStr(StringData str , bool includeEOO = true ) {
- const unsigned len = str.size() + ( includeEOO ? 1 : 0 );
- verify( len < (unsigned) BSONObjMaxUserSize );
- str.copyTo( grow(len), includeEOO );
- }
-
- /** @return the in-use length */
- unsigned len() const { return _len; }
-
- private:
- static const unsigned Alignment = 8192;
-
- /** returns the pre-grow write position */
- inline char* grow(unsigned by) {
- unsigned oldlen = _len;
- _len += by;
- if (MONGO_unlikely( _len > _p._size )) {
- growReallocate(oldlen);
- }
- return _p._data + oldlen;
- }
-
- void growReallocate(unsigned oldLenInUse);
- void kill();
- void mallocSelfAligned(unsigned sz);
- void _malloc(unsigned sz);
- void _realloc(unsigned newSize, unsigned oldLenInUse);
- void _free(void*);
-
- struct AllocationInfo {
- char *_data;
- void *_allocationAddress;
- unsigned _size;
- } _p;
- unsigned _len; // bytes in use
- };
-
+ return _p._data + oldlen;
+ }
+
+ void growReallocate(unsigned oldLenInUse);
+ void kill();
+ void mallocSelfAligned(unsigned sz);
+ void _malloc(unsigned sz);
+ void _realloc(unsigned newSize, unsigned oldLenInUse);
+ void _free(void*);
+
+ struct AllocationInfo {
+ char* _data;
+ void* _allocationAddress;
+ unsigned _size;
+ } _p;
+ unsigned _len; // bytes in use
+};
}
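
A short usage sketch of the API declared above; the back-patching through skip()/atOfs() mirrors how a length header can be reserved and filled in later (contents are examples only):

    // Illustrative use of AlignedBuilder; 8192 matches the class's Alignment.
    AlignedBuilder ab(8192);
    const unsigned lenOfs = static_cast<unsigned>(ab.skip(sizeof(unsigned)));
    ab.appendNum(42);                         // writes an int
    ab.appendStr("journal entry");            // NUL-terminated by default
    *(unsigned*)ab.atOfs(lenOfs) = ab.len();  // back-patch the reserved slot
    const char* page = ab.buf();              // page-aligned; invalidated by
                                              // further writes or reset()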
diff --git a/src/mongo/db/storage/mmap_v1/btree/btree_interface.cpp b/src/mongo/db/storage/mmap_v1/btree/btree_interface.cpp
index 422a6441e9a..ce1aa117fef 100644
--- a/src/mongo/db/storage/mmap_v1/btree/btree_interface.cpp
+++ b/src/mongo/db/storage/mmap_v1/btree/btree_interface.cpp
@@ -39,340 +39,335 @@
namespace mongo {
namespace {
- using std::unique_ptr;
- using std::string;
- using std::vector;
+using std::unique_ptr;
+using std::string;
+using std::vector;
+
+template <class OnDiskFormat>
+class BtreeBuilderInterfaceImpl final : public SortedDataBuilderInterface {
+public:
+ BtreeBuilderInterfaceImpl(OperationContext* trans,
+ typename BtreeLogic<OnDiskFormat>::Builder* builder)
+ : _builder(builder), _trans(trans) {}
+
+ Status addKey(const BSONObj& key, const RecordId& loc) {
+ return _builder->addKey(key, DiskLoc::fromRecordId(loc));
+ }
- template <class OnDiskFormat>
- class BtreeBuilderInterfaceImpl final : public SortedDataBuilderInterface {
- public:
- BtreeBuilderInterfaceImpl(OperationContext* trans,
- typename BtreeLogic<OnDiskFormat>::Builder* builder)
- : _builder(builder), _trans(trans) { }
+private:
+ std::unique_ptr<typename BtreeLogic<OnDiskFormat>::Builder> _builder;
+
+ // Not owned here.
+ OperationContext* _trans;
+};
+
+template <class OnDiskFormat>
+class BtreeInterfaceImpl final : public SortedDataInterface {
+public:
+ BtreeInterfaceImpl(HeadManager* headManager,
+ RecordStore* recordStore,
+ SavedCursorRegistry* cursorRegistry,
+ const Ordering& ordering,
+ const string& indexName) {
+ _btree.reset(new BtreeLogic<OnDiskFormat>(
+ headManager, recordStore, cursorRegistry, ordering, indexName));
+ }
- Status addKey(const BSONObj& key, const RecordId& loc) {
- return _builder->addKey(key, DiskLoc::fromRecordId(loc));
- }
+ virtual ~BtreeInterfaceImpl() {}
- private:
- std::unique_ptr<typename BtreeLogic<OnDiskFormat>::Builder> _builder;
+ virtual SortedDataBuilderInterface* getBulkBuilder(OperationContext* txn, bool dupsAllowed) {
+ return new BtreeBuilderInterfaceImpl<OnDiskFormat>(txn,
+ _btree->newBuilder(txn, dupsAllowed));
+ }
- // Not owned here.
- OperationContext* _trans;
- };
+ virtual Status insert(OperationContext* txn,
+ const BSONObj& key,
+ const RecordId& loc,
+ bool dupsAllowed) {
+ return _btree->insert(txn, key, DiskLoc::fromRecordId(loc), dupsAllowed);
+ }
- template <class OnDiskFormat>
- class BtreeInterfaceImpl final : public SortedDataInterface {
- public:
- BtreeInterfaceImpl(HeadManager* headManager,
- RecordStore* recordStore,
- SavedCursorRegistry* cursorRegistry,
- const Ordering& ordering,
- const string& indexName) {
- _btree.reset(new BtreeLogic<OnDiskFormat>(headManager,
- recordStore,
- cursorRegistry,
- ordering,
- indexName));
- }
+ virtual void unindex(OperationContext* txn,
+ const BSONObj& key,
+ const RecordId& loc,
+ bool dupsAllowed) {
+ _btree->unindex(txn, key, DiskLoc::fromRecordId(loc));
+ }
- virtual ~BtreeInterfaceImpl() { }
+ virtual void fullValidate(OperationContext* txn,
+ bool full,
+ long long* numKeysOut,
+ BSONObjBuilder* output) const {
+ *numKeysOut = _btree->fullValidate(txn, NULL, false, false, 0);
+ }
- virtual SortedDataBuilderInterface* getBulkBuilder(OperationContext* txn,
- bool dupsAllowed) {
+ virtual bool appendCustomStats(OperationContext* txn,
+ BSONObjBuilder* output,
+ double scale) const {
+ return false;
+ }
- return new BtreeBuilderInterfaceImpl<OnDiskFormat>(
- txn, _btree->newBuilder(txn, dupsAllowed));
- }
+ virtual long long getSpaceUsedBytes(OperationContext* txn) const {
+ return _btree->getRecordStore()->dataSize(txn);
+ }
- virtual Status insert(OperationContext* txn,
- const BSONObj& key,
- const RecordId& loc,
- bool dupsAllowed) {
+ virtual Status dupKeyCheck(OperationContext* txn, const BSONObj& key, const RecordId& loc) {
+ return _btree->dupKeyCheck(txn, key, DiskLoc::fromRecordId(loc));
+ }
- return _btree->insert(txn, key, DiskLoc::fromRecordId(loc), dupsAllowed);
- }
+ virtual bool isEmpty(OperationContext* txn) {
+ return _btree->isEmpty(txn);
+ }
- virtual void unindex(OperationContext* txn,
- const BSONObj& key,
- const RecordId& loc,
- bool dupsAllowed) {
+ virtual Status touch(OperationContext* txn) const {
+ return _btree->touch(txn);
+ }
- _btree->unindex(txn, key, DiskLoc::fromRecordId(loc));
- }
+ class Cursor final : public SortedDataInterface::Cursor {
+ public:
+ Cursor(OperationContext* txn, const BtreeLogic<OnDiskFormat>* btree, bool forward)
+ : _txn(txn), _btree(btree), _direction(forward ? 1 : -1), _ofs(0) {}
+
+ boost::optional<IndexKeyEntry> next(RequestedInfo parts) override {
+ if (isEOF())
+ return {};
+ if (_lastMoveWasRestore) {
+ // Return current position rather than advancing.
+ _lastMoveWasRestore = false;
+ } else {
+ _btree->advance(_txn, &_bucket, &_ofs, _direction);
+ }
- virtual void fullValidate(OperationContext* txn, bool full, long long *numKeysOut,
- BSONObjBuilder* output) const {
- *numKeysOut = _btree->fullValidate(txn, NULL, false, false, 0);
+ if (atEndPoint())
+ markEOF();
+ return curr(parts);
}
- virtual bool appendCustomStats(OperationContext* txn, BSONObjBuilder* output, double scale)
- const {
- return false;
- }
+ void setEndPosition(const BSONObj& key, bool inclusive) override {
+ if (key.isEmpty()) {
+ // This means scan to end of index.
+ _endState = {};
+ return;
+ }
- virtual long long getSpaceUsedBytes( OperationContext* txn ) const {
- return _btree->getRecordStore()->dataSize( txn );
+ _endState = {{key, inclusive}};
+ seekEndCursor(); // Completes initialization of _endState.
}
- virtual Status dupKeyCheck(OperationContext* txn,
- const BSONObj& key,
- const RecordId& loc) {
- return _btree->dupKeyCheck(txn, key, DiskLoc::fromRecordId(loc));
- }
+ boost::optional<IndexKeyEntry> seek(const BSONObj& key,
+ bool inclusive,
+ RequestedInfo parts) override {
+ locate(key, inclusive == forward() ? RecordId::min() : RecordId::max());
+ _lastMoveWasRestore = false;
- virtual bool isEmpty(OperationContext* txn) {
- return _btree->isEmpty(txn);
+ if (isEOF())
+ return {};
+ dassert(inclusive ? compareKeys(getKey(), key) >= 0 : compareKeys(getKey(), key) > 0);
+ return curr(parts);
}
- virtual Status touch(OperationContext* txn) const{
- return _btree->touch(txn);
- }
- class Cursor final : public SortedDataInterface::Cursor {
- public:
- Cursor(OperationContext* txn,
- const BtreeLogic<OnDiskFormat>* btree,
- bool forward)
- : _txn(txn),
- _btree(btree),
- _direction(forward ? 1 : -1),
- _ofs(0)
- {}
-
- boost::optional<IndexKeyEntry> next(RequestedInfo parts) override {
- if (isEOF()) return {};
- if (_lastMoveWasRestore) {
- // Return current position rather than advancing.
- _lastMoveWasRestore = false;
- }
- else {
- _btree->advance(_txn, &_bucket, &_ofs, _direction);
- }
+ boost::optional<IndexKeyEntry> seek(const IndexSeekPoint& seekPoint,
+ RequestedInfo parts) override {
+ bool canUseAdvanceTo = false;
+ if (!isEOF()) {
+ int cmp = _btree->customBSONCmp(getKey(), seekPoint, _direction);
- if (atEndPoint()) markEOF();
- return curr(parts);
+ // advanceTo requires that we are positioned "earlier" in the index than the
+ // seek point, in scan order.
+ canUseAdvanceTo = forward() ? cmp < 0 : cmp > 0;
}
- void setEndPosition(const BSONObj& key, bool inclusive) override {
- if (key.isEmpty()) {
- // This means scan to end of index.
- _endState = {};
- return;
- }
- _endState = {{key, inclusive}};
- seekEndCursor(); // Completes initialization of _endState.
+ if (canUseAdvanceTo) {
+ // This takes advantage of current location.
+ _btree->advanceTo(_txn, &_bucket, &_ofs, seekPoint, _direction);
+ } else {
+ // Start at root.
+ _bucket = _btree->getHead(_txn);
+ _ofs = 0;
+ _btree->customLocate(_txn, &_bucket, &_ofs, seekPoint, _direction);
}
- boost::optional<IndexKeyEntry> seek(const BSONObj& key, bool inclusive,
- RequestedInfo parts) override {
- locate(key, inclusive == forward() ? RecordId::min() : RecordId::max());
- _lastMoveWasRestore = false;
-
- if (isEOF()) return {};
- dassert(inclusive ? compareKeys(getKey(), key) >= 0
- : compareKeys(getKey(), key) > 0);
- return curr(parts);
- }
+ _lastMoveWasRestore = false;
+ if (atOrPastEndPointAfterSeeking())
+ markEOF();
+ return curr(parts);
+ }
- boost::optional<IndexKeyEntry> seek(const IndexSeekPoint& seekPoint,
- RequestedInfo parts) override {
- bool canUseAdvanceTo = false;
- if (!isEOF()) {
- int cmp = _btree->customBSONCmp(getKey(), seekPoint, _direction);
-
- // advanceTo requires that we are positioned "earlier" in the index than the
- // seek point, in scan order.
- canUseAdvanceTo = forward() ? cmp < 0 : cmp > 0;
- }
+ void savePositioned() override {
+ _txn = nullptr;
+ if (!_lastMoveWasRestore)
+ _savedEOF = isEOF();
- if (canUseAdvanceTo) {
- // This takes advantage of current location.
- _btree->advanceTo(_txn, &_bucket, &_ofs, seekPoint, _direction);
- }
- else {
- // Start at root.
- _bucket = _btree->getHead(_txn);
- _ofs = 0;
- _btree->customLocate(_txn, &_bucket, &_ofs, seekPoint, _direction);
+ if (!isEOF()) {
+ _saved.bucket = _bucket;
+ _btree->savedCursors()->registerCursor(&_saved);
+ // Don't want to change saved position if we only moved during restore.
+ if (!_lastMoveWasRestore) {
+ _saved.key = getKey().getOwned();
+ _saved.loc = getDiskLoc();
}
+ }
+ // Doing nothing with the end cursor since it will do a full reseek on restore.
+ }
- _lastMoveWasRestore = false;
+ void saveUnpositioned() override {
+ _txn = nullptr;
+ // Don't leak our registration if savePositioned() was previously called.
+ if (!_saved.bucket.isNull())
+ _btree->savedCursors()->unregisterCursor(&_saved);
- if (atOrPastEndPointAfterSeeking()) markEOF();
- return curr(parts);
- }
+ _saved.bucket = DiskLoc();
+ _savedEOF = true;
+ }
- void savePositioned() override {
- _txn = nullptr;
+ void restore(OperationContext* txn) override {
+ // guard against accidental double restore
+ invariant(!_txn);
+ _txn = txn;
- if (!_lastMoveWasRestore) _savedEOF = isEOF();
+ // Always do a full seek on restore. We cannot use our last position since index
+ // entries may have been inserted closer to our endpoint and we would need to move
+ // over them.
+ seekEndCursor();
- if (!isEOF()) {
- _saved.bucket = _bucket;
- _btree->savedCursors()->registerCursor(&_saved);
- // Don't want to change saved position if we only moved during restore.
- if (!_lastMoveWasRestore) {
- _saved.key = getKey().getOwned();
- _saved.loc = getDiskLoc();
- }
- }
- // Doing nothing with end cursor since it will do full reseek on restore.
+ if (_savedEOF) {
+ markEOF();
+ return;
}
- void saveUnpositioned() override {
- _txn = nullptr;
- // Don't leak our registration if savePositioned() was previously called.
- if (!_saved.bucket.isNull()) _btree->savedCursors()->unregisterCursor(&_saved);
-
- _saved.bucket = DiskLoc();
- _savedEOF = true;
+ if (_btree->savedCursors()->unregisterCursor(&_saved)) {
+ // We can use the fast restore mechanism.
+ _btree->restorePosition(_txn, _saved.key, _saved.loc, _direction, &_bucket, &_ofs);
+ } else {
+ // Need to find our position from the root.
+ locate(_saved.key, _saved.loc.toRecordId());
}
- void restore(OperationContext* txn) override {
- // guard against accidental double restore
- invariant(!_txn);
- _txn = txn;
+ _lastMoveWasRestore = isEOF() // We weren't EOF but now are.
+ || getDiskLoc() != _saved.loc || compareKeys(getKey(), _saved.key) != 0;
+ }
- // Always do a full seek on restore. We cannot use our last position since index
- // entries may have been inserted closer to our endpoint and we would need to move
- // over them.
- seekEndCursor();
+ private:
+ bool isEOF() const {
+ return _bucket.isNull();
+ }
+ void markEOF() {
+ _bucket = DiskLoc();
+ }
- if (_savedEOF) {
- markEOF();
- return;
- }
+ boost::optional<IndexKeyEntry> curr(RequestedInfo parts) {
+ if (isEOF())
+ return {};
+ return {{(parts & kWantKey) ? getKey() : BSONObj(),
+ (parts & kWantLoc) ? getDiskLoc().toRecordId() : RecordId()}};
+ }
- if (_btree->savedCursors()->unregisterCursor(&_saved)) {
- // We can use the fast restore mechanism.
- _btree->restorePosition(_txn, _saved.key, _saved.loc, _direction,
- &_bucket, &_ofs);
- }
- else {
- // Need to find our position from the root.
- locate(_saved.key, _saved.loc.toRecordId());
- }
+ bool atEndPoint() const {
+ return _endState && _bucket == _endState->bucket && (isEOF() || _ofs == _endState->ofs);
+ }
- _lastMoveWasRestore = isEOF() // We weren't EOF but now are.
- || getDiskLoc() != _saved.loc
- || compareKeys(getKey(), _saved.key) != 0;
- }
+ bool atOrPastEndPointAfterSeeking() const {
+ if (!_endState)
+ return false;
+ if (isEOF())
+ return true;
- private:
- bool isEOF() const { return _bucket.isNull(); }
- void markEOF() { _bucket = DiskLoc(); }
+ int cmp = compareKeys(getKey(), _endState->key);
+ return _endState->inclusive ? cmp > 0 : cmp >= 0;
+ }
- boost::optional<IndexKeyEntry> curr(RequestedInfo parts) {
- if (isEOF()) return {};
- return {{(parts & kWantKey) ? getKey() : BSONObj(),
- (parts & kWantLoc) ? getDiskLoc().toRecordId() : RecordId()}};
- }
+ void locate(const BSONObj& key, const RecordId& loc) {
+ _btree->locate(_txn, key, DiskLoc::fromRecordId(loc), _direction, &_ofs, &_bucket);
+ if (atOrPastEndPointAfterSeeking())
+ markEOF();
+ }
- bool atEndPoint() const {
- return _endState
- && _bucket == _endState->bucket
- && (isEOF() || _ofs == _endState->ofs);
- }
+ // Returns comparison relative to direction of scan. If rhs would be seen later, returns
+ // a positive value.
+ int compareKeys(const BSONObj& lhs, const BSONObj& rhs) const {
+ int cmp = lhs.woCompare(rhs, _btree->ordering(), /*considerFieldName*/ false);
+ return forward() ? cmp : -cmp;
+ }
- bool atOrPastEndPointAfterSeeking() const {
- if (!_endState) return false;
- if (isEOF()) return true;
-
- int cmp = compareKeys(getKey(), _endState->key);
- return _endState->inclusive ? cmp > 0 : cmp >= 0;
- }
+ BSONObj getKey() const {
+ return _btree->getKey(_txn, _bucket, _ofs);
+ }
+ DiskLoc getDiskLoc() const {
+ return _btree->getDiskLoc(_txn, _bucket, _ofs);
+ }
- void locate(const BSONObj& key, const RecordId& loc) {
- _btree->locate(_txn, key, DiskLoc::fromRecordId(loc), _direction, &_ofs, &_bucket);
- if (atOrPastEndPointAfterSeeking()) markEOF();
- }
+ void seekEndCursor() {
+ if (!_endState)
+ return;
+ _btree->locate(_txn,
+ _endState->key,
+ forward() == _endState->inclusive ? DiskLoc::max() : DiskLoc::min(),
+ _direction,
+ &_endState->ofs,
+ &_endState->bucket); // pure out params.
+ }
- // Returns comparison relative to direction of scan. If rhs would be seen later, returns
- // a positive value.
- int compareKeys(const BSONObj& lhs, const BSONObj& rhs) const {
- int cmp = lhs.woCompare(rhs, _btree->ordering(), /*considerFieldName*/false);
- return forward() ? cmp : -cmp;
- }
+ bool forward() const {
+ return _direction == 1;
+ }
- BSONObj getKey() const { return _btree->getKey(_txn, _bucket, _ofs); }
- DiskLoc getDiskLoc() const { return _btree->getDiskLoc(_txn, _bucket, _ofs); }
+ OperationContext* _txn; // not owned
+ const BtreeLogic<OnDiskFormat>* const _btree;
+ const int _direction;
- void seekEndCursor() {
- if (!_endState) return;
- _btree->locate(_txn,
- _endState->key,
- forward() == _endState->inclusive ? DiskLoc::max() : DiskLoc::min(),
- _direction,
- &_endState->ofs, &_endState->bucket); // pure out params.
- }
+ DiskLoc _bucket;
+ int _ofs;
- bool forward() const { return _direction == 1; }
-
- OperationContext* _txn; // not owned
- const BtreeLogic<OnDiskFormat>* const _btree;
- const int _direction;
-
- DiskLoc _bucket;
- int _ofs;
-
- struct EndState {
- BSONObj key;
- bool inclusive;
- DiskLoc bucket;
- int ofs;
- };
- boost::optional<EndState> _endState;
-
- // Used by next to decide to return current position rather than moving. Should be reset
- // to false by any operation that moves the cursor, other than subsequent save/restore
- // pairs.
- bool _lastMoveWasRestore = false;
-
- // Only used by save/restore() if _bucket is non-Null.
- bool _savedEOF = false;
- SavedCursorRegistry::SavedCursor _saved;
+ struct EndState {
+ BSONObj key;
+ bool inclusive;
+ DiskLoc bucket;
+ int ofs;
};
+ boost::optional<EndState> _endState;
- virtual std::unique_ptr<SortedDataInterface::Cursor> newCursor(
- OperationContext* txn,
- bool isForward = true) const {
- return stdx::make_unique<Cursor>(txn, _btree.get(), isForward);
- }
+ // Used by next to decide to return current position rather than moving. Should be reset
+ // to false by any operation that moves the cursor, other than subsequent save/restore
+ // pairs.
+ bool _lastMoveWasRestore = false;
- virtual Status initAsEmpty(OperationContext* txn) {
- return _btree->initAsEmpty(txn);
- }
-
- private:
- unique_ptr<BtreeLogic<OnDiskFormat> > _btree;
+ // Only used by save/restore() if _bucket is non-Null.
+ bool _savedEOF = false;
+ SavedCursorRegistry::SavedCursor _saved;
};
-} // namespace
-
- SortedDataInterface* getMMAPV1Interface(HeadManager* headManager,
- RecordStore* recordStore,
- SavedCursorRegistry* cursorRegistry,
- const Ordering& ordering,
- const string& indexName,
- int version) {
- if (0 == version) {
- return new BtreeInterfaceImpl<BtreeLayoutV0>(headManager,
- recordStore,
- cursorRegistry,
- ordering,
- indexName);
- }
- else {
- invariant(1 == version);
- return new BtreeInterfaceImpl<BtreeLayoutV1>(headManager,
- recordStore,
- cursorRegistry,
- ordering,
- indexName);
- }
+
+ virtual std::unique_ptr<SortedDataInterface::Cursor> newCursor(OperationContext* txn,
+ bool isForward = true) const {
+ return stdx::make_unique<Cursor>(txn, _btree.get(), isForward);
+ }
+
+ virtual Status initAsEmpty(OperationContext* txn) {
+ return _btree->initAsEmpty(txn);
+ }
+
+private:
+ unique_ptr<BtreeLogic<OnDiskFormat>> _btree;
+};
+} // namespace
+
+SortedDataInterface* getMMAPV1Interface(HeadManager* headManager,
+ RecordStore* recordStore,
+ SavedCursorRegistry* cursorRegistry,
+ const Ordering& ordering,
+ const string& indexName,
+ int version) {
+ if (0 == version) {
+ return new BtreeInterfaceImpl<BtreeLayoutV0>(
+ headManager, recordStore, cursorRegistry, ordering, indexName);
+ } else {
+ invariant(1 == version);
+ return new BtreeInterfaceImpl<BtreeLayoutV1>(
+ headManager, recordStore, cursorRegistry, ordering, indexName);
}
+}
} // namespace mongo
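A minimal sketch of a hypothetical caller (not part of this change) showing the
save/restore contract the Cursor above implements: position is saved before a
yield, and restore() always re-seeks the end cursor because entries may have
been inserted or deleted underneath the cursor in the meantime.

    void yieldAndResume(OperationContext* txn, SortedDataInterface::Cursor* cursor) {
        cursor->savePositioned();  // detach from txn, register with the SavedCursorRegistry
        // ... locks dropped here; inserts, deletes, and bucket splits may occur ...
        cursor->restore(txn);      // fast restore if the saved bucket survived, full locate otherwise
    }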
diff --git a/src/mongo/db/storage/mmap_v1/btree/btree_interface.h b/src/mongo/db/storage/mmap_v1/btree/btree_interface.h
index cb2cdd21125..b5814c8a1f5 100644
--- a/src/mongo/db/storage/mmap_v1/btree/btree_interface.h
+++ b/src/mongo/db/storage/mmap_v1/btree/btree_interface.h
@@ -39,12 +39,12 @@
#pragma once
namespace mongo {
- class SavedCursorRegistry;
+class SavedCursorRegistry;
- SortedDataInterface* getMMAPV1Interface(HeadManager* headManager,
- RecordStore* recordStore,
- SavedCursorRegistry* cursorRegistry,
- const Ordering& ordering,
- const std::string& indexName,
- int version);
+SortedDataInterface* getMMAPV1Interface(HeadManager* headManager,
+ RecordStore* recordStore,
+ SavedCursorRegistry* cursorRegistry,
+ const Ordering& ordering,
+ const std::string& indexName,
+ int version);
} // namespace mongo
diff --git a/src/mongo/db/storage/mmap_v1/btree/btree_interface_test.cpp b/src/mongo/db/storage/mmap_v1/btree/btree_interface_test.cpp
index 23f649bfcaa..1272ea4d080 100644
--- a/src/mongo/db/storage/mmap_v1/btree/btree_interface_test.cpp
+++ b/src/mongo/db/storage/mmap_v1/btree/btree_interface_test.cpp
@@ -35,40 +35,32 @@
namespace mongo {
- using std::unique_ptr;
+using std::unique_ptr;
- class MyHarnessHelper final : public HarnessHelper {
- public:
- MyHarnessHelper()
- : _recordStore("a.b"),
- _order(Ordering::make(BSONObj())) {
- }
+class MyHarnessHelper final : public HarnessHelper {
+public:
+ MyHarnessHelper() : _recordStore("a.b"), _order(Ordering::make(BSONObj())) {}
- std::unique_ptr<SortedDataInterface> newSortedDataInterface(bool unique) final {
- std::unique_ptr<SortedDataInterface> sorted(getMMAPV1Interface(&_headManager,
- &_recordStore,
- &_cursorRegistry,
- _order,
- "a_1",
- 1));
- OperationContextNoop op;
- massertStatusOK(sorted->initAsEmpty(&op));
- return sorted;
- }
-
- std::unique_ptr<RecoveryUnit> newRecoveryUnit() final {
- return stdx::make_unique<HeapRecordStoreBtreeRecoveryUnit>();
- }
-
- private:
- TestHeadManager _headManager;
- HeapRecordStoreBtree _recordStore;
- SavedCursorRegistry _cursorRegistry;
- Ordering _order;
- };
+ std::unique_ptr<SortedDataInterface> newSortedDataInterface(bool unique) final {
+ std::unique_ptr<SortedDataInterface> sorted(
+ getMMAPV1Interface(&_headManager, &_recordStore, &_cursorRegistry, _order, "a_1", 1));
+ OperationContextNoop op;
+ massertStatusOK(sorted->initAsEmpty(&op));
+ return sorted;
+ }
- std::unique_ptr<HarnessHelper> newHarnessHelper() {
- return stdx::make_unique<MyHarnessHelper>();
+ std::unique_ptr<RecoveryUnit> newRecoveryUnit() final {
+ return stdx::make_unique<HeapRecordStoreBtreeRecoveryUnit>();
}
+private:
+ TestHeadManager _headManager;
+ HeapRecordStoreBtree _recordStore;
+ SavedCursorRegistry _cursorRegistry;
+ Ordering _order;
+};
+
+std::unique_ptr<HarnessHelper> newHarnessHelper() {
+ return stdx::make_unique<MyHarnessHelper>();
+}
}
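For reference, a hedged sketch of how a harness like the one above is driven
(only newHarnessHelper(), newSortedDataInterface(), and newRecoveryUnit() are
taken from this file; the surrounding test body is illustrative):

    std::unique_ptr<HarnessHelper> harness = newHarnessHelper();
    std::unique_ptr<SortedDataInterface> sorted = harness->newSortedDataInterface(/*unique=*/true);
    std::unique_ptr<RecoveryUnit> ru = harness->newRecoveryUnit();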
diff --git a/src/mongo/db/storage/mmap_v1/btree/btree_logic.cpp b/src/mongo/db/storage/mmap_v1/btree/btree_logic.cpp
index 1afe24331cf..11e31b3fce7 100644
--- a/src/mongo/db/storage/mmap_v1/btree/btree_logic.cpp
+++ b/src/mongo/db/storage/mmap_v1/btree/btree_logic.cpp
@@ -42,2383 +42,2299 @@
namespace mongo {
- using std::unique_ptr;
- using std::dec;
- using std::endl;
- using std::hex;
- using std::make_pair;
- using std::pair;
- using std::string;
- using std::stringstream;
- using std::vector;
-
- // BtreeLogic::Builder algorithm
- //
- // Phase 1:
- // Handled by caller. Extracts keys from raw documents and puts them in external sorter
- //
- // Phase 2 (the addKeys phase):
- // Add all keys to buckets. When a bucket gets full, pop the highest key (setting the
- // nextChild pointer of the bucket to the prevChild of the popped key), add the popped key to
- // a parent bucket, and create a new right sibling bucket to add the new key to. If the parent
- // bucket is full, this same operation is performed on the parent and all full ancestors. If
- // we get to the root and it is full, a new root is created above the current root. When
- // creating a new right sibling, it is set as its parent's nextChild as all keys in the right
- // sibling will be higher than all keys currently in the parent.
-
- //
- // Public Builder logic
- //
-
- template <class BtreeLayout>
- typename BtreeLogic<BtreeLayout>::Builder*
- BtreeLogic<BtreeLayout>::newBuilder(OperationContext* txn, bool dupsAllowed) {
- return new Builder(this, txn, dupsAllowed);
- }
-
- template <class BtreeLayout>
- BtreeLogic<BtreeLayout>::Builder::Builder(BtreeLogic* logic,
- OperationContext* txn,
- bool dupsAllowed)
- : _logic(logic),
- _dupsAllowed(dupsAllowed),
- _txn(txn) {
-
- // The normal bulk building path calls initAsEmpty, so we already have an empty root bucket.
- // This isn't the case in some unit tests that use the Builder directly rather than going
- // through an IndexAccessMethod.
- _rightLeafLoc = DiskLoc::fromRecordId(_logic->_headManager->getHead(txn));
- if (_rightLeafLoc.isNull()) {
- _rightLeafLoc = _logic->_addBucket(txn);
- _logic->_headManager->setHead(_txn, _rightLeafLoc.toRecordId());
- }
-
- // must be empty when starting
- invariant(_getBucket(_rightLeafLoc)->n == 0);
- }
-
- template <class BtreeLayout>
- class BtreeLogic<BtreeLayout>::Builder::SetRightLeafLocChange : public RecoveryUnit::Change {
- public:
- SetRightLeafLocChange(Builder* builder, DiskLoc oldLoc)
- : _builder(builder)
- , _oldLoc(oldLoc)
- {}
-
- virtual void commit() {}
- virtual void rollback() { _builder->_rightLeafLoc = _oldLoc; }
-
- Builder* _builder;
- const DiskLoc _oldLoc;
- };
-
- template <class BtreeLayout>
- Status BtreeLogic<BtreeLayout>::Builder::addKey(const BSONObj& keyObj, const DiskLoc& loc) {
- unique_ptr<KeyDataOwnedType> key(new KeyDataOwnedType(keyObj));
-
- if (key->dataSize() > BtreeLayout::KeyMax) {
- string msg = str::stream() << "Btree::insert: key too large to index, failing "
- << _logic->_indexName
- << ' ' << key->dataSize() << ' ' << key->toString();
- log() << msg << endl;
- return Status(ErrorCodes::KeyTooLong, msg);
- }
+using std::unique_ptr;
+using std::dec;
+using std::endl;
+using std::hex;
+using std::make_pair;
+using std::pair;
+using std::string;
+using std::stringstream;
+using std::vector;
+
+// BtreeLogic::Builder algorithm
+//
+// Phase 1:
+// Handled by caller. Extracts keys from raw documents and puts them in external sorter
+//
+// Phase 2 (the addKeys phase):
+// Add all keys to buckets. When a bucket gets full, pop the highest key (setting the
+// nextChild pointer of the bucket to the prevChild of the popped key), add the popped key to
+// a parent bucket, and create a new right sibling bucket to add the new key to. If the parent
+// bucket is full, this same operation is performed on the parent and all full ancestors. If
+// we get to the root and it is full, a new root is created above the current root. When
+// creating a new right sibling, it is set as its parent's nextChild as all keys in the right
+// sibling will be higher than all keys currently in the parent.
+
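+// A worked micro-example of the addKeys phase, assuming for illustration that
+// a bucket holds at most three keys:
+//   add 1,2,3          -> leaf A = [1 2 3]
+//   add 4: A is full   -> pop 3 into a new root P (3 keeps A as its prevChild),
+//                         create right sibling B as P's nextChild
+//                         -> A = [1 2], P = [3], B = [4]
+//   add 5,6            -> B = [4 5 6]; adding 7 pops 6 up into P, and so on.
+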
+//
+// Public Builder logic
+//
+
+template <class BtreeLayout>
+typename BtreeLogic<BtreeLayout>::Builder* BtreeLogic<BtreeLayout>::newBuilder(
+ OperationContext* txn, bool dupsAllowed) {
+ return new Builder(this, txn, dupsAllowed);
+}
+
+template <class BtreeLayout>
+BtreeLogic<BtreeLayout>::Builder::Builder(BtreeLogic* logic,
+ OperationContext* txn,
+ bool dupsAllowed)
+ : _logic(logic), _dupsAllowed(dupsAllowed), _txn(txn) {
+ // The normal bulk building path calls initAsEmpty, so we already have an empty root bucket.
+ // This isn't the case in some unit tests that use the Builder directly rather than going
+ // through an IndexAccessMethod.
+ _rightLeafLoc = DiskLoc::fromRecordId(_logic->_headManager->getHead(txn));
+ if (_rightLeafLoc.isNull()) {
+ _rightLeafLoc = _logic->_addBucket(txn);
+ _logic->_headManager->setHead(_txn, _rightLeafLoc.toRecordId());
+ }
+
+ // must be empty when starting
+ invariant(_getBucket(_rightLeafLoc)->n == 0);
+}
+
+template <class BtreeLayout>
+class BtreeLogic<BtreeLayout>::Builder::SetRightLeafLocChange : public RecoveryUnit::Change {
+public:
+ SetRightLeafLocChange(Builder* builder, DiskLoc oldLoc) : _builder(builder), _oldLoc(oldLoc) {}
+
+ virtual void commit() {}
+ virtual void rollback() {
+ _builder->_rightLeafLoc = _oldLoc;
+ }
+
+ Builder* _builder;
+ const DiskLoc _oldLoc;
+};
+
+template <class BtreeLayout>
+Status BtreeLogic<BtreeLayout>::Builder::addKey(const BSONObj& keyObj, const DiskLoc& loc) {
+ unique_ptr<KeyDataOwnedType> key(new KeyDataOwnedType(keyObj));
+
+ if (key->dataSize() > BtreeLayout::KeyMax) {
+ string msg = str::stream() << "Btree::insert: key too large to index, failing "
+ << _logic->_indexName << ' ' << key->dataSize() << ' '
+ << key->toString();
+ log() << msg << endl;
+ return Status(ErrorCodes::KeyTooLong, msg);
+ }
+
+ // If we have a previous key to compare to...
+ if (_keyLast.get()) {
+ int cmp = _keyLast->woCompare(*key, _logic->_ordering);
+
+ // This shouldn't happen ever. We expect keys in sorted order.
+ if (cmp > 0) {
+ return Status(ErrorCodes::InternalError, "Bad key order in btree builder");
+ }
+
+        // This could easily happen.
+ if (!_dupsAllowed && (cmp == 0)) {
+ return Status(ErrorCodes::DuplicateKey, _logic->dupKeyError(*_keyLast));
+ }
+ }
+
+ BucketType* rightLeaf = _getModifiableBucket(_rightLeafLoc);
+ if (!_logic->pushBack(rightLeaf, loc, *key, DiskLoc())) {
+ // bucket was full, so split and try with the new node.
+ _txn->recoveryUnit()->registerChange(new SetRightLeafLocChange(this, _rightLeafLoc));
+ _rightLeafLoc = newBucket(rightLeaf, _rightLeafLoc);
+ rightLeaf = _getModifiableBucket(_rightLeafLoc);
+ invariant(_logic->pushBack(rightLeaf, loc, *key, DiskLoc()));
+ }
+
+ _keyLast = std::move(key);
+ return Status::OK();
+}
+
+//
+// Private Builder logic
+//
+
+template <class BtreeLayout>
+DiskLoc BtreeLogic<BtreeLayout>::Builder::newBucket(BucketType* leftSib, DiskLoc leftSibLoc) {
+ invariant(leftSib->n >= 2); // Guaranteed by sufficiently small KeyMax.
+
+ if (leftSib->parent.isNull()) {
+ // Making a new root
+ invariant(leftSibLoc.toRecordId() == _logic->_headManager->getHead(_txn));
+ const DiskLoc newRootLoc = _logic->_addBucket(_txn);
+ leftSib->parent = newRootLoc;
+ _logic->_headManager->setHead(_txn, newRootLoc.toRecordId());
+
+ // Set the newRoot's nextChild to point to leftSib for the invariant below.
+ BucketType* newRoot = _getBucket(newRootLoc);
+ *_txn->recoveryUnit()->writing(&newRoot->nextChild) = leftSibLoc;
+ }
+
+ DiskLoc parentLoc = leftSib->parent;
+ BucketType* parent = _getModifiableBucket(parentLoc);
+
+ // For the pushBack below to be correct, leftSib must be the right-most child of parent.
+ invariant(parent->nextChild == leftSibLoc);
+
+ // Pull right-most key out of leftSib and move to parent, splitting parent if necessary.
+ // Note that popBack() handles setting leftSib's nextChild to the former prevChildNode of
+ // the popped key.
+ KeyDataType key;
+ DiskLoc val;
+ _logic->popBack(leftSib, &val, &key);
+ if (!_logic->pushBack(parent, val, key, leftSibLoc)) {
+ // parent is full, so split it.
+ parentLoc = newBucket(parent, parentLoc);
+ parent = _getModifiableBucket(parentLoc);
+ invariant(_logic->pushBack(parent, val, key, leftSibLoc));
+ leftSib->parent = parentLoc;
+ }
+
+ // Create a new bucket to the right of leftSib and set its parent pointer and the downward
+ // nextChild pointer from the parent.
+ DiskLoc newBucketLoc = _logic->_addBucket(_txn);
+ BucketType* newBucket = _getBucket(newBucketLoc);
+ *_txn->recoveryUnit()->writing(&newBucket->parent) = parentLoc;
+ *_txn->recoveryUnit()->writing(&parent->nextChild) = newBucketLoc;
+ return newBucketLoc;
+}
+
+template <class BtreeLayout>
+typename BtreeLogic<BtreeLayout>::BucketType*
+BtreeLogic<BtreeLayout>::Builder::_getModifiableBucket(DiskLoc loc) {
+ return _logic->btreemod(_txn, _logic->getBucket(_txn, loc));
+}
+
+template <class BtreeLayout>
+typename BtreeLogic<BtreeLayout>::BucketType* BtreeLogic<BtreeLayout>::Builder::_getBucket(
+ DiskLoc loc) {
+ return _logic->getBucket(_txn, loc);
+}
+
+//
+// BtreeLogic logic
+//
+
+// static
+template <class BtreeLayout>
+typename BtreeLogic<BtreeLayout>::FullKey BtreeLogic<BtreeLayout>::getFullKey(
+ const BucketType* bucket, int i) {
+ if (i >= bucket->n) {
+ int code = 13000;
+ massert(code,
+ (string) "invalid keyNode: " + BSON("i" << i << "n" << bucket->n).jsonString(),
+ i < bucket->n);
+ }
+ return FullKey(bucket, i);
+}
+
+// static
+template <class BtreeLayout>
+typename BtreeLogic<BtreeLayout>::KeyHeaderType& BtreeLogic<BtreeLayout>::getKeyHeader(
+ BucketType* bucket, int i) {
+ return ((KeyHeaderType*)bucket->data)[i];
+}
+
+// static
+template <class BtreeLayout>
+const typename BtreeLogic<BtreeLayout>::KeyHeaderType& BtreeLogic<BtreeLayout>::getKeyHeader(
+ const BucketType* bucket, int i) {
+ return ((const KeyHeaderType*)bucket->data)[i];
+}
+
+template <class BtreeLayout>
+void BtreeLogic<BtreeLayout>::markUnused(BucketType* bucket, int keyPos) {
+ invariant(keyPos >= 0 && keyPos < bucket->n);
+ getKeyHeader(bucket, keyPos).setUnused();
+}
+
+template <class BtreeLayout>
+char* BtreeLogic<BtreeLayout>::dataAt(BucketType* bucket, short ofs) {
+ return bucket->data + ofs;
+}
+
+template <class BtreeLayout>
+typename BtreeLogic<BtreeLayout>::BucketType* BtreeLogic<BtreeLayout>::btreemod(
+ OperationContext* txn, BucketType* bucket) {
+ txn->recoveryUnit()->writingPtr(bucket, BtreeLayout::BucketSize);
+ return bucket;
+}
+
+template <class BtreeLayout>
+int BtreeLogic<BtreeLayout>::totalDataSize(BucketType* bucket) {
+ return (int)(BtreeLayout::BucketSize - (bucket->data - (char*)bucket));
+}
+
+// We define this value as the maximum number of bytes such that, if we have
+// fewer than this many bytes, we must be able to either merge with or receive
+// keys from any neighboring node. If our utilization goes below this value we
+// know we can bring up the utilization with a simple operation. Ignoring the
+// 90/10 split policy which is sometimes employed and our 'unused' nodes, this
+// is a lower bound on bucket utilization for non root buckets.
+//
+// Note that the exact value here depends on the implementation of
+// _rebalancedSeparatorPos(). The conditions for lowWaterMark - 1 are as
+// follows: We know we cannot merge with the neighbor, so the total data size
+// for us, the neighbor, and the separator must be at least
+// BucketType::bodySize() + 1. We must be able to accept one key of any
+// allowed size, so our size plus storage for that additional key must be
+// <= BucketType::bodySize() / 2. This way, with the extra key we'll have a
+// new bucket data size < half the total data size and by the implementation
+// of _rebalancedSeparatorPos() the key must be added.
+template <class BtreeLayout>
+int BtreeLogic<BtreeLayout>::lowWaterMark() {
+ return BtreeLayout::BucketBodySize / 2 - BtreeLayout::KeyMax - sizeof(KeyHeaderType) + 1;
+}
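+
+// For illustration only (hypothetical sizes, not the real layout constants):
+// with BucketBodySize == 8000, KeyMax == 1024, and sizeof(KeyHeaderType) == 16,
+// lowWaterMark() == 8000 / 2 - 1024 - 16 + 1 == 2961, so a non-root bucket
+// using fewer bytes than this can always merge with, or take a key from, a
+// neighbor.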
+
+template <class BtreeLayout>
+void BtreeLogic<BtreeLayout>::init(BucketType* bucket) {
+ BtreeLayout::initBucket(bucket);
+ bucket->parent.Null();
+ bucket->nextChild.Null();
+ bucket->flags = Packed;
+ bucket->n = 0;
+ bucket->emptySize = totalDataSize(bucket);
+ bucket->topSize = 0;
+}
+
+template <class BtreeLayout>
+void BtreeLogic<BtreeLayout>::_unalloc(BucketType* bucket, int bytes) {
+ bucket->topSize -= bytes;
+ bucket->emptySize += bytes;
+}
- // If we have a previous key to compare to...
- if (_keyLast.get()) {
- int cmp = _keyLast->woCompare(*key, _logic->_ordering);
-
- // This shouldn't happen ever. We expect keys in sorted order.
- if (cmp > 0) {
- return Status(ErrorCodes::InternalError, "Bad key order in btree builder");
- }
+/**
+ * We allocate space from the end of the buffer for data. The keynodes grow from the front.
+ */
+template <class BtreeLayout>
+int BtreeLogic<BtreeLayout>::_alloc(BucketType* bucket, int bytes) {
+ invariant(bucket->emptySize >= bytes);
+ bucket->topSize += bytes;
+ bucket->emptySize -= bytes;
+ int ofs = totalDataSize(bucket) - bucket->topSize;
+ invariant(ofs > 0);
+ return ofs;
+}
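+
+// Layout assumed by _alloc()/_unalloc() (sketch): key headers grow from the
+// front of data[], key bytes are allocated from the back, and emptySize is
+// the gap in between:
+//
+//   data[]: [ KeyHeaderType x n | ... gap (emptySize) ... | key bytes (topSize) ]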
+
+template <class BtreeLayout>
+void BtreeLogic<BtreeLayout>::setNotPacked(BucketType* bucket) {
+ bucket->flags &= ~Packed;
+}
+
+template <class BtreeLayout>
+void BtreeLogic<BtreeLayout>::setPacked(BucketType* bucket) {
+ bucket->flags |= Packed;
+}
+
+template <class BtreeLayout>
+void BtreeLogic<BtreeLayout>::_delKeyAtPos(BucketType* bucket, int keypos, bool mayEmpty) {
+ invariant(keypos >= 0 && keypos <= bucket->n);
+ invariant(childLocForPos(bucket, keypos).isNull());
+ invariant((mayEmpty && bucket->n > 0) || bucket->n > 1 || bucket->nextChild.isNull());
+
+ bucket->emptySize += sizeof(KeyHeaderType);
+ bucket->n--;
+
+ for (int j = keypos; j < bucket->n; j++) {
+ getKeyHeader(bucket, j) = getKeyHeader(bucket, j + 1);
+ }
+
+ setNotPacked(bucket);
+}
-            // This could easily happen.
- if (!_dupsAllowed && (cmp == 0)) {
- return Status(ErrorCodes::DuplicateKey, _logic->dupKeyError(*_keyLast));
- }
- }
-
- BucketType* rightLeaf = _getModifiableBucket(_rightLeafLoc);
- if (!_logic->pushBack(rightLeaf, loc, *key, DiskLoc())) {
- // bucket was full, so split and try with the new node.
- _txn->recoveryUnit()->registerChange(new SetRightLeafLocChange(this, _rightLeafLoc));
- _rightLeafLoc = newBucket(rightLeaf, _rightLeafLoc);
- rightLeaf = _getModifiableBucket(_rightLeafLoc);
- invariant(_logic->pushBack(rightLeaf, loc, *key, DiskLoc()));
- }
+/**
+ * Pull rightmost key from the bucket and set its prevChild pointer to be the nextChild for the
+ * whole bucket. It is assumed that caller already has the old value of the nextChild
+ * pointer and is about to add a pointer to it elsewhere in the tree.
+ *
+ * This is only used by BtreeLogic::Builder. Think very hard (and change this comment) before
+ * using it anywhere else.
+ *
+ * WARNING: The keyDataOut that is filled out by this function points to newly unalloced memory
+ * inside of this bucket. It only remains valid until the next write to this bucket.
+ */
+template <class BtreeLayout>
+void BtreeLogic<BtreeLayout>::popBack(BucketType* bucket,
+ DiskLoc* recordLocOut,
+ KeyDataType* keyDataOut) {
+ massert(17435, "n==0 in btree popBack()", bucket->n > 0);
+
+ invariant(getKeyHeader(bucket, bucket->n - 1).isUsed());
+
+ FullKey kn = getFullKey(bucket, bucket->n - 1);
+ *recordLocOut = kn.recordLoc;
+ keyDataOut->assign(kn.data);
+ int keysize = kn.data.dataSize();
+
+ // The left/prev child of the node we are popping now goes in to the nextChild slot as all
+ // of its keys are greater than all remaining keys in this node.
+ bucket->nextChild = kn.prevChildBucket;
+ bucket->n--;
+
+ // This is risky because the keyDataOut we filled out above will now point to this newly
+ // unalloced memory.
+ bucket->emptySize += sizeof(KeyHeaderType);
+ _unalloc(bucket, keysize);
+}
- _keyLast = std::move(key);
- return Status::OK();
+/**
+ * Add a key. Must be > all existing. Be careful to set next ptr right.
+ */
+template <class BtreeLayout>
+bool BtreeLogic<BtreeLayout>::pushBack(BucketType* bucket,
+ const DiskLoc recordLoc,
+ const KeyDataType& key,
+ const DiskLoc prevChild) {
+ int bytesNeeded = key.dataSize() + sizeof(KeyHeaderType);
+ if (bytesNeeded > bucket->emptySize) {
+ return false;
}
+ invariant(bytesNeeded <= bucket->emptySize);
- //
- // Private Builder logic
- //
-
- template <class BtreeLayout>
- DiskLoc BtreeLogic<BtreeLayout>::Builder::newBucket(BucketType* leftSib,
- DiskLoc leftSibLoc) {
- invariant(leftSib->n >= 2); // Guaranteed by sufficiently small KeyMax.
-
- if (leftSib->parent.isNull()) {
- // Making a new root
- invariant(leftSibLoc.toRecordId() == _logic->_headManager->getHead(_txn));
- const DiskLoc newRootLoc = _logic->_addBucket(_txn);
- leftSib->parent = newRootLoc;
- _logic->_headManager->setHead(_txn, newRootLoc.toRecordId());
-
- // Set the newRoot's nextChild to point to leftSib for the invariant below.
- BucketType* newRoot = _getBucket(newRootLoc);
- *_txn->recoveryUnit()->writing(&newRoot->nextChild) = leftSibLoc;
- }
-
- DiskLoc parentLoc = leftSib->parent;
- BucketType* parent = _getModifiableBucket(parentLoc);
-
- // For the pushBack below to be correct, leftSib must be the right-most child of parent.
- invariant(parent->nextChild == leftSibLoc);
-
- // Pull right-most key out of leftSib and move to parent, splitting parent if necessary.
- // Note that popBack() handles setting leftSib's nextChild to the former prevChildNode of
- // the popped key.
- KeyDataType key;
- DiskLoc val;
- _logic->popBack(leftSib, &val, &key);
- if (!_logic->pushBack(parent, val, key, leftSibLoc)) {
- // parent is full, so split it.
- parentLoc = newBucket(parent, parentLoc);
- parent = _getModifiableBucket(parentLoc);
- invariant(_logic->pushBack(parent, val, key, leftSibLoc));
- leftSib->parent = parentLoc;
+ if (bucket->n) {
+ const FullKey klast = getFullKey(bucket, bucket->n - 1);
+ if (klast.data.woCompare(key, _ordering) > 0) {
+ log() << "btree bucket corrupt? "
+ "consider reindexing or running validate command" << endl;
+ log() << " klast: " << klast.data.toString() << endl;
+ log() << " key: " << key.toString() << endl;
+ invariant(false);
}
-
- // Create a new bucket to the right of leftSib and set its parent pointer and the downward
- // nextChild pointer from the parent.
- DiskLoc newBucketLoc = _logic->_addBucket(_txn);
- BucketType* newBucket = _getBucket(newBucketLoc);
- *_txn->recoveryUnit()->writing(&newBucket->parent) = parentLoc;
- *_txn->recoveryUnit()->writing(&parent->nextChild) = newBucketLoc;
- return newBucketLoc;
}
- template <class BtreeLayout>
- typename BtreeLogic<BtreeLayout>::BucketType*
- BtreeLogic<BtreeLayout>::Builder::_getModifiableBucket(DiskLoc loc) {
- return _logic->btreemod(_txn, _logic->getBucket(_txn, loc));
- }
+ bucket->emptySize -= sizeof(KeyHeaderType);
+ KeyHeaderType& kn = getKeyHeader(bucket, bucket->n++);
+ kn.prevChildBucket = prevChild;
+ kn.recordLoc = recordLoc;
+ kn.setKeyDataOfs((short)_alloc(bucket, key.dataSize()));
+ short ofs = kn.keyDataOfs();
+ char* p = dataAt(bucket, ofs);
+ memcpy(p, key.data(), key.dataSize());
+ return true;
+}
- template <class BtreeLayout>
- typename BtreeLogic<BtreeLayout>::BucketType*
- BtreeLogic<BtreeLayout>::Builder::_getBucket(DiskLoc loc) {
- return _logic->getBucket(_txn, loc);
- }
+/**
+ * Durability note:
+ *
+ * We do separate intent declarations herein. Arguably one could just declare the whole bucket
+ * given we do group commits. This is something we could investigate later as to what is
+ * faster.
+ **/
- //
- // BtreeLogic logic
- //
+/**
+ * Insert a key in a bucket with no complexity -- no splits required
+ * Returns false if a split is required.
+ */
+template <class BtreeLayout>
+bool BtreeLogic<BtreeLayout>::basicInsert(OperationContext* txn,
+ BucketType* bucket,
+ const DiskLoc bucketLoc,
+ int& keypos,
+ const KeyDataType& key,
+ const DiskLoc recordLoc) {
+ invariant(bucket->n < 1024);
+ invariant(keypos >= 0 && keypos <= bucket->n);
- // static
- template <class BtreeLayout>
- typename BtreeLogic<BtreeLayout>::FullKey
- BtreeLogic<BtreeLayout>::getFullKey(const BucketType* bucket, int i) {
- if (i >= bucket->n) {
- int code = 13000;
- massert(code,
- (string)"invalid keyNode: " + BSON( "i" << i << "n" << bucket->n ).jsonString(),
- i < bucket->n );
+ int bytesNeeded = key.dataSize() + sizeof(KeyHeaderType);
+ if (bytesNeeded > bucket->emptySize) {
+ _pack(txn, bucket, bucketLoc, keypos);
+ if (bytesNeeded > bucket->emptySize) {
+ return false;
}
- return FullKey(bucket, i);
}
- // static
- template <class BtreeLayout>
- typename BtreeLogic<BtreeLayout>::KeyHeaderType&
- BtreeLogic<BtreeLayout>::getKeyHeader(BucketType* bucket, int i) {
- return ((KeyHeaderType*)bucket->data)[i];
- }
+ invariant(getBucket(txn, bucketLoc) == bucket);
- // static
- template <class BtreeLayout>
- const typename BtreeLogic<BtreeLayout>::KeyHeaderType&
- BtreeLogic<BtreeLayout>::getKeyHeader(const BucketType* bucket, int i) {
- return ((const KeyHeaderType*)bucket->data)[i];
- }
+ {
+ // declare that we will write to [k(keypos),k(n)]
+ char* start = reinterpret_cast<char*>(&getKeyHeader(bucket, keypos));
+ char* end = reinterpret_cast<char*>(&getKeyHeader(bucket, bucket->n + 1));
- template <class BtreeLayout>
- void BtreeLogic<BtreeLayout>::markUnused(BucketType* bucket, int keyPos) {
- invariant(keyPos >= 0 && keyPos < bucket->n);
- getKeyHeader(bucket, keyPos).setUnused();
+ // Declare that we will write to [k(keypos),k(n)]
+ txn->recoveryUnit()->writingPtr(start, end - start);
}
- template <class BtreeLayout>
- char* BtreeLogic<BtreeLayout>::dataAt(BucketType* bucket, short ofs) {
- return bucket->data + ofs;
+ // e.g. for n==3, keypos==2
+ // 1 4 9 -> 1 4 _ 9
+ for (int j = bucket->n; j > keypos; j--) {
+ getKeyHeader(bucket, j) = getKeyHeader(bucket, j - 1);
}
- template <class BtreeLayout>
- typename BtreeLogic<BtreeLayout>::BucketType*
- BtreeLogic<BtreeLayout>::btreemod(OperationContext* txn, BucketType* bucket) {
- txn->recoveryUnit()->writingPtr(bucket, BtreeLayout::BucketSize);
- return bucket;
- }
-
- template <class BtreeLayout>
- int BtreeLogic<BtreeLayout>::totalDataSize(BucketType* bucket) {
- return (int) (BtreeLayout::BucketSize - (bucket->data - (char*)bucket));
- }
-
- // We define this value as the maximum number of bytes such that, if we have
- // fewer than this many bytes, we must be able to either merge with or receive
- // keys from any neighboring node. If our utilization goes below this value we
- // know we can bring up the utilization with a simple operation. Ignoring the
- // 90/10 split policy which is sometimes employed and our 'unused' nodes, this
- // is a lower bound on bucket utilization for non root buckets.
- //
- // Note that the exact value here depends on the implementation of
- // _rebalancedSeparatorPos(). The conditions for lowWaterMark - 1 are as
- // follows: We know we cannot merge with the neighbor, so the total data size
- // for us, the neighbor, and the separator must be at least
- // BucketType::bodySize() + 1. We must be able to accept one key of any
- // allowed size, so our size plus storage for that additional key must be
- // <= BucketType::bodySize() / 2. This way, with the extra key we'll have a
- // new bucket data size < half the total data size and by the implementation
- // of _rebalancedSeparatorPos() the key must be added.
- template <class BtreeLayout>
- int BtreeLogic<BtreeLayout>::lowWaterMark() {
- return BtreeLayout::BucketBodySize / 2 - BtreeLayout::KeyMax - sizeof(KeyHeaderType) + 1;
- }
-
- template <class BtreeLayout>
- void BtreeLogic<BtreeLayout>::init(BucketType* bucket) {
- BtreeLayout::initBucket(bucket);
- bucket->parent.Null();
- bucket->nextChild.Null();
- bucket->flags = Packed;
- bucket->n = 0;
- bucket->emptySize = totalDataSize(bucket);
- bucket->topSize = 0;
- }
-
- template <class BtreeLayout>
- void BtreeLogic<BtreeLayout>::_unalloc(BucketType* bucket, int bytes) {
- bucket->topSize -= bytes;
- bucket->emptySize += bytes;
- }
+ size_t writeLen = sizeof(bucket->emptySize) + sizeof(bucket->topSize) + sizeof(bucket->n);
+ txn->recoveryUnit()->writingPtr(&bucket->emptySize, writeLen);
+ bucket->emptySize -= sizeof(KeyHeaderType);
+ bucket->n++;
- /**
- * We allocate space from the end of the buffer for data. The keynodes grow from the front.
- */
- template <class BtreeLayout>
- int BtreeLogic<BtreeLayout>::_alloc(BucketType* bucket, int bytes) {
- invariant(bucket->emptySize >= bytes);
- bucket->topSize += bytes;
- bucket->emptySize -= bytes;
- int ofs = totalDataSize(bucket) - bucket->topSize;
- invariant(ofs > 0);
- return ofs;
- }
+ // This _KeyNode was marked for writing above.
+ KeyHeaderType& kn = getKeyHeader(bucket, keypos);
+ kn.prevChildBucket.Null();
+ kn.recordLoc = recordLoc;
+ kn.setKeyDataOfs((short)_alloc(bucket, key.dataSize()));
+ char* p = dataAt(bucket, kn.keyDataOfs());
+ txn->recoveryUnit()->writingPtr(p, key.dataSize());
+ memcpy(p, key.data(), key.dataSize());
+ return true;
+}
- template <class BtreeLayout>
- void BtreeLogic<BtreeLayout>::setNotPacked(BucketType* bucket) {
- bucket->flags &= ~Packed;
- }
+/**
+ * With this implementation, refPos == 0 disregards effect of refPos. index > 0 prevents
+ * creation of an empty bucket.
+ */
+template <class BtreeLayout>
+bool BtreeLogic<BtreeLayout>::mayDropKey(BucketType* bucket, int index, int refPos) {
+ return index > 0 && (index != refPos) && getKeyHeader(bucket, index).isUnused() &&
+ getKeyHeader(bucket, index).prevChildBucket.isNull();
+}
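+
+// In other words, a key may be dropped during packing only if it is unused,
+// has no left child, and is not the caller's reference position; since index
+// must be > 0, passing refPos == 0 can never exclude anything, which is why
+// refPos == 0 disables the refPos check.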
- template <class BtreeLayout>
- void BtreeLogic<BtreeLayout>::setPacked(BucketType* bucket) {
- bucket->flags |= Packed;
+template <class BtreeLayout>
+int BtreeLogic<BtreeLayout>::_packedDataSize(BucketType* bucket, int refPos) {
+ if (bucket->flags & Packed) {
+ return BtreeLayout::BucketSize - bucket->emptySize - BucketType::HeaderSize;
}
- template <class BtreeLayout>
- void BtreeLogic<BtreeLayout>::_delKeyAtPos(BucketType* bucket, int keypos, bool mayEmpty) {
- invariant(keypos >= 0 && keypos <= bucket->n);
- invariant(childLocForPos(bucket, keypos).isNull());
- invariant((mayEmpty && bucket->n > 0) || bucket->n > 1 || bucket->nextChild.isNull());
-
- bucket->emptySize += sizeof(KeyHeaderType);
- bucket->n--;
-
- for (int j = keypos; j < bucket->n; j++) {
- getKeyHeader(bucket, j) = getKeyHeader(bucket, j + 1);
+ int size = 0;
+ for (int j = 0; j < bucket->n; ++j) {
+ if (mayDropKey(bucket, j, refPos)) {
+ continue;
}
-
- setNotPacked(bucket);
- }
-
- /**
- * Pull rightmost key from the bucket and set its prevChild pointer to be the nextChild for the
- * whole bucket. It is assumed that caller already has the old value of the nextChild
- * pointer and is about to add a pointer to it elsewhere in the tree.
- *
- * This is only used by BtreeLogic::Builder. Think very hard (and change this comment) before
- * using it anywhere else.
- *
- * WARNING: The keyDataOut that is filled out by this function points to newly unalloced memory
- * inside of this bucket. It only remains valid until the next write to this bucket.
- */
- template <class BtreeLayout>
- void BtreeLogic<BtreeLayout>::popBack(BucketType* bucket,
- DiskLoc* recordLocOut,
- KeyDataType* keyDataOut) {
-
- massert(17435, "n==0 in btree popBack()", bucket->n > 0 );
-
- invariant(getKeyHeader(bucket, bucket->n - 1).isUsed());
-
- FullKey kn = getFullKey(bucket, bucket->n - 1);
- *recordLocOut = kn.recordLoc;
- keyDataOut->assign(kn.data);
- int keysize = kn.data.dataSize();
-
- // The left/prev child of the node we are popping now goes in to the nextChild slot as all
- // of its keys are greater than all remaining keys in this node.
- bucket->nextChild = kn.prevChildBucket;
- bucket->n--;
-
- // This is risky because the keyDataOut we filled out above will now point to this newly
- // unalloced memory.
- bucket->emptySize += sizeof(KeyHeaderType);
- _unalloc(bucket, keysize);
- }
-
- /**
- * Add a key. Must be > all existing. Be careful to set next ptr right.
- */
- template <class BtreeLayout>
- bool BtreeLogic<BtreeLayout>::pushBack(BucketType* bucket,
- const DiskLoc recordLoc,
- const KeyDataType& key,
- const DiskLoc prevChild) {
-
- int bytesNeeded = key.dataSize() + sizeof(KeyHeaderType);
- if (bytesNeeded > bucket->emptySize) {
- return false;
- }
- invariant(bytesNeeded <= bucket->emptySize);
-
- if (bucket->n) {
- const FullKey klast = getFullKey(bucket, bucket->n - 1);
- if (klast.data.woCompare(key, _ordering) > 0) {
- log() << "btree bucket corrupt? "
- "consider reindexing or running validate command" << endl;
- log() << " klast: " << klast.data.toString() << endl;
- log() << " key: " << key.toString() << endl;
- invariant(false);
- }
- }
-
- bucket->emptySize -= sizeof(KeyHeaderType);
- KeyHeaderType& kn = getKeyHeader(bucket, bucket->n++);
- kn.prevChildBucket = prevChild;
- kn.recordLoc = recordLoc;
- kn.setKeyDataOfs((short)_alloc(bucket, key.dataSize()));
- short ofs = kn.keyDataOfs();
- char *p = dataAt(bucket, ofs);
- memcpy(p, key.data(), key.dataSize());
- return true;
+ size += getFullKey(bucket, j).data.dataSize() + sizeof(KeyHeaderType);
}
- /**
- * Durability note:
- *
- * We do separate intent declarations herein. Arguably one could just declare the whole bucket
- * given we do group commits. This is something we could investigate later as to what is
- * faster.
- **/
-
- /**
- * Insert a key in a bucket with no complexity -- no splits required
- * Returns false if a split is required.
- */
- template <class BtreeLayout>
- bool BtreeLogic<BtreeLayout>::basicInsert(OperationContext* txn,
- BucketType* bucket,
- const DiskLoc bucketLoc,
- int& keypos,
- const KeyDataType& key,
- const DiskLoc recordLoc) {
- invariant(bucket->n < 1024);
- invariant(keypos >= 0 && keypos <= bucket->n);
+ return size;
+}
- int bytesNeeded = key.dataSize() + sizeof(KeyHeaderType);
- if (bytesNeeded > bucket->emptySize) {
- _pack(txn, bucket, bucketLoc, keypos);
- if (bytesNeeded > bucket->emptySize) {
- return false;
- }
- }
-
- invariant(getBucket(txn, bucketLoc) == bucket);
-
- {
- // declare that we will write to [k(keypos),k(n)]
- char* start = reinterpret_cast<char*>(&getKeyHeader(bucket, keypos));
- char* end = reinterpret_cast<char*>(&getKeyHeader(bucket, bucket->n + 1));
-
- // Declare that we will write to [k(keypos),k(n)]
- txn->recoveryUnit()->writingPtr(start, end - start);
- }
-
- // e.g. for n==3, keypos==2
- // 1 4 9 -> 1 4 _ 9
- for (int j = bucket->n; j > keypos; j--) {
- getKeyHeader(bucket, j) = getKeyHeader(bucket, j - 1);
- }
-
- size_t writeLen = sizeof(bucket->emptySize) + sizeof(bucket->topSize) + sizeof(bucket->n);
- txn->recoveryUnit()->writingPtr(&bucket->emptySize, writeLen);
- bucket->emptySize -= sizeof(KeyHeaderType);
- bucket->n++;
-
- // This _KeyNode was marked for writing above.
- KeyHeaderType& kn = getKeyHeader(bucket, keypos);
- kn.prevChildBucket.Null();
- kn.recordLoc = recordLoc;
- kn.setKeyDataOfs((short) _alloc(bucket, key.dataSize()));
- char *p = dataAt(bucket, kn.keyDataOfs());
- txn->recoveryUnit()->writingPtr(p, key.dataSize());
- memcpy(p, key.data(), key.dataSize());
- return true;
- }
-
- /**
- * With this implementation, refPos == 0 disregards effect of refPos. index > 0 prevents
- * creation of an empty bucket.
- */
- template <class BtreeLayout>
- bool BtreeLogic<BtreeLayout>::mayDropKey(BucketType* bucket, int index, int refPos) {
- return index > 0
- && (index != refPos)
- && getKeyHeader(bucket, index).isUnused()
- && getKeyHeader(bucket, index).prevChildBucket.isNull();
- }
-
- template <class BtreeLayout>
- int BtreeLogic<BtreeLayout>::_packedDataSize(BucketType* bucket, int refPos) {
- if (bucket->flags & Packed) {
- return BtreeLayout::BucketSize - bucket->emptySize - BucketType::HeaderSize;
- }
-
- int size = 0;
- for (int j = 0; j < bucket->n; ++j) {
- if (mayDropKey(bucket, j, refPos)) {
- continue;
- }
- size += getFullKey(bucket, j).data.dataSize() + sizeof(KeyHeaderType);
- }
+/**
+ * When we delete things, we just leave empty space until the node is full and then we repack
+ * it.
+ */
+template <class BtreeLayout>
+void BtreeLogic<BtreeLayout>::_pack(OperationContext* txn,
+ BucketType* bucket,
+ const DiskLoc thisLoc,
+ int& refPos) {
+ invariant(getBucket(txn, thisLoc) == bucket);
- return size;
+ if (bucket->flags & Packed) {
+ return;
}
- /**
- * When we delete things, we just leave empty space until the node is full and then we repack
- * it.
- */
- template <class BtreeLayout>
- void BtreeLogic<BtreeLayout>::_pack(OperationContext* txn,
- BucketType* bucket,
- const DiskLoc thisLoc,
- int &refPos) {
-
- invariant(getBucket(txn, thisLoc) == bucket);
+ _packReadyForMod(btreemod(txn, bucket), refPos);
+}
- if (bucket->flags & Packed) {
- return;
- }
-
- _packReadyForMod(btreemod(txn, bucket), refPos);
+/**
+ * Version when write intent already declared.
+ */
+template <class BtreeLayout>
+void BtreeLogic<BtreeLayout>::_packReadyForMod(BucketType* bucket, int& refPos) {
+ if (bucket->flags & Packed) {
+ return;
}
- /**
- * Version when write intent already declared.
- */
- template <class BtreeLayout>
- void BtreeLogic<BtreeLayout>::_packReadyForMod(BucketType* bucket, int &refPos) {
- if (bucket->flags & Packed) {
- return;
- }
-
- int tdz = totalDataSize(bucket);
- char temp[BtreeLayout::BucketSize];
- int ofs = tdz;
- bucket->topSize = 0;
-
- int i = 0;
- for (int j = 0; j < bucket->n; j++) {
- if (mayDropKey(bucket, j, refPos)) {
- // key is unused and has no children - drop it
- continue;
- }
-
- if (i != j) {
- if (refPos == j) {
- // i < j so j will never be refPos again
- refPos = i;
- }
- getKeyHeader(bucket, i) = getKeyHeader(bucket, j);
- }
+ int tdz = totalDataSize(bucket);
+ char temp[BtreeLayout::BucketSize];
+ int ofs = tdz;
+ bucket->topSize = 0;
- short ofsold = getKeyHeader(bucket, i).keyDataOfs();
- int sz = getFullKey(bucket, i).data.dataSize();
- ofs -= sz;
- bucket->topSize += sz;
- memcpy(temp + ofs, dataAt(bucket, ofsold), sz);
- getKeyHeader(bucket, i).setKeyDataOfsSavingUse(ofs);
- ++i;
+ int i = 0;
+ for (int j = 0; j < bucket->n; j++) {
+ if (mayDropKey(bucket, j, refPos)) {
+ // key is unused and has no children - drop it
+ continue;
}
- if (refPos == bucket->n) {
- refPos = i;
- }
-
- bucket->n = i;
- int dataUsed = tdz - ofs;
- memcpy(bucket->data + ofs, temp + ofs, dataUsed);
-
- bucket->emptySize = tdz - dataUsed - bucket->n * sizeof(KeyHeaderType);
- int foo = bucket->emptySize;
- invariant( foo >= 0 );
- setPacked(bucket);
- assertValid(_indexName, bucket, _ordering);
- }
-
- template <class BtreeLayout>
- void BtreeLogic<BtreeLayout>::truncateTo(BucketType* bucket,
- int N,
- int &refPos) {
- bucket->n = N;
- setNotPacked(bucket);
- _packReadyForMod(bucket, refPos);
- }
-
- /**
- * In the standard btree algorithm, we would split based on the
- * existing keys _and_ the new key. But that's more work to
- * implement, so we split the existing keys and then add the new key.
- *
- * There are several published heuristic algorithms for doing splits, but basically what you
- * want are (1) even balancing between the two sides and (2) a small split key so the parent can
- * have a larger branching factor.
- *
- * We just have a simple algorithm right now: if a key includes the halfway point (or 10% way
- * point) in terms of bytes, split on that key; otherwise split on the key immediately to the
- * left of the halfway point (or 10% point).
- *
- * This function is expected to be called on a packed bucket.
- */
- template <class BtreeLayout>
- int BtreeLogic<BtreeLayout>::splitPos(BucketType* bucket, int keypos) {
- invariant(bucket->n > 2);
- int split = 0;
- int rightSize = 0;
-
- // When splitting a btree node, if the new key is greater than all the other keys, we should
- // not do an even split, but a 90/10 split. see SERVER-983. TODO I think we only want to
- // do the 90% split on the rhs node of the tree.
- int rightSizeLimit = (bucket->topSize + sizeof(KeyHeaderType) * bucket->n)
- / (keypos == bucket->n ? 10 : 2);
-
- for (int i = bucket->n - 1; i > -1; --i) {
- rightSize += getFullKey(bucket, i).data.dataSize() + sizeof(KeyHeaderType);
- if (rightSize > rightSizeLimit) {
- split = i;
- break;
+ if (i != j) {
+ if (refPos == j) {
+ // i < j so j will never be refPos again
+ refPos = i;
}
+ getKeyHeader(bucket, i) = getKeyHeader(bucket, j);
}
- // safeguards - we must not create an empty bucket
- if (split < 1) {
- split = 1;
- }
- else if (split > bucket->n - 2) {
- split = bucket->n - 2;
- }
-
- return split;
+ short ofsold = getKeyHeader(bucket, i).keyDataOfs();
+ int sz = getFullKey(bucket, i).data.dataSize();
+ ofs -= sz;
+ bucket->topSize += sz;
+ memcpy(temp + ofs, dataAt(bucket, ofsold), sz);
+ getKeyHeader(bucket, i).setKeyDataOfsSavingUse(ofs);
+ ++i;
}
- template <class BtreeLayout>
- void BtreeLogic<BtreeLayout>::reserveKeysFront(BucketType* bucket, int nAdd) {
- invariant(bucket->emptySize >= int(sizeof(KeyHeaderType) * nAdd));
- bucket->emptySize -= sizeof(KeyHeaderType) * nAdd;
- for (int i = bucket->n - 1; i > -1; --i) {
- getKeyHeader(bucket, i + nAdd) = getKeyHeader(bucket, i);
- }
- bucket->n += nAdd;
- }
-
- template <class BtreeLayout>
- void BtreeLogic<BtreeLayout>::setKey(BucketType* bucket,
- int i,
- const DiskLoc recordLoc,
- const KeyDataType& key,
- const DiskLoc prevChildBucket) {
- KeyHeaderType &kn = getKeyHeader(bucket, i);
- kn.recordLoc = recordLoc;
- kn.prevChildBucket = prevChildBucket;
- short ofs = (short) _alloc(bucket, key.dataSize());
- kn.setKeyDataOfs(ofs);
- char *p = dataAt(bucket, ofs);
- memcpy(p, key.data(), key.dataSize());
- }
-
- template <class BtreeLayout>
- void BtreeLogic<BtreeLayout>::dropFront(BucketType* bucket,
- int nDrop,
- int &refpos) {
- for (int i = nDrop; i < bucket->n; ++i) {
- getKeyHeader(bucket, i - nDrop) = getKeyHeader(bucket, i);
- }
- bucket->n -= nDrop;
- setNotPacked(bucket);
- _packReadyForMod(bucket, refpos );
+ if (refPos == bucket->n) {
+ refPos = i;
}
- template <class BtreeLayout>
- void BtreeLogic<BtreeLayout>::customLocate(OperationContext* txn,
- DiskLoc* locInOut,
- int* keyOfsInOut,
- const IndexSeekPoint& seekPoint,
- int direction) const {
- pair<DiskLoc, int> unused;
+ bucket->n = i;
+ int dataUsed = tdz - ofs;
+ memcpy(bucket->data + ofs, temp + ofs, dataUsed);
- customLocate(txn, locInOut, keyOfsInOut, seekPoint, direction, unused);
- skipUnusedKeys(txn, locInOut, keyOfsInOut, direction);
- }
+ bucket->emptySize = tdz - dataUsed - bucket->n * sizeof(KeyHeaderType);
+ int foo = bucket->emptySize;
+ invariant(foo >= 0);
+ setPacked(bucket);
+ assertValid(_indexName, bucket, _ordering);
+}
- template <class BtreeLayout>
- void BtreeLogic<BtreeLayout>::advance(OperationContext* txn,
- DiskLoc* bucketLocInOut,
- int* posInOut,
- int direction) const {
+template <class BtreeLayout>
+void BtreeLogic<BtreeLayout>::truncateTo(BucketType* bucket, int N, int& refPos) {
+ bucket->n = N;
+ setNotPacked(bucket);
+ _packReadyForMod(bucket, refPos);
+}
- *bucketLocInOut = advance(txn, *bucketLocInOut, posInOut, direction);
- skipUnusedKeys(txn, bucketLocInOut, posInOut, direction);
+/**
+ * In the standard btree algorithm, we would split based on the
+ * existing keys _and_ the new key. But that's more work to
+ * implement, so we split the existing keys and then add the new key.
+ *
+ * There are several published heuristic algorithms for doing splits, but basically what you
+ * want are (1) even balancing between the two sides and (2) a small split key so the parent can
+ * have a larger branching factor.
+ *
+ * We just have a simple algorithm right now: if a key includes the halfway point (or 10% way
+ * point) in terms of bytes, split on that key; otherwise split on the key immediately to the
+ * left of the halfway point (or 10% point).
+ *
+ * This function is expected to be called on a packed bucket.
+ */
+template <class BtreeLayout>
+int BtreeLogic<BtreeLayout>::splitPos(BucketType* bucket, int keypos) {
+ invariant(bucket->n > 2);
+ int split = 0;
+ int rightSize = 0;
+
+ // When splitting a btree node, if the new key is greater than all the other keys, we should
+ // not do an even split, but a 90/10 split. see SERVER-983. TODO I think we only want to
+ // do the 90% split on the rhs node of the tree.
+ int rightSizeLimit =
+ (bucket->topSize + sizeof(KeyHeaderType) * bucket->n) / (keypos == bucket->n ? 10 : 2);
+
+ for (int i = bucket->n - 1; i > -1; --i) {
+ rightSize += getFullKey(bucket, i).data.dataSize() + sizeof(KeyHeaderType);
+ if (rightSize > rightSizeLimit) {
+ split = i;
+ break;
+ }
+ }
+
+ // safeguards - we must not create an empty bucket
+ if (split < 1) {
+ split = 1;
+ } else if (split > bucket->n - 2) {
+ split = bucket->n - 2;
+ }
+
+ return split;
+}
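+
+// Example with hypothetical sizes: for a packed bucket whose keys plus headers
+// total 5000 bytes, an insert in the middle uses rightSizeLimit == 5000 / 2,
+// so the right-to-left scan stops near the byte midpoint; an append at
+// keypos == bucket->n uses 5000 / 10 == 500, leaving roughly 90% of the data
+// in the existing (left) bucket, per the SERVER-983 note above.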
+
+template <class BtreeLayout>
+void BtreeLogic<BtreeLayout>::reserveKeysFront(BucketType* bucket, int nAdd) {
+ invariant(bucket->emptySize >= int(sizeof(KeyHeaderType) * nAdd));
+ bucket->emptySize -= sizeof(KeyHeaderType) * nAdd;
+ for (int i = bucket->n - 1; i > -1; --i) {
+ getKeyHeader(bucket, i + nAdd) = getKeyHeader(bucket, i);
+ }
+ bucket->n += nAdd;
+}
+
+template <class BtreeLayout>
+void BtreeLogic<BtreeLayout>::setKey(BucketType* bucket,
+ int i,
+ const DiskLoc recordLoc,
+ const KeyDataType& key,
+ const DiskLoc prevChildBucket) {
+ KeyHeaderType& kn = getKeyHeader(bucket, i);
+ kn.recordLoc = recordLoc;
+ kn.prevChildBucket = prevChildBucket;
+ short ofs = (short)_alloc(bucket, key.dataSize());
+ kn.setKeyDataOfs(ofs);
+ char* p = dataAt(bucket, ofs);
+ memcpy(p, key.data(), key.dataSize());
+}
+
+template <class BtreeLayout>
+void BtreeLogic<BtreeLayout>::dropFront(BucketType* bucket, int nDrop, int& refpos) {
+ for (int i = nDrop; i < bucket->n; ++i) {
+ getKeyHeader(bucket, i - nDrop) = getKeyHeader(bucket, i);
+ }
+ bucket->n -= nDrop;
+ setNotPacked(bucket);
+ _packReadyForMod(bucket, refpos);
+}
+
+template <class BtreeLayout>
+void BtreeLogic<BtreeLayout>::customLocate(OperationContext* txn,
+ DiskLoc* locInOut,
+ int* keyOfsInOut,
+ const IndexSeekPoint& seekPoint,
+ int direction) const {
+ pair<DiskLoc, int> unused;
+
+ customLocate(txn, locInOut, keyOfsInOut, seekPoint, direction, unused);
+ skipUnusedKeys(txn, locInOut, keyOfsInOut, direction);
+}
+
+template <class BtreeLayout>
+void BtreeLogic<BtreeLayout>::advance(OperationContext* txn,
+ DiskLoc* bucketLocInOut,
+ int* posInOut,
+ int direction) const {
+ *bucketLocInOut = advance(txn, *bucketLocInOut, posInOut, direction);
+ skipUnusedKeys(txn, bucketLocInOut, posInOut, direction);
+}
+
+template <class BtreeLayout>
+void BtreeLogic<BtreeLayout>::skipUnusedKeys(OperationContext* txn,
+ DiskLoc* loc,
+ int* pos,
+ int direction) const {
+ while (!loc->isNull() && !keyIsUsed(txn, *loc, *pos)) {
+ *loc = advance(txn, *loc, pos, direction);
}
+}
- template <class BtreeLayout>
- void BtreeLogic<BtreeLayout>::skipUnusedKeys(OperationContext* txn,
- DiskLoc* loc,
- int* pos,
- int direction) const {
- while (!loc->isNull() && !keyIsUsed(txn, *loc, *pos)) {
- *loc = advance(txn, *loc, pos, direction);
- }
- }
+template <class BtreeLayout>
+void BtreeLogic<BtreeLayout>::advanceTo(OperationContext* txn,
+ DiskLoc* thisLocInOut,
+ int* keyOfsInOut,
+ const IndexSeekPoint& seekPoint,
+ int direction) const {
+ advanceToImpl(txn, thisLocInOut, keyOfsInOut, seekPoint, direction);
+ skipUnusedKeys(txn, thisLocInOut, keyOfsInOut, direction);
+}
- template <class BtreeLayout>
- void BtreeLogic<BtreeLayout>::advanceTo(OperationContext* txn,
+/**
+ * find smallest/biggest value greater-equal/less-equal than specified
+ *
+ * starting thisLoc + keyOfs will be strictly less than/strictly greater than
+ * keyBegin/keyBeginLen/keyEnd
+ *
+ * All the direction checks below allowed me to refactor the code, but possibly separate forward
+ * and reverse implementations would be more efficient
+ */
+template <class BtreeLayout>
+void BtreeLogic<BtreeLayout>::advanceToImpl(OperationContext* txn,
DiskLoc* thisLocInOut,
int* keyOfsInOut,
const IndexSeekPoint& seekPoint,
int direction) const {
+ BucketType* bucket = getBucket(txn, *thisLocInOut);
- advanceToImpl(txn, thisLocInOut, keyOfsInOut, seekPoint, direction);
- skipUnusedKeys(txn, thisLocInOut, keyOfsInOut, direction);
- }
-
- /**
- * find smallest/biggest value greater-equal/less-equal than specified
- *
- * starting thisLoc + keyOfs will be strictly less than/strictly greater than
- * keyBegin/keyBeginLen/keyEnd
- *
- * All the direction checks below allowed me to refactor the code, but possibly separate forward
- * and reverse implementations would be more efficient
- */
- template <class BtreeLayout>
- void BtreeLogic<BtreeLayout>::advanceToImpl(OperationContext* txn,
- DiskLoc* thisLocInOut,
- int* keyOfsInOut,
- const IndexSeekPoint& seekPoint,
- int direction) const {
-
- BucketType* bucket = getBucket(txn, *thisLocInOut);
-
- int l, h;
- bool dontGoUp;
-
- if (direction > 0) {
- l = *keyOfsInOut;
- h = bucket->n - 1;
- int cmpResult = customBSONCmp(getFullKey(bucket, h).data.toBson(),
- seekPoint,
- direction);
- dontGoUp = (cmpResult >= 0);
- }
- else {
- l = 0;
- h = *keyOfsInOut;
- int cmpResult = customBSONCmp(getFullKey(bucket, l).data.toBson(),
- seekPoint,
- direction);
- dontGoUp = (cmpResult <= 0);
- }
+ int l, h;
+ bool dontGoUp;
- pair<DiskLoc, int> bestParent;
-
- if (dontGoUp) {
- // this comparison result assures h > l
- if (!customFind(txn,
- l,
- h,
- seekPoint,
- direction,
- thisLocInOut,
- keyOfsInOut,
- bestParent)) {
- return;
- }
+ if (direction > 0) {
+ l = *keyOfsInOut;
+ h = bucket->n - 1;
+ int cmpResult = customBSONCmp(getFullKey(bucket, h).data.toBson(), seekPoint, direction);
+ dontGoUp = (cmpResult >= 0);
+ } else {
+ l = 0;
+ h = *keyOfsInOut;
+ int cmpResult = customBSONCmp(getFullKey(bucket, l).data.toBson(), seekPoint, direction);
+ dontGoUp = (cmpResult <= 0);
+ }
+
+ pair<DiskLoc, int> bestParent;
+
+ if (dontGoUp) {
+ // this comparison result assures h > l
+ if (!customFind(txn, l, h, seekPoint, direction, thisLocInOut, keyOfsInOut, bestParent)) {
+ return;
}
- else {
- // go up parents until rightmost/leftmost node is >=/<= target or at top
- while (!bucket->parent.isNull()) {
- *thisLocInOut = bucket->parent;
- bucket = getBucket(txn,
- *thisLocInOut);
-
- if (direction > 0) {
- if (customBSONCmp(getFullKey(bucket, bucket->n - 1).data.toBson(),
- seekPoint,
- direction) >= 0 ) {
- break;
- }
+ } else {
+ // go up parents until rightmost/leftmost node is >=/<= target or at top
+ while (!bucket->parent.isNull()) {
+ *thisLocInOut = bucket->parent;
+ bucket = getBucket(txn, *thisLocInOut);
+
+ if (direction > 0) {
+ if (customBSONCmp(getFullKey(bucket, bucket->n - 1).data.toBson(),
+ seekPoint,
+ direction) >= 0) {
+ break;
}
- else {
- if (customBSONCmp(getFullKey(bucket, 0).data.toBson(),
- seekPoint,
- direction) <= 0) {
- break;
- }
+ } else {
+ if (customBSONCmp(getFullKey(bucket, 0).data.toBson(), seekPoint, direction) <= 0) {
+ break;
}
}
}
-
- customLocate(txn, thisLocInOut, keyOfsInOut, seekPoint, direction, bestParent);
}
- template <class BtreeLayout>
- void BtreeLogic<BtreeLayout>::customLocate(OperationContext* txn,
- DiskLoc* locInOut,
- int* keyOfsInOut,
- const IndexSeekPoint& seekPoint,
- int direction,
- pair<DiskLoc, int>& bestParent) const {
+ customLocate(txn, thisLocInOut, keyOfsInOut, seekPoint, direction, bestParent);
+}
- BucketType* bucket = getBucket(txn, *locInOut);
-
- if (0 == bucket->n) {
- *locInOut = DiskLoc();
- return;
- }
+template <class BtreeLayout>
+void BtreeLogic<BtreeLayout>::customLocate(OperationContext* txn,
+ DiskLoc* locInOut,
+ int* keyOfsInOut,
+ const IndexSeekPoint& seekPoint,
+ int direction,
+ pair<DiskLoc, int>& bestParent) const {
+ BucketType* bucket = getBucket(txn, *locInOut);
- // go down until we find the smallest/biggest key >=/<= the target
- for (;;) {
- int l = 0;
- int h = bucket->n - 1;
+ if (0 == bucket->n) {
+ *locInOut = DiskLoc();
+ return;
+ }
- // +direction: 0, -direction: h
- int z = (direction > 0) ? 0 : h;
+ // go down until we find the smallest/biggest key >=/<= the target
+ for (;;) {
+ int l = 0;
+ int h = bucket->n - 1;
- // leftmost/rightmost key may possibly be >=/<= search key
- int res = customBSONCmp(getFullKey(bucket, z).data.toBson(), seekPoint, direction);
- if (direction * res >= 0) {
- DiskLoc next;
- *keyOfsInOut = z;
+ // +direction: 0, -direction: h
+ int z = (direction > 0) ? 0 : h;
- if (direction > 0) {
- dassert(z == 0);
- next = getKeyHeader(bucket, 0).prevChildBucket;
- }
- else {
- next = bucket->nextChild;
- }
+ // leftmost/rightmost key may possibly be >=/<= search key
+ int res = customBSONCmp(getFullKey(bucket, z).data.toBson(), seekPoint, direction);
+ if (direction * res >= 0) {
+ DiskLoc next;
+ *keyOfsInOut = z;
- if (!next.isNull()) {
- bestParent = pair<DiskLoc, int>(*locInOut, *keyOfsInOut);
- *locInOut = next;
- bucket = getBucket(txn, *locInOut);
- continue;
- }
- else {
- return;
- }
+ if (direction > 0) {
+ dassert(z == 0);
+ next = getKeyHeader(bucket, 0).prevChildBucket;
+ } else {
+ next = bucket->nextChild;
}
- res = customBSONCmp(getFullKey(bucket, h - z).data.toBson(), seekPoint, direction);
- if (direction * res < 0) {
- DiskLoc next;
- if (direction > 0) {
- next = bucket->nextChild;
- }
- else {
- next = getKeyHeader(bucket, 0).prevChildBucket;
- }
+ if (!next.isNull()) {
+ bestParent = pair<DiskLoc, int>(*locInOut, *keyOfsInOut);
+ *locInOut = next;
+ bucket = getBucket(txn, *locInOut);
+ continue;
+ } else {
+ return;
+ }
+ }
- if (next.isNull()) {
- // if bestParent is null, we've hit the end and locInOut gets set to DiskLoc()
- *locInOut = bestParent.first;
- *keyOfsInOut = bestParent.second;
- return;
- }
- else {
- *locInOut = next;
- bucket = getBucket(txn, *locInOut);
- continue;
- }
+ res = customBSONCmp(getFullKey(bucket, h - z).data.toBson(), seekPoint, direction);
+ if (direction * res < 0) {
+ DiskLoc next;
+ if (direction > 0) {
+ next = bucket->nextChild;
+ } else {
+ next = getKeyHeader(bucket, 0).prevChildBucket;
}
- if (!customFind(txn,
- l,
- h,
- seekPoint,
- direction,
- locInOut,
- keyOfsInOut,
- bestParent)) {
+ if (next.isNull()) {
+ // if bestParent is null, we've hit the end and locInOut gets set to DiskLoc()
+ *locInOut = bestParent.first;
+ *keyOfsInOut = bestParent.second;
return;
+ } else {
+ *locInOut = next;
+ bucket = getBucket(txn, *locInOut);
+ continue;
}
-
- bucket = getBucket(txn, *locInOut);
}
- }
-
- template <class BtreeLayout>
- bool BtreeLogic<BtreeLayout>::customFind(OperationContext* txn,
- int low,
- int high,
- const IndexSeekPoint& seekPoint,
- int direction,
- DiskLoc* thisLocInOut,
- int* keyOfsInOut,
- pair<DiskLoc, int>& bestParent) const {
- const BucketType* bucket = getBucket(txn, *thisLocInOut);
+ if (!customFind(txn, l, h, seekPoint, direction, locInOut, keyOfsInOut, bestParent)) {
+ return;
+ }
- for (;;) {
- if (low + 1 == high) {
- *keyOfsInOut = (direction > 0) ? high : low;
- DiskLoc next = getKeyHeader(bucket, high).prevChildBucket;
- if (!next.isNull()) {
- bestParent = make_pair(*thisLocInOut, *keyOfsInOut);
- *thisLocInOut = next;
- return true;
- }
- else {
- return false;
- }
+ bucket = getBucket(txn, *locInOut);
+ }
+}
+
+template <class BtreeLayout>
+bool BtreeLogic<BtreeLayout>::customFind(OperationContext* txn,
+ int low,
+ int high,
+ const IndexSeekPoint& seekPoint,
+ int direction,
+ DiskLoc* thisLocInOut,
+ int* keyOfsInOut,
+ pair<DiskLoc, int>& bestParent) const {
+ const BucketType* bucket = getBucket(txn, *thisLocInOut);
+
+ for (;;) {
+ if (low + 1 == high) {
+ *keyOfsInOut = (direction > 0) ? high : low;
+ DiskLoc next = getKeyHeader(bucket, high).prevChildBucket;
+ if (!next.isNull()) {
+ bestParent = make_pair(*thisLocInOut, *keyOfsInOut);
+ *thisLocInOut = next;
+ return true;
+ } else {
+ return false;
}
+ }
- int middle = low + (high - low) / 2;
+ int middle = low + (high - low) / 2;
- int cmp = customBSONCmp(getFullKey(bucket, middle).data.toBson(), seekPoint, direction);
- if (cmp < 0) {
+ int cmp = customBSONCmp(getFullKey(bucket, middle).data.toBson(), seekPoint, direction);
+ if (cmp < 0) {
+ low = middle;
+ } else if (cmp > 0) {
+ high = middle;
+ } else {
+ if (direction < 0) {
low = middle;
- }
- else if (cmp > 0) {
+ } else {
high = middle;
}
- else {
- if (direction < 0) {
- low = middle;
- }
- else {
- high = middle;
- }
- }
}
}
+}
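For readers skimming the reformatted customFind() above: the loop is a plain interval-narrowing binary search that keeps the sought boundary strictly inside (low, high) and descends into a child bucket once the interval closes. A minimal standalone sketch of the same invariant over a flat sorted array (illustrative names and int keys, not MongoDB code):

    #include <cstdio>
    #include <vector>

    // Narrow (low, high) until low + 1 == high, as customFind() does.
    // Precondition, as in the btree code: the boundary lies strictly inside
    // the interval, i.e. keys[low] < target <= keys[high] for direction > 0
    // (mirror image for direction < 0).
    int findBoundary(const std::vector<int>& keys, int target, int direction) {
        int low = 0;
        int high = static_cast<int>(keys.size()) - 1;
        while (low + 1 < high) {
            int middle = low + (high - low) / 2;
            int cmp = keys[middle] - target;
            if (cmp < 0) {
                low = middle;
            } else if (cmp > 0) {
                high = middle;
            } else {
                // Exact match: keep narrowing toward the requested side.
                if (direction < 0) {
                    low = middle;
                } else {
                    high = middle;
                }
            }
        }
        return (direction > 0) ? high : low;
    }

    int main() {
        std::vector<int> keys = {2, 4, 4, 7, 9};
        std::printf("%d\n", findBoundary(keys, 5, 1));   // 3: first key >= 5
        std::printf("%d\n", findBoundary(keys, 5, -1));  // 2: last key <= 5
    }

The real customFind() additionally records in bestParent the last bucket/offset whose left child it followed, so customLocate() can fall back to it when a descent dead-ends.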
- /**
- * NOTE: Currently the Ordering implementation assumes a compound index will not have more keys
- * than an unsigned variable has bits. The same assumption is used in the implementation below
- * with respect to the 'mask' variable.
- *
- * 'left' is a regular BSONObj
- *
- * 'right' is composed partly of an existing BSONObj, and the remaining keys are taken from a
- * vector of elements that frequently changes
- *
- * see https://jira.mongodb.org/browse/SERVER-371
- */
- // static
- template <class BtreeLayout>
- int BtreeLogic<BtreeLayout>::customBSONCmp(const BSONObj& left,
- const IndexSeekPoint& right,
- int direction) const {
- // XXX: make this readable
- dassert(right.keySuffix.size() == right.suffixInclusive.size());
-
- BSONObjIterator ll( left );
- BSONObjIterator rr( right.keyPrefix );
- unsigned mask = 1;
- size_t i = 0;
- for( ; i < size_t(right.prefixLen); ++i, mask <<= 1 ) {
- BSONElement lll = ll.next();
- BSONElement rrr = rr.next();
-
- int x = lll.woCompare( rrr, false );
- if ( _ordering.descending( mask ) )
- x = -x;
- if ( x != 0 )
- return x;
- }
- if (right.prefixExclusive) {
+/**
+ * NOTE: Currently the Ordering implementation assumes a compound index will not have more keys
+ * than an unsigned variable has bits. The same assumption is used in the implementation below
+ * with respect to the 'mask' variable.
+ *
+ * 'left' is a regular BSONObj
+ *
+ * 'right' is composed partly of an existing BSONObj, and the remaining keys are taken from a
+ * vector of elements that frequently changes
+ *
+ * see https://jira.mongodb.org/browse/SERVER-371
+ */
+// static
+template <class BtreeLayout>
+int BtreeLogic<BtreeLayout>::customBSONCmp(const BSONObj& left,
+ const IndexSeekPoint& right,
+ int direction) const {
+ // XXX: make this readable
+ dassert(right.keySuffix.size() == right.suffixInclusive.size());
+
+ BSONObjIterator ll(left);
+ BSONObjIterator rr(right.keyPrefix);
+ unsigned mask = 1;
+ size_t i = 0;
+ for (; i < size_t(right.prefixLen); ++i, mask <<= 1) {
+ BSONElement lll = ll.next();
+ BSONElement rrr = rr.next();
+
+ int x = lll.woCompare(rrr, false);
+ if (_ordering.descending(mask))
+ x = -x;
+ if (x != 0)
+ return x;
+ }
+ if (right.prefixExclusive) {
+ return -direction;
+ }
+ for (; i < right.keySuffix.size(); ++i, mask <<= 1) {
+ if (!ll.more())
+ return -direction;
+
+ BSONElement lll = ll.next();
+ BSONElement rrr = *right.keySuffix[i];
+ int x = lll.woCompare(rrr, false);
+ if (_ordering.descending(mask))
+ x = -x;
+ if (x != 0)
+ return x;
+ if (!right.suffixInclusive[i]) {
return -direction;
}
- for( ; i < right.keySuffix.size(); ++i, mask <<= 1 ) {
- if (!ll.more())
- return -direction;
-
- BSONElement lll = ll.next();
- BSONElement rrr = *right.keySuffix[i];
- int x = lll.woCompare( rrr, false );
- if ( _ordering.descending( mask ) )
- x = -x;
- if ( x != 0 )
- return x;
- if ( !right.suffixInclusive[i] ) {
- return -direction;
- }
- }
- return ll.more() ? direction : 0;
}
+ return ll.more() ? direction : 0;
+}
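The comparator is easier to follow with the BSON machinery stripped away. Below is a hedged sketch using plain ints in place of BSONElements: the first prefixLen fields come from the seek point's key prefix, the remainder from keySuffix (which, as in the real code, is indexed by overall field position), and an exclusive bound short-circuits to -direction so the caller lands just past it. The descending-field mask is omitted for brevity; every name here is illustrative.

    #include <cassert>
    #include <vector>

    // Illustrative stand-in for IndexSeekPoint, with int fields instead of BSON.
    struct SeekPoint {
        std::vector<int> keyPrefix;         // supplies the first prefixLen fields
        int prefixLen = 0;
        bool prefixExclusive = false;
        std::vector<int> keySuffix;         // indexed by overall field position
        std::vector<bool> suffixInclusive;  // parallel to keySuffix
    };

    int seekPointCmp(const std::vector<int>& left, const SeekPoint& right, int direction) {
        assert(right.keySuffix.size() == right.suffixInclusive.size());
        size_t i = 0;
        for (; i < size_t(right.prefixLen); ++i) {
            int x = left[i] - right.keyPrefix[i];  // ascending-only: no mask
            if (x != 0)
                return x;
        }
        if (right.prefixExclusive)
            return -direction;
        for (; i < right.keySuffix.size(); ++i) {
            if (i >= left.size())
                return -direction;
            int x = left[i] - right.keySuffix[i];
            if (x != 0)
                return x;
            if (!right.suffixInclusive[i])
                return -direction;
        }
        return (left.size() > i) ? direction : 0;
    }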
- template <class BtreeLayout>
- bool BtreeLogic<BtreeLayout>::exists(OperationContext* txn, const KeyDataType& key) const {
- int position = 0;
+template <class BtreeLayout>
+bool BtreeLogic<BtreeLayout>::exists(OperationContext* txn, const KeyDataType& key) const {
+ int position = 0;
- // Find the DiskLoc
- bool found;
+ // Find the DiskLoc
+ bool found;
- DiskLoc bucket = _locate(txn, getRootLoc(txn), key, &position, &found, DiskLoc::min(), 1);
+ DiskLoc bucket = _locate(txn, getRootLoc(txn), key, &position, &found, DiskLoc::min(), 1);
- while (!bucket.isNull()) {
- FullKey fullKey = getFullKey(getBucket(txn, bucket), position);
- if (fullKey.header.isUsed()) {
- return fullKey.data.woEqual(key);
- }
- bucket = advance(txn, bucket, &position, 1);
+ while (!bucket.isNull()) {
+ FullKey fullKey = getFullKey(getBucket(txn, bucket), position);
+ if (fullKey.header.isUsed()) {
+ return fullKey.data.woEqual(key);
}
-
- return false;
+ bucket = advance(txn, bucket, &position, 1);
}
- template <class BtreeLayout>
- Status BtreeLogic<BtreeLayout>::dupKeyCheck(OperationContext* txn,
- const BSONObj& key,
- const DiskLoc& loc) const {
- KeyDataOwnedType theKey(key);
- if (!wouldCreateDup(txn, theKey, loc)) {
- return Status::OK();
- }
+ return false;
+}
- return Status(ErrorCodes::DuplicateKey, dupKeyError(theKey));
+template <class BtreeLayout>
+Status BtreeLogic<BtreeLayout>::dupKeyCheck(OperationContext* txn,
+ const BSONObj& key,
+ const DiskLoc& loc) const {
+ KeyDataOwnedType theKey(key);
+ if (!wouldCreateDup(txn, theKey, loc)) {
+ return Status::OK();
}
- template <class BtreeLayout>
- bool BtreeLogic<BtreeLayout>::wouldCreateDup(OperationContext* txn,
- const KeyDataType& key,
- const DiskLoc self) const {
- int position;
- bool found;
-
- DiskLoc posLoc = _locate(txn, getRootLoc(txn), key, &position, &found, DiskLoc::min(), 1);
+ return Status(ErrorCodes::DuplicateKey, dupKeyError(theKey));
+}
- while (!posLoc.isNull()) {
- FullKey fullKey = getFullKey(getBucket(txn, posLoc), position);
- if (fullKey.header.isUsed()) {
- // TODO: we may not need fullKey.data until we know fullKey.header.isUsed() here
- // and elsewhere.
- if (fullKey.data.woEqual(key)) {
- return fullKey.recordLoc != self;
- }
- break;
+template <class BtreeLayout>
+bool BtreeLogic<BtreeLayout>::wouldCreateDup(OperationContext* txn,
+ const KeyDataType& key,
+ const DiskLoc self) const {
+ int position;
+ bool found;
+
+ DiskLoc posLoc = _locate(txn, getRootLoc(txn), key, &position, &found, DiskLoc::min(), 1);
+
+ while (!posLoc.isNull()) {
+ FullKey fullKey = getFullKey(getBucket(txn, posLoc), position);
+ if (fullKey.header.isUsed()) {
+ // TODO: we may not need fullKey.data until we know fullKey.header.isUsed() here
+ // and elsewhere.
+ if (fullKey.data.woEqual(key)) {
+ return fullKey.recordLoc != self;
}
-
- posLoc = advance(txn, posLoc, &position, 1);
+ break;
}
- return false;
+
+ posLoc = advance(txn, posLoc, &position, 1);
}
+ return false;
+}
- template <class BtreeLayout>
- string BtreeLogic<BtreeLayout>::dupKeyError(const KeyDataType& key) const {
- stringstream ss;
- ss << "E11000 duplicate key error ";
- ss << "index: " << _indexName << " ";
- ss << "dup key: " << key.toString();
- return ss.str();
- }
-
- /**
- * Find a key within this btree bucket.
- *
- * When duplicate keys are allowed, we use the DiskLoc of the record as if it were part of the
- * key. That assures that even when there are many duplicates (e.g., 1 million) for a key, our
- * performance is still good.
- *
- * errorIfDup: if the key exists (ignoring the recordLoc), return a DuplicateKey error
- *
- * pos: for existing keys k0...kn-1.
- * returns # it goes BEFORE. so key[pos-1] < key < key[pos]
- * returns n if it goes after the last existing key.
- * note result might be an Unused location!
- */
- template <class BtreeLayout>
- Status BtreeLogic<BtreeLayout>::_find(OperationContext* txn,
- BucketType* bucket,
- const KeyDataType& key,
- const DiskLoc& recordLoc,
- bool errorIfDup,
- int* keyPositionOut,
- bool* foundOut) const {
-
- // XXX: fix the ctor for DiskLoc56bit so we can just convert w/o assignment operator
- LocType genericRecordLoc;
- genericRecordLoc = recordLoc;
-
- bool dupsCheckedYet = false;
-
- int low = 0;
- int high = bucket->n - 1;
- int middle = (low + high) / 2;
-
- while (low <= high) {
- FullKey fullKey = getFullKey(bucket, middle);
- int cmp = key.woCompare(fullKey.data, _ordering);
-
- // The key data is the same.
- if (0 == cmp) {
- // Found the key in this bucket. If we're checking for dups...
- if (errorIfDup) {
- if (fullKey.header.isUnused()) {
- // It's ok that the key is there if it is unused. We need to check that
- // there aren't other entries for the key then. As it is very rare that
- // we get here, we don't put any coding effort in here to make this
- // particularly fast
- if (!dupsCheckedYet) {
- // This is expensive and we only want to do it once(? -- when would
- // it happen twice).
- dupsCheckedYet = true;
- if (exists(txn, key)) {
- if (wouldCreateDup(txn, key, genericRecordLoc)) {
- return Status(ErrorCodes::DuplicateKey, dupKeyError(key), 11000);
- }
- else {
- return Status(ErrorCodes::DuplicateKeyValue,
- "key/value already in index");
- }
+template <class BtreeLayout>
+string BtreeLogic<BtreeLayout>::dupKeyError(const KeyDataType& key) const {
+ stringstream ss;
+ ss << "E11000 duplicate key error ";
+ ss << "index: " << _indexName << " ";
+ ss << "dup key: " << key.toString();
+ return ss.str();
+}
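For context, the string assembled above comes out looking like this (index name and key value are hypothetical):

    E11000 duplicate key error index: test.users.$email_1 dup key: { : "a@b.com" }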
+
+/**
+ * Find a key within this btree bucket.
+ *
+ * When duplicate keys are allowed, we use the DiskLoc of the record as if it were part of the
+ * key. That assures that even when there are many duplicates (e.g., 1 million) for a key, our
+ * performance is still good.
+ *
+ * errorIfDup: if the key exists (ignoring the recordLoc), return a DuplicateKey error
+ *
+ * pos: for existing keys k0...kn-1.
+ * returns # it goes BEFORE. so key[pos-1] < key < key[pos]
+ * returns n if it goes after the last existing key.
+ * note result might be an Unused location!
+ */
+template <class BtreeLayout>
+Status BtreeLogic<BtreeLayout>::_find(OperationContext* txn,
+ BucketType* bucket,
+ const KeyDataType& key,
+ const DiskLoc& recordLoc,
+ bool errorIfDup,
+ int* keyPositionOut,
+ bool* foundOut) const {
+ // XXX: fix the ctor for DiskLoc56bit so we can just convert w/o assignment operator
+ LocType genericRecordLoc;
+ genericRecordLoc = recordLoc;
+
+ bool dupsCheckedYet = false;
+
+ int low = 0;
+ int high = bucket->n - 1;
+ int middle = (low + high) / 2;
+
+ while (low <= high) {
+ FullKey fullKey = getFullKey(bucket, middle);
+ int cmp = key.woCompare(fullKey.data, _ordering);
+
+ // The key data is the same.
+ if (0 == cmp) {
+ // Found the key in this bucket. If we're checking for dups...
+ if (errorIfDup) {
+ if (fullKey.header.isUnused()) {
+ // It's ok that the key is there if it is unused. We need to check that
+ // there aren't other entries for the key then. As it is very rare that
+ // we get here, we don't put any coding effort in here to make this
+ // particularly fast
+ if (!dupsCheckedYet) {
+ // This is expensive and we only want to do it once(? -- when would
+ // it happen twice).
+ dupsCheckedYet = true;
+ if (exists(txn, key)) {
+ if (wouldCreateDup(txn, key, genericRecordLoc)) {
+ return Status(ErrorCodes::DuplicateKey, dupKeyError(key), 11000);
+ } else {
+ return Status(ErrorCodes::DuplicateKeyValue,
+ "key/value already in index");
}
}
}
- else {
- if (fullKey.recordLoc == recordLoc) {
- return Status(ErrorCodes::DuplicateKeyValue,
- "key/value already in index");
- }
- else {
- return Status(ErrorCodes::DuplicateKey, dupKeyError(key), 11000);
- }
+ } else {
+ if (fullKey.recordLoc == recordLoc) {
+ return Status(ErrorCodes::DuplicateKeyValue, "key/value already in index");
+ } else {
+ return Status(ErrorCodes::DuplicateKey, dupKeyError(key), 11000);
}
}
+ }
- // If we're here dup keys are allowed, or the key is a dup but unused.
- LocType recordLocCopy = fullKey.recordLoc;
-
- // We clear this bit so we can test equality without the used bit messing us up.
- // XXX: document this
- // XXX: kill this GETOFS stuff
- recordLocCopy.GETOFS() &= ~1;
+ // If we're here dup keys are allowed, or the key is a dup but unused.
+ LocType recordLocCopy = fullKey.recordLoc;
- // Set 'cmp' to the comparison w/the DiskLoc and fall through below.
- cmp = recordLoc.compare(recordLocCopy);
- }
+ // We clear this bit so we can test equality without the used bit messing us up.
+ // XXX: document this
+ // XXX: kill this GETOFS stuff
+ recordLocCopy.GETOFS() &= ~1;
- if (cmp < 0) {
- high = middle - 1;
- }
- else if (cmp > 0) {
- low = middle + 1;
- }
- else {
- // Found it!
- *keyPositionOut = middle;
- *foundOut = true;
- return Status::OK();
- }
+ // Set 'cmp' to the comparison w/the DiskLoc and fall through below.
+ cmp = recordLoc.compare(recordLocCopy);
+ }
- middle = (low + high) / 2;
+ if (cmp < 0) {
+ high = middle - 1;
+ } else if (cmp > 0) {
+ low = middle + 1;
+ } else {
+ // Found it!
+ *keyPositionOut = middle;
+ *foundOut = true;
+ return Status::OK();
}
- // Not found.
- *keyPositionOut = low;
+ middle = (low + high) / 2;
+ }
+
+ // Not found.
+ *keyPositionOut = low;
- // Some debugging checks.
- if (low != bucket->n) {
- wassert(key.woCompare(getFullKey(bucket, low).data, _ordering) <= 0);
+ // Some debugging checks.
+ if (low != bucket->n) {
+ wassert(key.woCompare(getFullKey(bucket, low).data, _ordering) <= 0);
- if (low > 0) {
- if (getFullKey(bucket, low - 1).data.woCompare(key, _ordering) > 0) {
- DEV {
- log() << key.toString() << endl;
- log() << getFullKey(bucket, low - 1).data.toString() << endl;
- }
- wassert(false);
+ if (low > 0) {
+ if (getFullKey(bucket, low - 1).data.woCompare(key, _ordering) > 0) {
+ DEV {
+ log() << key.toString() << endl;
+ log() << getFullKey(bucket, low - 1).data.toString() << endl;
}
+ wassert(false);
}
}
-
- *foundOut = false;
- return Status::OK();
}
- template <class BtreeLayout>
- void BtreeLogic<BtreeLayout>::delBucket(OperationContext* txn,
- BucketType* bucket,
- const DiskLoc bucketLoc) {
- invariant(bucketLoc != getRootLoc(txn));
-
- _cursorRegistry->invalidateCursorsForBucket(bucketLoc);
-
- BucketType* p = getBucket(txn, bucket->parent);
- int parentIdx = indexInParent(txn, bucket, bucketLoc);
- *txn->recoveryUnit()->writing(&childLocForPos(p, parentIdx)) = DiskLoc();
- deallocBucket(txn, bucket, bucketLoc);
- }
+ *foundOut = false;
+ return Status::OK();
+}
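The doc comment above is the load-bearing idea: for indexes that allow duplicates, the record location acts as a tiebreaker, so a bucket stays totally ordered even with millions of equal keys and lookups stay logarithmic. A self-contained sketch of that ordering, with (key, recordLoc) pairs of ints standing in for the real types:

    #include <cstdio>
    #include <utility>
    #include <vector>

    using Entry = std::pair<int, int>;  // (key, recordLoc), both ints for brevity

    // Binary search in the style of _find(): compare key data first, fall
    // through to the record location on a tie, and report the insertion
    // point when the exact pair is absent.
    bool findPos(const std::vector<Entry>& bucket, const Entry& target, int* posOut) {
        int low = 0;
        int high = static_cast<int>(bucket.size()) - 1;
        while (low <= high) {
            int middle = (low + high) / 2;
            int cmp = target.first - bucket[middle].first;
            if (cmp == 0)
                cmp = target.second - bucket[middle].second;  // recordLoc tiebreak
            if (cmp < 0) {
                high = middle - 1;
            } else if (cmp > 0) {
                low = middle + 1;
            } else {
                *posOut = middle;  // exact (key, recordLoc) match
                return true;
            }
        }
        *posOut = low;  // not found: target belongs before bucket[low]
        return false;
    }

    int main() {
        std::vector<Entry> bucket = {{3, 10}, {5, 20}, {5, 40}, {8, 30}};
        int pos;
        std::printf("%d\n", findPos(bucket, {5, 40}, &pos) ? pos : -1);  // 2
        findPos(bucket, {5, 30}, &pos);
        std::printf("%d\n", pos);  // 2: would sit between (5, 20) and (5, 40)
    }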
- template <class BtreeLayout>
- void BtreeLogic<BtreeLayout>::deallocBucket(OperationContext* txn,
- BucketType* bucket,
- const DiskLoc bucketLoc) {
- bucket->n = BtreeLayout::INVALID_N_SENTINEL;
- bucket->parent.Null();
- _recordStore->deleteRecord(txn, bucketLoc.toRecordId());
- }
+template <class BtreeLayout>
+void BtreeLogic<BtreeLayout>::delBucket(OperationContext* txn,
+ BucketType* bucket,
+ const DiskLoc bucketLoc) {
+ invariant(bucketLoc != getRootLoc(txn));
- template <class BtreeLayout>
- void BtreeLogic<BtreeLayout>::restorePosition(OperationContext* txn,
- const BSONObj& savedKey,
- const DiskLoc& savedLoc,
- int direction,
- DiskLoc* bucketLocInOut,
- int* keyOffsetInOut) const {
+ _cursorRegistry->invalidateCursorsForBucket(bucketLoc);
- // The caller has to ensure validity of the saved cursor using the SavedCursorRegistry
- BucketType* bucket = getBucket(txn, *bucketLocInOut);
- invariant(bucket);
- invariant(BtreeLayout::INVALID_N_SENTINEL != bucket->n);
+ BucketType* p = getBucket(txn, bucket->parent);
+ int parentIdx = indexInParent(txn, bucket, bucketLoc);
+ *txn->recoveryUnit()->writing(&childLocForPos(p, parentIdx)) = DiskLoc();
+ deallocBucket(txn, bucket, bucketLoc);
+}
+template <class BtreeLayout>
+void BtreeLogic<BtreeLayout>::deallocBucket(OperationContext* txn,
+ BucketType* bucket,
+ const DiskLoc bucketLoc) {
+ bucket->n = BtreeLayout::INVALID_N_SENTINEL;
+ bucket->parent.Null();
+ _recordStore->deleteRecord(txn, bucketLoc.toRecordId());
+}
+
+template <class BtreeLayout>
+void BtreeLogic<BtreeLayout>::restorePosition(OperationContext* txn,
+ const BSONObj& savedKey,
+ const DiskLoc& savedLoc,
+ int direction,
+ DiskLoc* bucketLocInOut,
+ int* keyOffsetInOut) const {
+ // The caller has to ensure validity of the saved cursor using the SavedCursorRegistry
+ BucketType* bucket = getBucket(txn, *bucketLocInOut);
+ invariant(bucket);
+ invariant(BtreeLayout::INVALID_N_SENTINEL != bucket->n);
+
+ if (_keyIsAt(savedKey, savedLoc, bucket, *keyOffsetInOut)) {
+ skipUnusedKeys(txn, bucketLocInOut, keyOffsetInOut, direction);
+ return;
+ }
+
+ if (*keyOffsetInOut > 0) {
+ (*keyOffsetInOut)--;
if (_keyIsAt(savedKey, savedLoc, bucket, *keyOffsetInOut)) {
skipUnusedKeys(txn, bucketLocInOut, keyOffsetInOut, direction);
return;
}
+ }
- if (*keyOffsetInOut > 0) {
- (*keyOffsetInOut)--;
- if (_keyIsAt(savedKey, savedLoc, bucket, *keyOffsetInOut)) {
- skipUnusedKeys(txn, bucketLocInOut, keyOffsetInOut, direction);
- return;
- }
- }
+ locate(txn, savedKey, savedLoc, direction, keyOffsetInOut, bucketLocInOut);
+}
- locate(txn, savedKey, savedLoc, direction, keyOffsetInOut, bucketLocInOut);
+template <class BtreeLayout>
+bool BtreeLogic<BtreeLayout>::_keyIsAt(const BSONObj& savedKey,
+ const DiskLoc& savedLoc,
+ BucketType* bucket,
+ int keyPos) const {
+ if (keyPos >= bucket->n) {
+ return false;
}
- template <class BtreeLayout>
- bool BtreeLogic<BtreeLayout>::_keyIsAt(const BSONObj& savedKey,
- const DiskLoc& savedLoc,
- BucketType* bucket,
- int keyPos) const {
- if (keyPos >= bucket->n) {
- return false;
- }
-
- FullKey key = getFullKey(bucket, keyPos);
- if (!key.data.toBson().binaryEqual(savedKey)) {
- return false;
- }
- return key.header.recordLoc == savedLoc;
+ FullKey key = getFullKey(bucket, keyPos);
+ if (!key.data.toBson().binaryEqual(savedKey)) {
+ return false;
}
+ return key.header.recordLoc == savedLoc;
+}
- /**
- * May delete the bucket 'bucket' rendering 'bucketLoc' invalid.
- */
- template <class BtreeLayout>
- void BtreeLogic<BtreeLayout>::delKeyAtPos(OperationContext* txn,
- BucketType* bucket,
- const DiskLoc bucketLoc,
- int p) {
- invariant(bucket->n > 0);
- DiskLoc left = childLocForPos(bucket, p);
- if (bucket->n == 1) {
- if (left.isNull() && bucket->nextChild.isNull()) {
- _delKeyAtPos(bucket, p);
- if (isHead(bucket)) {
- // we don't delete the top bucket ever
- }
- else {
- if (!mayBalanceWithNeighbors(txn, bucket, bucketLoc)) {
- // An empty bucket is only allowed as a transient state. If
- // there are no neighbors to balance with, we delete ourselves.
- // This condition is only expected in legacy btrees.
- delBucket(txn, bucket, bucketLoc);
- }
+/**
+ * May delete the bucket 'bucket' rendering 'bucketLoc' invalid.
+ */
+template <class BtreeLayout>
+void BtreeLogic<BtreeLayout>::delKeyAtPos(OperationContext* txn,
+ BucketType* bucket,
+ const DiskLoc bucketLoc,
+ int p) {
+ invariant(bucket->n > 0);
+ DiskLoc left = childLocForPos(bucket, p);
+ if (bucket->n == 1) {
+ if (left.isNull() && bucket->nextChild.isNull()) {
+ _delKeyAtPos(bucket, p);
+ if (isHead(bucket)) {
+ // we don't delete the top bucket ever
+ } else {
+ if (!mayBalanceWithNeighbors(txn, bucket, bucketLoc)) {
+ // An empty bucket is only allowed as a transient state. If
+ // there are no neighbors to balance with, we delete ourselves.
+ // This condition is only expected in legacy btrees.
+ delBucket(txn, bucket, bucketLoc);
}
- return;
}
- deleteInternalKey(txn, bucket, bucketLoc, p);
return;
}
-
- if (left.isNull()) {
- _delKeyAtPos(bucket, p);
- mayBalanceWithNeighbors(txn, bucket, bucketLoc);
- }
- else {
- deleteInternalKey(txn, bucket, bucketLoc, p);
- }
+ deleteInternalKey(txn, bucket, bucketLoc, p);
+ return;
}
- /**
- * This function replaces the specified key (k) by either the prev or next key in the btree
- * (k'). We require that k have either a left or right child. If k has a left child, we set k'
- * to the prev key of k, which must be a leaf present in the left child. If k does not have a
- * left child, we set k' to the next key of k, which must be a leaf present in the right child.
- * When we replace k with k', we copy k' over k (which may cause a split) and then remove k'
- * from its original location. Because k' is stored in a descendant of k, replacing k by k'
- * will not modify the storage location of the original k', and we can easily remove k' from its
- * original location.
- *
- * This function is only needed in cases where k has a left or right child; in other cases a
- * simpler key removal implementation is possible.
- *
- * NOTE on noncompliant BtreeBuilder btrees: It is possible (though likely rare) for btrees
- * created by BtreeBuilder to have k' that is not a leaf, see SERVER-2732. These cases are
- * handled in the same manner as described in the "legacy btree structures" note below.
- *
- * NOTE on legacy btree structures: In legacy btrees, k' can be a nonleaf. In such a case we
- * 'delete' k by marking it as an unused node rather than replacing it with k'. Also, k' may be
- * a leaf but marked as an unused node. In such a case we replace k by k', preserving the key's
- * unused marking. This function is only expected to mark a key as unused when handling a
- * legacy btree.
- */
- template <class BtreeLayout>
- void BtreeLogic<BtreeLayout>::deleteInternalKey(OperationContext* txn,
- BucketType* bucket,
- const DiskLoc bucketLoc,
- int keypos) {
- DiskLoc lchild = childLocForPos(bucket, keypos);
- DiskLoc rchild = childLocForPos(bucket, keypos + 1);
- invariant(!lchild.isNull() || !rchild.isNull());
- int advanceDirection = lchild.isNull() ? 1 : -1;
- int advanceKeyOfs = keypos;
- DiskLoc advanceLoc = advance(txn, bucketLoc, &advanceKeyOfs, advanceDirection);
- // advanceLoc must be a descendant of thisLoc, because thisLoc has a
- // child in the proper direction and all descendants of thisLoc must be
- // nonempty because they are not the root.
- BucketType* advanceBucket = getBucket(txn, advanceLoc);
-
- if (!childLocForPos(advanceBucket, advanceKeyOfs).isNull()
- || !childLocForPos(advanceBucket, advanceKeyOfs + 1).isNull()) {
-
- markUnused(bucket, keypos);
- return;
- }
-
- FullKey kn = getFullKey(advanceBucket, advanceKeyOfs);
- // Because advanceLoc is a descendant of thisLoc, updating thisLoc will
- // not affect packing or keys of advanceLoc and kn will be stable
- // during the following setInternalKey()
- setInternalKey(txn, bucket, bucketLoc, keypos, kn.recordLoc, kn.data,
- childLocForPos(bucket, keypos),
- childLocForPos(bucket, keypos + 1));
- delKeyAtPos(txn, btreemod(txn, advanceBucket), advanceLoc, advanceKeyOfs);
+ if (left.isNull()) {
+ _delKeyAtPos(bucket, p);
+ mayBalanceWithNeighbors(txn, bucket, bucketLoc);
+ } else {
+ deleteInternalKey(txn, bucket, bucketLoc, p);
}
+}
- template <class BtreeLayout>
- void BtreeLogic<BtreeLayout>::replaceWithNextChild(OperationContext* txn,
- BucketType* bucket,
- const DiskLoc bucketLoc) {
-
- invariant(bucket->n == 0 && !bucket->nextChild.isNull() );
- if (bucket->parent.isNull()) {
- invariant(getRootLoc(txn) == bucketLoc);
- _headManager->setHead(txn, bucket->nextChild.toRecordId());
- }
- else {
- BucketType* parentBucket = getBucket(txn, bucket->parent);
- int bucketIndexInParent = indexInParent(txn, bucket, bucketLoc);
- *txn->recoveryUnit()->writing(&childLocForPos(parentBucket, bucketIndexInParent)) =
- bucket->nextChild;
- }
-
- *txn->recoveryUnit()->writing(&getBucket(txn, bucket->nextChild)->parent) = bucket->parent;
- _cursorRegistry->invalidateCursorsForBucket(bucketLoc);
- deallocBucket(txn, bucket, bucketLoc);
- }
-
- template <class BtreeLayout>
- bool BtreeLogic<BtreeLayout>::canMergeChildren(OperationContext* txn,
+/**
+ * This function replaces the specified key (k) by either the prev or next key in the btree
+ * (k'). We require that k have either a left or right child. If k has a left child, we set k'
+ * to the prev key of k, which must be a leaf present in the left child. If k does not have a
+ * left child, we set k' to the next key of k, which must be a leaf present in the right child.
+ * When we replace k with k', we copy k' over k (which may cause a split) and then remove k'
+ * from its original location. Because k' is stored in a descendant of k, replacing k by k'
+ * will not modify the storage location of the original k', and we can easily remove k' from its
+ * original location.
+ *
+ * This function is only needed in cases where k has a left or right child; in other cases a
+ * simpler key removal implementation is possible.
+ *
+ * NOTE on noncompliant BtreeBuilder btrees: It is possible (though likely rare) for btrees
+ * created by BtreeBuilder to have k' that is not a leaf, see SERVER-2732. These cases are
+ * handled in the same manner as described in the "legacy btree structures" note below.
+ *
+ * NOTE on legacy btree structures: In legacy btrees, k' can be a nonleaf. In such a case we
+ * 'delete' k by marking it as an unused node rather than replacing it with k'. Also, k' may be
+ * a leaf but marked as an unused node. In such a case we replace k by k', preserving the key's
+ * unused marking. This function is only expected to mark a key as unused when handling a
+ * legacy btree.
+ */
+template <class BtreeLayout>
+void BtreeLogic<BtreeLayout>::deleteInternalKey(OperationContext* txn,
+ BucketType* bucket,
+ const DiskLoc bucketLoc,
+ int keypos) {
+ DiskLoc lchild = childLocForPos(bucket, keypos);
+ DiskLoc rchild = childLocForPos(bucket, keypos + 1);
+ invariant(!lchild.isNull() || !rchild.isNull());
+ int advanceDirection = lchild.isNull() ? 1 : -1;
+ int advanceKeyOfs = keypos;
+ DiskLoc advanceLoc = advance(txn, bucketLoc, &advanceKeyOfs, advanceDirection);
+ // advanceLoc must be a descendant of thisLoc, because thisLoc has a
+ // child in the proper direction and all descendants of thisLoc must be
+ // nonempty because they are not the root.
+ BucketType* advanceBucket = getBucket(txn, advanceLoc);
+
+ if (!childLocForPos(advanceBucket, advanceKeyOfs).isNull() ||
+ !childLocForPos(advanceBucket, advanceKeyOfs + 1).isNull()) {
+ markUnused(bucket, keypos);
+ return;
+ }
+
+ FullKey kn = getFullKey(advanceBucket, advanceKeyOfs);
+ // Because advanceLoc is a descendant of thisLoc, updating thisLoc will
+ // not affect packing or keys of advanceLoc and kn will be stable
+ // during the following setInternalKey()
+ setInternalKey(txn,
+ bucket,
+ bucketLoc,
+ keypos,
+ kn.recordLoc,
+ kn.data,
+ childLocForPos(bucket, keypos),
+ childLocForPos(bucket, keypos + 1));
+ delKeyAtPos(txn, btreemod(txn, advanceBucket), advanceLoc, advanceKeyOfs);
+}
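The comment block above is the classic internal-node deletion trick, adapted to btree buckets and legacy "unused" markers. The plain binary-search-tree analogue may help as a mental model (a sketch, not MongoDB code): overwrite the internal key with its in-order predecessor from the left subtree, then remove the predecessor from its leaf-ward position; with no left child, splice the right child up.

    #include <memory>

    struct Node {
        int key;
        std::unique_ptr<Node> left, right;
    };

    // Detach and return the largest key of the subtree at n (n must be non-null).
    int detachMax(std::unique_ptr<Node>& n) {
        if (n->right)
            return detachMax(n->right);
        int k = n->key;
        n = std::move(n->left);  // unlink the predecessor's node
        return k;
    }

    void erase(std::unique_ptr<Node>& n, int key) {
        if (!n)
            return;
        if (key < n->key) {
            erase(n->left, key);
        } else if (key > n->key) {
            erase(n->right, key);
        } else if (n->left) {
            n->key = detachMax(n->left);  // k' (the predecessor) overwrites k
        } else {
            n = std::move(n->right);  // no left child: splice the right child up
        }
    }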
+
+template <class BtreeLayout>
+void BtreeLogic<BtreeLayout>::replaceWithNextChild(OperationContext* txn,
BucketType* bucket,
- const DiskLoc bucketLoc,
- const int leftIndex) {
- invariant(leftIndex >= 0 && leftIndex < bucket->n);
+ const DiskLoc bucketLoc) {
+ invariant(bucket->n == 0 && !bucket->nextChild.isNull());
+ if (bucket->parent.isNull()) {
+ invariant(getRootLoc(txn) == bucketLoc);
+ _headManager->setHead(txn, bucket->nextChild.toRecordId());
+ } else {
+ BucketType* parentBucket = getBucket(txn, bucket->parent);
+ int bucketIndexInParent = indexInParent(txn, bucket, bucketLoc);
+ *txn->recoveryUnit()->writing(&childLocForPos(parentBucket, bucketIndexInParent)) =
+ bucket->nextChild;
+ }
+
+ *txn->recoveryUnit()->writing(&getBucket(txn, bucket->nextChild)->parent) = bucket->parent;
+ _cursorRegistry->invalidateCursorsForBucket(bucketLoc);
+ deallocBucket(txn, bucket, bucketLoc);
+}
+
+template <class BtreeLayout>
+bool BtreeLogic<BtreeLayout>::canMergeChildren(OperationContext* txn,
+ BucketType* bucket,
+ const DiskLoc bucketLoc,
+ const int leftIndex) {
+ invariant(leftIndex >= 0 && leftIndex < bucket->n);
- DiskLoc leftNodeLoc = childLocForPos(bucket, leftIndex);
- DiskLoc rightNodeLoc = childLocForPos(bucket, leftIndex + 1);
+ DiskLoc leftNodeLoc = childLocForPos(bucket, leftIndex);
+ DiskLoc rightNodeLoc = childLocForPos(bucket, leftIndex + 1);
- if (leftNodeLoc.isNull() || rightNodeLoc.isNull()) {
- return false;
- }
+ if (leftNodeLoc.isNull() || rightNodeLoc.isNull()) {
+ return false;
+ }
- int pos = 0;
+ int pos = 0;
- BucketType* leftBucket = getBucket(txn, leftNodeLoc);
- BucketType* rightBucket = getBucket(txn, rightNodeLoc);
+ BucketType* leftBucket = getBucket(txn, leftNodeLoc);
+ BucketType* rightBucket = getBucket(txn, rightNodeLoc);
- int sum = BucketType::HeaderSize
- + _packedDataSize(leftBucket, pos)
- + _packedDataSize(rightBucket, pos)
- + getFullKey(bucket, leftIndex).data.dataSize()
- + sizeof(KeyHeaderType);
+ int sum = BucketType::HeaderSize + _packedDataSize(leftBucket, pos) +
+ _packedDataSize(rightBucket, pos) + getFullKey(bucket, leftIndex).data.dataSize() +
+ sizeof(KeyHeaderType);
- return sum <= BtreeLayout::BucketSize;
- }
+ return sum <= BtreeLayout::BucketSize;
+}
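With hypothetical numbers plugged in, the merge test above reads: sum = HeaderSize + packed(left) + packed(right) + separator key data + one KeyHeaderType = 40 + 3800 + 3900 + 32 + 16 = 7788, which would pass against a BucketSize of 8192; with 4200- and 4300-byte children it comes to 8588 and would not. (All sizes invented for illustration; only the formula is from the code.)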
- /**
- * This implementation must respect the meaning and value of lowWaterMark. Also see comments in
- * splitPos().
- */
- template <class BtreeLayout>
- int BtreeLogic<BtreeLayout>::_rebalancedSeparatorPos(OperationContext* txn,
- BucketType* bucket,
- int leftIndex) {
- int split = -1;
- int rightSize = 0;
+/**
+ * This implementation must respect the meaning and value of lowWaterMark. Also see comments in
+ * splitPos().
+ */
+template <class BtreeLayout>
+int BtreeLogic<BtreeLayout>::_rebalancedSeparatorPos(OperationContext* txn,
+ BucketType* bucket,
+ int leftIndex) {
+ int split = -1;
+ int rightSize = 0;
- const BucketType* l = childForPos(txn, bucket, leftIndex);
- const BucketType* r = childForPos(txn, bucket, leftIndex + 1);
+ const BucketType* l = childForPos(txn, bucket, leftIndex);
+ const BucketType* r = childForPos(txn, bucket, leftIndex + 1);
- int KNS = sizeof(KeyHeaderType);
- int rightSizeLimit = ( l->topSize
- + l->n * KNS
- + getFullKey(bucket, leftIndex).data.dataSize()
- + KNS
- + r->topSize
- + r->n * KNS ) / 2;
+ int KNS = sizeof(KeyHeaderType);
+ int rightSizeLimit = (l->topSize + l->n * KNS + getFullKey(bucket, leftIndex).data.dataSize() +
+ KNS + r->topSize + r->n * KNS) /
+ 2;
- // This constraint should be ensured by only calling this function
- // if we go below the low water mark.
- invariant(rightSizeLimit < BtreeLayout::BucketBodySize);
+ // This constraint should be ensured by only calling this function
+ // if we go below the low water mark.
+ invariant(rightSizeLimit < BtreeLayout::BucketBodySize);
- for (int i = r->n - 1; i > -1; --i) {
- rightSize += getFullKey(r, i).data.dataSize() + KNS;
- if (rightSize > rightSizeLimit) {
- split = l->n + 1 + i;
- break;
- }
+ for (int i = r->n - 1; i > -1; --i) {
+ rightSize += getFullKey(r, i).data.dataSize() + KNS;
+ if (rightSize > rightSizeLimit) {
+ split = l->n + 1 + i;
+ break;
}
+ }
- if (split == -1) {
- rightSize += getFullKey(bucket, leftIndex).data.dataSize() + KNS;
- if (rightSize > rightSizeLimit) {
- split = l->n;
- }
+ if (split == -1) {
+ rightSize += getFullKey(bucket, leftIndex).data.dataSize() + KNS;
+ if (rightSize > rightSizeLimit) {
+ split = l->n;
}
+ }
- if (split == -1) {
- for (int i = l->n - 1; i > -1; --i) {
- rightSize += getFullKey(l, i).data.dataSize() + KNS;
- if (rightSize > rightSizeLimit) {
- split = i;
- break;
- }
+ if (split == -1) {
+ for (int i = l->n - 1; i > -1; --i) {
+ rightSize += getFullKey(l, i).data.dataSize() + KNS;
+ if (rightSize > rightSizeLimit) {
+ split = i;
+ break;
}
}
+ }
- // safeguards - we must not create an empty bucket
- if (split < 1) {
- split = 1;
- }
- else if (split > l->n + 1 + r->n - 2) {
- split = l->n + 1 + r->n - 2;
- }
-
- return split;
+ // safeguards - we must not create an empty bucket
+ if (split < 1) {
+ split = 1;
+ } else if (split > l->n + 1 + r->n - 2) {
+ split = l->n + 1 + r->n - 2;
}
- template <class BtreeLayout>
- void BtreeLogic<BtreeLayout>::doMergeChildren(OperationContext* txn,
- BucketType* bucket,
- const DiskLoc bucketLoc,
- int leftIndex) {
+ return split;
+}
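A worked example of the scan above, with invented uniform sizes: say l->n = 10, r->n = 2, and every key (data plus KeyHeaderType) costs 100 bytes, the old separator included. Then rightSizeLimit = (1000 + 100 + 200) / 2 = 650. Accumulating from the right end, rightSize first exceeds 650 (reaching 700) at l's key index 6, so split = 6: three of l's keys, the old separator, and both of r's keys (600 bytes) will sit right of the new separator, with 600 bytes to its left.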
- DiskLoc leftNodeLoc = childLocForPos(bucket, leftIndex);
- DiskLoc rightNodeLoc = childLocForPos(bucket, leftIndex + 1);
+template <class BtreeLayout>
+void BtreeLogic<BtreeLayout>::doMergeChildren(OperationContext* txn,
+ BucketType* bucket,
+ const DiskLoc bucketLoc,
+ int leftIndex) {
+ DiskLoc leftNodeLoc = childLocForPos(bucket, leftIndex);
+ DiskLoc rightNodeLoc = childLocForPos(bucket, leftIndex + 1);
- BucketType* l = btreemod(txn, getBucket(txn, leftNodeLoc));
- BucketType* r = btreemod(txn, getBucket(txn, rightNodeLoc));
+ BucketType* l = btreemod(txn, getBucket(txn, leftNodeLoc));
+ BucketType* r = btreemod(txn, getBucket(txn, rightNodeLoc));
- int pos = 0;
- _packReadyForMod(l, pos);
- _packReadyForMod(r, pos);
+ int pos = 0;
+ _packReadyForMod(l, pos);
+ _packReadyForMod(r, pos);
- // We know the additional keys below will fit in l because canMergeChildren() must be true.
- int oldLNum = l->n;
- // left child's right child becomes old parent key's left child
- FullKey knLeft = getFullKey(bucket, leftIndex);
- invariant(pushBack(l, knLeft.recordLoc, knLeft.data, l->nextChild));
+ // We know the additional keys below will fit in l because canMergeChildren() must be true.
+ int oldLNum = l->n;
+ // left child's right child becomes old parent key's left child
+ FullKey knLeft = getFullKey(bucket, leftIndex);
+ invariant(pushBack(l, knLeft.recordLoc, knLeft.data, l->nextChild));
- for (int i = 0; i < r->n; ++i) {
- FullKey kn = getFullKey(r, i);
- invariant(pushBack(l, kn.recordLoc, kn.data, kn.prevChildBucket));
- }
+ for (int i = 0; i < r->n; ++i) {
+ FullKey kn = getFullKey(r, i);
+ invariant(pushBack(l, kn.recordLoc, kn.data, kn.prevChildBucket));
+ }
- l->nextChild = r->nextChild;
- fixParentPtrs(txn, l, leftNodeLoc, oldLNum);
- delBucket(txn, r, rightNodeLoc);
+ l->nextChild = r->nextChild;
+ fixParentPtrs(txn, l, leftNodeLoc, oldLNum);
+ delBucket(txn, r, rightNodeLoc);
- childLocForPos(bucket, leftIndex + 1) = leftNodeLoc;
- childLocForPos(bucket, leftIndex) = DiskLoc();
- _delKeyAtPos(bucket, leftIndex, true);
+ childLocForPos(bucket, leftIndex + 1) = leftNodeLoc;
+ childLocForPos(bucket, leftIndex) = DiskLoc();
+ _delKeyAtPos(bucket, leftIndex, true);
- if (bucket->n == 0) {
- // Will trash bucket and bucketLoc.
- //
- // TODO To ensure all leaves are of equal height, we should ensure this is only called
- // on the root.
- replaceWithNextChild(txn, bucket, bucketLoc);
- }
- else {
- mayBalanceWithNeighbors(txn, bucket, bucketLoc);
- }
+ if (bucket->n == 0) {
+ // Will trash bucket and bucketLoc.
+ //
+ // TODO To ensure all leaves are of equal height, we should ensure this is only called
+ // on the root.
+ replaceWithNextChild(txn, bucket, bucketLoc);
+ } else {
+ mayBalanceWithNeighbors(txn, bucket, bucketLoc);
}
+}
- template <class BtreeLayout>
- int BtreeLogic<BtreeLayout>::indexInParent(OperationContext* txn,
- BucketType* bucket,
- const DiskLoc bucketLoc) const {
- invariant(!bucket->parent.isNull());
- const BucketType* p = getBucket(txn, bucket->parent);
- if (p->nextChild == bucketLoc) {
- return p->n;
- }
+template <class BtreeLayout>
+int BtreeLogic<BtreeLayout>::indexInParent(OperationContext* txn,
+ BucketType* bucket,
+ const DiskLoc bucketLoc) const {
+ invariant(!bucket->parent.isNull());
+ const BucketType* p = getBucket(txn, bucket->parent);
+ if (p->nextChild == bucketLoc) {
+ return p->n;
+ }
- for (int i = 0; i < p->n; ++i) {
- if (getKeyHeader(p, i).prevChildBucket == bucketLoc) {
- return i;
- }
+ for (int i = 0; i < p->n; ++i) {
+ if (getKeyHeader(p, i).prevChildBucket == bucketLoc) {
+ return i;
}
-
- log() << "ERROR: can't find ref to child bucket.\n";
- log() << "child: " << bucketLoc << "\n";
- //dump();
- log() << "Parent: " << bucket->parent << "\n";
- //p->dump();
- invariant(false);
- return -1; // just to compile
}
- template <class BtreeLayout>
- bool BtreeLogic<BtreeLayout>::tryBalanceChildren(OperationContext* txn,
- BucketType* bucket,
- const DiskLoc bucketLoc,
- int leftIndex) {
-
- // If we can merge, then we must merge rather than balance to preserve bucket utilization
- // constraints.
- if (canMergeChildren(txn, bucket, bucketLoc, leftIndex)) {
- return false;
- }
+ log() << "ERROR: can't find ref to child bucket.\n";
+ log() << "child: " << bucketLoc << "\n";
+ // dump();
+ log() << "Parent: " << bucket->parent << "\n";
+ // p->dump();
+ invariant(false);
+ return -1; // just to compile
+}
- doBalanceChildren(txn, btreemod(txn, bucket), bucketLoc, leftIndex);
- return true;
+template <class BtreeLayout>
+bool BtreeLogic<BtreeLayout>::tryBalanceChildren(OperationContext* txn,
+ BucketType* bucket,
+ const DiskLoc bucketLoc,
+ int leftIndex) {
+ // If we can merge, then we must merge rather than balance to preserve bucket utilization
+ // constraints.
+ if (canMergeChildren(txn, bucket, bucketLoc, leftIndex)) {
+ return false;
}
- template <class BtreeLayout>
- void BtreeLogic<BtreeLayout>::doBalanceLeftToRight(OperationContext* txn,
- BucketType* bucket,
- const DiskLoc bucketLoc,
- int leftIndex,
- int split,
- BucketType* l,
- const DiskLoc lchild,
- BucketType* r,
- const DiskLoc rchild) {
-
- // TODO maybe do some audits the same way pushBack() does? As a precondition, rchild + the
- // old separator are <= half a body size, and lchild is at most completely full. Based on
- // the value of split, rchild will get <= half of the total bytes which is at most 75% of a
- // full body. So rchild will have room for the following keys:
- int rAdd = l->n - split;
- reserveKeysFront(r, rAdd);
-
- for (int i = split + 1, j = 0; i < l->n; ++i, ++j) {
- FullKey kn = getFullKey(l, i);
- setKey(r, j, kn.recordLoc, kn.data, kn.prevChildBucket);
- }
+ doBalanceChildren(txn, btreemod(txn, bucket), bucketLoc, leftIndex);
+ return true;
+}
- FullKey leftIndexKN = getFullKey(bucket, leftIndex);
- setKey(r, rAdd - 1, leftIndexKN.recordLoc, leftIndexKN.data, l->nextChild);
+template <class BtreeLayout>
+void BtreeLogic<BtreeLayout>::doBalanceLeftToRight(OperationContext* txn,
+ BucketType* bucket,
+ const DiskLoc bucketLoc,
+ int leftIndex,
+ int split,
+ BucketType* l,
+ const DiskLoc lchild,
+ BucketType* r,
+ const DiskLoc rchild) {
+ // TODO maybe do some audits the same way pushBack() does? As a precondition, rchild + the
+ // old separator are <= half a body size, and lchild is at most completely full. Based on
+ // the value of split, rchild will get <= half of the total bytes which is at most 75% of a
+ // full body. So rchild will have room for the following keys:
+ int rAdd = l->n - split;
+ reserveKeysFront(r, rAdd);
+
+ for (int i = split + 1, j = 0; i < l->n; ++i, ++j) {
+ FullKey kn = getFullKey(l, i);
+ setKey(r, j, kn.recordLoc, kn.data, kn.prevChildBucket);
+ }
+
+ FullKey leftIndexKN = getFullKey(bucket, leftIndex);
+ setKey(r, rAdd - 1, leftIndexKN.recordLoc, leftIndexKN.data, l->nextChild);
+
+ fixParentPtrs(txn, r, rchild, 0, rAdd - 1);
+
+ FullKey kn = getFullKey(l, split);
+ l->nextChild = kn.prevChildBucket;
+
+ // Because lchild is a descendant of thisLoc, updating thisLoc will not affect packing or
+ // keys of lchild and kn will be stable during the following setInternalKey()
+ setInternalKey(txn, bucket, bucketLoc, leftIndex, kn.recordLoc, kn.data, lchild, rchild);
+
+ // lchild and rchild cannot be merged, so there must be >0 (actually more) keys to the left
+ // of split.
+ int zeropos = 0;
+ truncateTo(l, split, zeropos);
+}
+
+template <class BtreeLayout>
+void BtreeLogic<BtreeLayout>::doBalanceRightToLeft(OperationContext* txn,
+ BucketType* bucket,
+ const DiskLoc bucketLoc,
+ int leftIndex,
+ int split,
+ BucketType* l,
+ const DiskLoc lchild,
+ BucketType* r,
+ const DiskLoc rchild) {
+ // As a precondition, lchild + the old separator are <= half a body size,
+ // and rchild is at most completely full. Based on the value of split,
+ // lchild will get less than half of the total bytes which is at most 75%
+ // of a full body. So lchild will have room for the following keys:
+ int lN = l->n;
+
+ {
+ // left child's right child becomes old parent key's left child
+ FullKey kn = getFullKey(bucket, leftIndex);
+ invariant(pushBack(l, kn.recordLoc, kn.data, l->nextChild));
+ }
- fixParentPtrs(txn, r, rchild, 0, rAdd - 1);
+ for (int i = 0; i < split - lN - 1; ++i) {
+ FullKey kn = getFullKey(r, i);
+ invariant(pushBack(l, kn.recordLoc, kn.data, kn.prevChildBucket));
+ }
- FullKey kn = getFullKey(l, split);
+ {
+ FullKey kn = getFullKey(r, split - lN - 1);
l->nextChild = kn.prevChildBucket;
-
- // Because lchild is a descendant of thisLoc, updating thisLoc will not affect packing or
- // keys of lchild and kn will be stable during the following setInternalKey()
+ // Child lN was lchild's old nextChild, and don't need to fix that one.
+ fixParentPtrs(txn, l, lchild, lN + 1, l->n);
+ // Because rchild is a descendant of thisLoc, updating thisLoc will
+ // not affect packing or keys of rchild and kn will be stable
+ // during the following setInternalKey()
setInternalKey(txn, bucket, bucketLoc, leftIndex, kn.recordLoc, kn.data, lchild, rchild);
-
- // lchild and rchild cannot be merged, so there must be >0 (actually more) keys to the left
- // of split.
- int zeropos = 0;
- truncateTo(l, split, zeropos);
- }
-
- template <class BtreeLayout>
- void BtreeLogic<BtreeLayout>::doBalanceRightToLeft(OperationContext* txn,
- BucketType* bucket,
- const DiskLoc bucketLoc,
- int leftIndex,
- int split,
- BucketType* l,
- const DiskLoc lchild,
- BucketType* r,
- const DiskLoc rchild) {
- // As a precondition, lchild + the old separator are <= half a body size,
- // and rchild is at most completely full. Based on the value of split,
- // lchild will get less than half of the total bytes which is at most 75%
- // of a full body. So lchild will have room for the following keys:
- int lN = l->n;
-
- {
- // left child's right child becomes old parent key's left child
- FullKey kn = getFullKey(bucket, leftIndex);
- invariant(pushBack(l, kn.recordLoc, kn.data, l->nextChild));
- }
-
- for (int i = 0; i < split - lN - 1; ++i) {
- FullKey kn = getFullKey(r, i);
- invariant(pushBack(l, kn.recordLoc, kn.data, kn.prevChildBucket));
- }
-
- {
- FullKey kn = getFullKey(r, split - lN - 1);
- l->nextChild = kn.prevChildBucket;
- // Child lN was lchild's old nextChild, and don't need to fix that one.
- fixParentPtrs(txn, l, lchild, lN + 1, l->n);
- // Because rchild is a descendant of thisLoc, updating thisLoc will
- // not affect packing or keys of rchild and kn will be stable
- // during the following setInternalKey()
- setInternalKey(txn, bucket, bucketLoc, leftIndex, kn.recordLoc, kn.data, lchild, rchild);
- }
-
- // lchild and rchild cannot be merged, so there must be >0 (actually more)
- // keys to the right of split.
- int zeropos = 0;
- dropFront(r, split - lN, zeropos);
}
- template <class BtreeLayout>
- void BtreeLogic<BtreeLayout>::doBalanceChildren(OperationContext* txn,
- BucketType* bucket,
- const DiskLoc bucketLoc,
- int leftIndex) {
-
- DiskLoc lchild = childLocForPos(bucket, leftIndex);
- DiskLoc rchild = childLocForPos(bucket, leftIndex + 1);
-
- int zeropos = 0;
- BucketType* l = btreemod(txn, getBucket(txn, lchild));
- _packReadyForMod(l, zeropos);
+ // lchild and rchild cannot be merged, so there must be >0 (actually more)
+ // keys to the right of split.
+ int zeropos = 0;
+ dropFront(r, split - lN, zeropos);
+}
- BucketType* r = btreemod(txn, getBucket(txn, rchild));
- _packReadyForMod(r, zeropos);
-
- int split = _rebalancedSeparatorPos(txn, bucket, leftIndex);
-
- // By definition, if we are below the low water mark and cannot merge
- // then we must actively balance.
- invariant(split != l->n);
- if (split < l->n) {
- doBalanceLeftToRight(txn, bucket, bucketLoc, leftIndex, split, l, lchild, r, rchild);
- }
- else {
- doBalanceRightToLeft(txn, bucket, bucketLoc, leftIndex, split, l, lchild, r, rchild);
- }
+template <class BtreeLayout>
+void BtreeLogic<BtreeLayout>::doBalanceChildren(OperationContext* txn,
+ BucketType* bucket,
+ const DiskLoc bucketLoc,
+ int leftIndex) {
+ DiskLoc lchild = childLocForPos(bucket, leftIndex);
+ DiskLoc rchild = childLocForPos(bucket, leftIndex + 1);
+
+ int zeropos = 0;
+ BucketType* l = btreemod(txn, getBucket(txn, lchild));
+ _packReadyForMod(l, zeropos);
+
+ BucketType* r = btreemod(txn, getBucket(txn, rchild));
+ _packReadyForMod(r, zeropos);
+
+ int split = _rebalancedSeparatorPos(txn, bucket, leftIndex);
+
+ // By definition, if we are below the low water mark and cannot merge
+ // then we must actively balance.
+ invariant(split != l->n);
+ if (split < l->n) {
+ doBalanceLeftToRight(txn, bucket, bucketLoc, leftIndex, split, l, lchild, r, rchild);
+ } else {
+ doBalanceRightToLeft(txn, bucket, bucketLoc, leftIndex, split, l, lchild, r, rchild);
+ }
+}
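Tying this together: _rebalancedSeparatorPos() returns a position in the virtual merged sequence of l's keys, the old separator, and r's keys. When split < l->n the new separator still falls inside the left child, so surplus keys flow left-to-right; otherwise right-to-left. With the invented numbers from the example above (l->n = 10, split = 6), 6 < 10, so doBalanceLeftToRight() runs and l keeps only its first six keys.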
+
+template <class BtreeLayout>
+bool BtreeLogic<BtreeLayout>::mayBalanceWithNeighbors(OperationContext* txn,
+ BucketType* bucket,
+ const DiskLoc bucketLoc) {
+ if (bucket->parent.isNull()) {
+ return false;
}
- template <class BtreeLayout>
- bool BtreeLogic<BtreeLayout>::mayBalanceWithNeighbors(OperationContext* txn,
- BucketType* bucket,
- const DiskLoc bucketLoc) {
- if (bucket->parent.isNull()) {
- return false;
- }
-
- if (_packedDataSize(bucket, 0) >= lowWaterMark()) {
- return false;
- }
-
- BucketType* p = getBucket(txn, bucket->parent);
- int parentIdx = indexInParent(txn, bucket, bucketLoc);
-
- // TODO will missing neighbor case be possible long term? Should we try to merge/balance
- // somehow in that case if so?
- bool mayBalanceRight = (parentIdx < p->n) && !childLocForPos(p, parentIdx + 1).isNull();
- bool mayBalanceLeft = ( parentIdx > 0 ) && !childLocForPos(p, parentIdx - 1).isNull();
-
- // Balance if possible on one side - we merge only if absolutely necessary to preserve btree
- // bucket utilization constraints since that's a more heavy duty operation (especially if we
- // must re-split later).
- if (mayBalanceRight && tryBalanceChildren(txn, p, bucket->parent, parentIdx)) {
- return true;
- }
-
- if (mayBalanceLeft && tryBalanceChildren(txn, p, bucket->parent, parentIdx - 1)) {
- return true;
- }
-
- BucketType* pm = btreemod(txn, getBucket(txn, bucket->parent));
- if (mayBalanceRight) {
- doMergeChildren(txn, pm, bucket->parent, parentIdx);
- return true;
- }
- else if (mayBalanceLeft) {
- doMergeChildren(txn, pm, bucket->parent, parentIdx - 1);
- return true;
- }
-
+ if (_packedDataSize(bucket, 0) >= lowWaterMark()) {
return false;
}
- template <class BtreeLayout>
- bool BtreeLogic<BtreeLayout>::unindex(OperationContext* txn,
- const BSONObj& key,
- const DiskLoc& recordLoc) {
- int pos;
- bool found = false;
- KeyDataOwnedType ownedKey(key);
+ BucketType* p = getBucket(txn, bucket->parent);
+ int parentIdx = indexInParent(txn, bucket, bucketLoc);
- DiskLoc loc = _locate(txn, getRootLoc(txn), ownedKey, &pos, &found, recordLoc, 1);
- if (found) {
- BucketType* bucket = btreemod(txn, getBucket(txn, loc));
- delKeyAtPos(txn, bucket, loc, pos);
- assertValid(_indexName, getRoot(txn), _ordering);
- }
- return found;
- }
+ // TODO will missing neighbor case be possible long term? Should we try to merge/balance
+ // somehow in that case if so?
+ bool mayBalanceRight = (parentIdx < p->n) && !childLocForPos(p, parentIdx + 1).isNull();
+ bool mayBalanceLeft = (parentIdx > 0) && !childLocForPos(p, parentIdx - 1).isNull();
- template <class BtreeLayout>
- bool BtreeLogic<BtreeLayout>::isEmpty(OperationContext* txn) const {
- return getRoot(txn)->n == 0;
+ // Balance if possible on one side - we merge only if absolutely necessary to preserve btree
+ // bucket utilization constraints since that's a more heavy duty operation (especially if we
+ // must re-split later).
+ if (mayBalanceRight && tryBalanceChildren(txn, p, bucket->parent, parentIdx)) {
+ return true;
}
- /**
- * This can cause a lot of additional page writes when we assign buckets to different parents.
- * Maybe get rid of parent ptrs?
- */
- template <class BtreeLayout>
- void BtreeLogic<BtreeLayout>::fixParentPtrs(OperationContext* txn,
- BucketType* bucket,
- const DiskLoc bucketLoc,
- int firstIndex,
- int lastIndex) {
-
- invariant(getBucket(txn, bucketLoc) == bucket);
-
- if (lastIndex == -1) {
- lastIndex = bucket->n;
- }
-
- for (int i = firstIndex; i <= lastIndex; i++) {
- const DiskLoc childLoc = childLocForPos(bucket, i);
- if (!childLoc.isNull()) {
- *txn->recoveryUnit()->writing(&getBucket(txn, childLoc)->parent) = bucketLoc;
- }
- }
+ if (mayBalanceLeft && tryBalanceChildren(txn, p, bucket->parent, parentIdx - 1)) {
+ return true;
}
- template <class BtreeLayout>
- void BtreeLogic<BtreeLayout>::setInternalKey(OperationContext* txn,
- BucketType* bucket,
- const DiskLoc bucketLoc,
- int keypos,
- const DiskLoc recordLoc,
- const KeyDataType& key,
- const DiskLoc lchild,
- const DiskLoc rchild) {
- childLocForPos(bucket, keypos).Null();
- // This may leave the bucket empty (n == 0) which is ok only as a transient state. In the
- // instant case, the implementation of insertHere behaves correctly when n == 0 and as a
- // side effect increments n.
- _delKeyAtPos(bucket, keypos, true);
-
- // Ensure we do not orphan neighbor's old child.
- invariant(childLocForPos(bucket, keypos ) == rchild);
-
- // Just set temporarily - required to pass validation in insertHere()
- childLocForPos(bucket, keypos) = lchild;
-
- insertHere(txn, bucketLoc, keypos, key, recordLoc, lchild, rchild);
- }
-
- /**
- * insert a key in this bucket, splitting if necessary.
- *
- * @keypos - where to insert the key in range 0..n. 0=make leftmost, n=make rightmost. NOTE
- * this function may free some data, and as a result the value passed for keypos may be invalid
- * after calling insertHere()
- *
- * Some of the write intent signaling below relies on the implementation of the optimized write
- * intent code in basicInsert().
- */
- template <class BtreeLayout>
- void BtreeLogic<BtreeLayout>::insertHere(OperationContext* txn,
- const DiskLoc bucketLoc,
- int pos,
- const KeyDataType& key,
- const DiskLoc recordLoc,
- const DiskLoc leftChildLoc,
- const DiskLoc rightChildLoc) {
+ BucketType* pm = btreemod(txn, getBucket(txn, bucket->parent));
+ if (mayBalanceRight) {
+ doMergeChildren(txn, pm, bucket->parent, parentIdx);
+ return true;
+ } else if (mayBalanceLeft) {
+ doMergeChildren(txn, pm, bucket->parent, parentIdx - 1);
+ return true;
+ }
- BucketType* bucket = getBucket(txn, bucketLoc);
+ return false;
+}
- if (!basicInsert(txn, bucket, bucketLoc, pos, key, recordLoc)) {
- // If basicInsert() fails, the bucket will be packed as required by split().
- split(txn, btreemod(txn, bucket), bucketLoc, pos, recordLoc, key, leftChildLoc, rightChildLoc);
- return;
- }
+template <class BtreeLayout>
+bool BtreeLogic<BtreeLayout>::unindex(OperationContext* txn,
+ const BSONObj& key,
+ const DiskLoc& recordLoc) {
+ int pos;
+ bool found = false;
+ KeyDataOwnedType ownedKey(key);
- KeyHeaderType* kn = &getKeyHeader(bucket, pos);
- if (pos + 1 == bucket->n) {
- // It's the last key.
- if (bucket->nextChild != leftChildLoc) {
- // XXX log more
- invariant(false);
- }
- kn->prevChildBucket = bucket->nextChild;
- invariant(kn->prevChildBucket == leftChildLoc);
- *txn->recoveryUnit()->writing(&bucket->nextChild) = rightChildLoc;
- if (!rightChildLoc.isNull()) {
- *txn->recoveryUnit()->writing(&getBucket(txn, rightChildLoc)->parent) = bucketLoc;
- }
- }
- else {
- kn->prevChildBucket = leftChildLoc;
- if (getKeyHeader(bucket, pos + 1).prevChildBucket != leftChildLoc) {
- // XXX: log more
- invariant(false);
- }
- const LocType *pc = &getKeyHeader(bucket, pos + 1).prevChildBucket;
- // Intent declared in basicInsert()
- *const_cast<LocType*>(pc) = rightChildLoc;
- if (!rightChildLoc.isNull()) {
- *txn->recoveryUnit()->writing(&getBucket(txn, rightChildLoc)->parent) = bucketLoc;
- }
- }
+ DiskLoc loc = _locate(txn, getRootLoc(txn), ownedKey, &pos, &found, recordLoc, 1);
+ if (found) {
+ BucketType* bucket = btreemod(txn, getBucket(txn, loc));
+ delKeyAtPos(txn, bucket, loc, pos);
+ assertValid(_indexName, getRoot(txn), _ordering);
}
+ return found;
+}
- template <class BtreeLayout>
- void BtreeLogic<BtreeLayout>::split(OperationContext* txn,
- BucketType* bucket,
- const DiskLoc bucketLoc,
- int keypos,
- const DiskLoc recordLoc,
- const KeyDataType& key,
- const DiskLoc lchild,
- const DiskLoc rchild) {
-
- int split = splitPos(bucket, keypos);
- DiskLoc rLoc = _addBucket(txn);
- BucketType* r = btreemod(txn, getBucket(txn, rLoc));
-
- for (int i = split + 1; i < bucket->n; i++) {
- FullKey kn = getFullKey(bucket, i);
- invariant(pushBack(r, kn.recordLoc, kn.data, kn.prevChildBucket));
- }
- r->nextChild = bucket->nextChild;
- assertValid(_indexName, r, _ordering);
-
- r = NULL;
- fixParentPtrs(txn, getBucket(txn, rLoc), rLoc);
-
- FullKey splitkey = getFullKey(bucket, split);
- // splitkey key gets promoted, its children will be thisLoc (l) and rLoc (r)
- bucket->nextChild = splitkey.prevChildBucket;
-
- // Because thisLoc is a descendant of parent, updating parent will not affect packing or
- // keys of thisLoc and splitkey will be stable during the following:
-
- if (bucket->parent.isNull()) {
- // promote splitkey to a parent this->node make a new parent if we were the root
- DiskLoc L = _addBucket(txn);
- BucketType* p = btreemod(txn, getBucket(txn, L));
- invariant(pushBack(p, splitkey.recordLoc, splitkey.data, bucketLoc));
- p->nextChild = rLoc;
- assertValid(_indexName, p, _ordering);
- bucket->parent = L;
- _headManager->setHead(txn, L.toRecordId());
- *txn->recoveryUnit()->writing(&getBucket(txn, rLoc)->parent) = bucket->parent;
- }
- else {
- // set this before calling _insert - if it splits it will do fixParent() logic and
- // change the value.
- *txn->recoveryUnit()->writing(&getBucket(txn, rLoc)->parent) = bucket->parent;
- _insert(txn,
- getBucket(txn, bucket->parent),
- bucket->parent,
- splitkey.data,
- splitkey.recordLoc,
- true, // dupsallowed
- bucketLoc,
- rLoc);
- }
+template <class BtreeLayout>
+bool BtreeLogic<BtreeLayout>::isEmpty(OperationContext* txn) const {
+ return getRoot(txn)->n == 0;
+}
- int newpos = keypos;
- // note this may trash splitkey.key. thus we had to promote it before finishing up here.
- truncateTo(bucket, split, newpos);
+/**
+ * This can cause a lot of additional page writes when we assign buckets to different parents.
+ * Maybe get rid of parent ptrs?
+ */
+template <class BtreeLayout>
+void BtreeLogic<BtreeLayout>::fixParentPtrs(OperationContext* txn,
+ BucketType* bucket,
+ const DiskLoc bucketLoc,
+ int firstIndex,
+ int lastIndex) {
+ invariant(getBucket(txn, bucketLoc) == bucket);
- // add our this->new key, there is room this->now
- if (keypos <= split) {
- insertHere(txn, bucketLoc, newpos, key, recordLoc, lchild, rchild);
- }
- else {
- int kp = keypos - split - 1;
- invariant(kp >= 0);
- insertHere(txn, rLoc, kp, key, recordLoc, lchild, rchild);
- }
+ if (lastIndex == -1) {
+ lastIndex = bucket->n;
}
- class DummyDocWriter : public DocWriter {
- public:
- DummyDocWriter(size_t sz) : _sz(sz) { }
- virtual void writeDocument(char* buf) const { /* no-op */ }
- virtual size_t documentSize() const { return _sz; }
- private:
- size_t _sz;
- };
-
- template <class BtreeLayout>
- Status BtreeLogic<BtreeLayout>::initAsEmpty(OperationContext* txn) {
- if (!_headManager->getHead(txn).isNull()) {
- return Status(ErrorCodes::InternalError, "index already initialized");
+ for (int i = firstIndex; i <= lastIndex; i++) {
+ const DiskLoc childLoc = childLocForPos(bucket, i);
+ if (!childLoc.isNull()) {
+ *txn->recoveryUnit()->writing(&getBucket(txn, childLoc)->parent) = bucketLoc;
}
-
- _headManager->setHead(txn, _addBucket(txn).toRecordId());
- return Status::OK();
}
+}
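
For readers following the algorithm rather than the diff, here is a minimal sketch (toy types, not MongoDB code) of the parent-pointer repair that fixParentPtrs() performs; ToyNode and toyFixParents are hypothetical names, and the node layout is simplified to sorted ints:

    #include <vector>

    struct ToyNode {
        std::vector<int> keys;           // sorted keys
        std::vector<ToyNode*> children;  // empty for leaves, else keys.size() + 1 entries
        ToyNode* parent = nullptr;
    };

    // Re-point every child at 'node' -- the repair fixParentPtrs() performs after
    // keys (and therefore child slots) have moved between buckets.
    void toyFixParents(ToyNode* node) {
        for (ToyNode* child : node->children) {
            child->parent = node;
        }
    }

The real version writes through the recovery unit so each parent update is journaled; the sketch just assigns.
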
- template <class BtreeLayout>
- DiskLoc BtreeLogic<BtreeLayout>::_addBucket(OperationContext* txn) {
- DummyDocWriter docWriter(BtreeLayout::BucketSize);
- StatusWith<RecordId> loc = _recordStore->insertRecord(txn, &docWriter, false);
- // XXX: remove this(?) or turn into massert or sanely bubble it back up.
- uassertStatusOK(loc.getStatus());
+template <class BtreeLayout>
+void BtreeLogic<BtreeLayout>::setInternalKey(OperationContext* txn,
+ BucketType* bucket,
+ const DiskLoc bucketLoc,
+ int keypos,
+ const DiskLoc recordLoc,
+ const KeyDataType& key,
+ const DiskLoc lchild,
+ const DiskLoc rchild) {
+ childLocForPos(bucket, keypos).Null();
+    // This may leave the bucket empty (n == 0), which is ok only as a transient state. In
+    // this case, the implementation of insertHere behaves correctly when n == 0 and as a
+    // side effect increments n.
+ _delKeyAtPos(bucket, keypos, true);
- // this is a new bucket, not referenced by anyone, probably don't need this lock
- BucketType* b = btreemod(txn, getBucket(txn, loc.getValue()));
- init(b);
- return DiskLoc::fromRecordId(loc.getValue());
- }
+ // Ensure we do not orphan neighbor's old child.
+ invariant(childLocForPos(bucket, keypos) == rchild);
- // static
- template <class BtreeLayout>
- void BtreeLogic<BtreeLayout>::dumpBucket(const BucketType* bucket, int indentLength) {
- log() << "BUCKET n:" << bucket->n << ", parent:" << hex << bucket->parent.getOfs() << dec;
+ // Just set temporarily - required to pass validation in insertHere()
+ childLocForPos(bucket, keypos) = lchild;
- const string indent = string(indentLength, ' ');
+ insertHere(txn, bucketLoc, keypos, key, recordLoc, lchild, rchild);
+}
- for (int i = 0; i < bucket->n; i++) {
- log() << '\n' << indent;
- FullKey k = getFullKey(bucket, i);
- string ks = k.data.toString();
- log() << " " << hex << k.prevChildBucket.getOfs() << "<-- prevChildBucket for " << i << '\n';
- log() << indent << " " << i << ' ' << ks.substr(0, 30)
- << " Loc:" << k.recordLoc.toString() << dec;
- if (getKeyHeader(bucket, i).isUnused()) {
- log() << " UNUSED";
- }
+/**
+ * Insert a key in this bucket, splitting if necessary.
+ *
+ * @keypos - where to insert the key, in the range 0..n (0 = make leftmost, n = make rightmost).
+ * NOTE: this function may free some data, and as a result the value passed for keypos may be
+ * invalid after calling insertHere().
+ *
+ * Some of the write intent signaling below relies on the implementation of the optimized write
+ * intent code in basicInsert().
+ */
+template <class BtreeLayout>
+void BtreeLogic<BtreeLayout>::insertHere(OperationContext* txn,
+ const DiskLoc bucketLoc,
+ int pos,
+ const KeyDataType& key,
+ const DiskLoc recordLoc,
+ const DiskLoc leftChildLoc,
+ const DiskLoc rightChildLoc) {
+ BucketType* bucket = getBucket(txn, bucketLoc);
+
+ if (!basicInsert(txn, bucket, bucketLoc, pos, key, recordLoc)) {
+ // If basicInsert() fails, the bucket will be packed as required by split().
+ split(txn,
+ btreemod(txn, bucket),
+ bucketLoc,
+ pos,
+ recordLoc,
+ key,
+ leftChildLoc,
+ rightChildLoc);
+ return;
+ }
+
+ KeyHeaderType* kn = &getKeyHeader(bucket, pos);
+ if (pos + 1 == bucket->n) {
+ // It's the last key.
+ if (bucket->nextChild != leftChildLoc) {
+ // XXX log more
+ invariant(false);
}
-
- log() << "\n" << indent << " " << hex << bucket->nextChild.getOfs() << dec << endl;
- }
-
- template <class BtreeLayout>
- DiskLoc BtreeLogic<BtreeLayout>::getDiskLoc(OperationContext* txn,
- const DiskLoc& bucketLoc,
- const int keyOffset) const {
- invariant(!bucketLoc.isNull());
- BucketType* bucket = getBucket(txn, bucketLoc);
- return getKeyHeader(bucket, keyOffset).recordLoc;
- }
-
- template <class BtreeLayout>
- BSONObj BtreeLogic<BtreeLayout>::getKey(OperationContext* txn,
- const DiskLoc& bucketLoc,
- const int keyOffset) const {
- invariant(!bucketLoc.isNull());
- BucketType* bucket = getBucket(txn, bucketLoc);
- int n = bucket->n;
- invariant(n != BtreeLayout::INVALID_N_SENTINEL);
- invariant(n >= 0);
- invariant(n < 10000);
- invariant(n != 0xffff);
-
- invariant(keyOffset >= 0);
- invariant(keyOffset < n);
-
- // XXX: should we really return an empty obj if keyOffset>=n?
- if (keyOffset >= n) {
- return BSONObj();
+ kn->prevChildBucket = bucket->nextChild;
+ invariant(kn->prevChildBucket == leftChildLoc);
+ *txn->recoveryUnit()->writing(&bucket->nextChild) = rightChildLoc;
+ if (!rightChildLoc.isNull()) {
+ *txn->recoveryUnit()->writing(&getBucket(txn, rightChildLoc)->parent) = bucketLoc;
}
- else {
- return getFullKey(bucket, keyOffset).data.toBson();
+ } else {
+ kn->prevChildBucket = leftChildLoc;
+ if (getKeyHeader(bucket, pos + 1).prevChildBucket != leftChildLoc) {
+ // XXX: log more
+ invariant(false);
}
- }
+ const LocType* pc = &getKeyHeader(bucket, pos + 1).prevChildBucket;
+ // Intent declared in basicInsert()
+ *const_cast<LocType*>(pc) = rightChildLoc;
+ if (!rightChildLoc.isNull()) {
+ *txn->recoveryUnit()->writing(&getBucket(txn, rightChildLoc)->parent) = bucketLoc;
+ }
+ }
+}
+
+template <class BtreeLayout>
+void BtreeLogic<BtreeLayout>::split(OperationContext* txn,
+ BucketType* bucket,
+ const DiskLoc bucketLoc,
+ int keypos,
+ const DiskLoc recordLoc,
+ const KeyDataType& key,
+ const DiskLoc lchild,
+ const DiskLoc rchild) {
+ int split = splitPos(bucket, keypos);
+ DiskLoc rLoc = _addBucket(txn);
+ BucketType* r = btreemod(txn, getBucket(txn, rLoc));
+
+ for (int i = split + 1; i < bucket->n; i++) {
+ FullKey kn = getFullKey(bucket, i);
+ invariant(pushBack(r, kn.recordLoc, kn.data, kn.prevChildBucket));
+ }
+ r->nextChild = bucket->nextChild;
+ assertValid(_indexName, r, _ordering);
+
+ r = NULL;
+ fixParentPtrs(txn, getBucket(txn, rLoc), rLoc);
+
+ FullKey splitkey = getFullKey(bucket, split);
+    // The splitkey gets promoted; its children will be thisLoc (l) and rLoc (r).
+ bucket->nextChild = splitkey.prevChildBucket;
+
+    // Because thisLoc is a descendant of parent, updating parent will not affect the packing
+    // or keys of thisLoc, and splitkey will be stable during the following:
+
+ if (bucket->parent.isNull()) {
+        // Promote splitkey to a parent node; make a new parent if we were the root.
+ DiskLoc L = _addBucket(txn);
+ BucketType* p = btreemod(txn, getBucket(txn, L));
+ invariant(pushBack(p, splitkey.recordLoc, splitkey.data, bucketLoc));
+ p->nextChild = rLoc;
+ assertValid(_indexName, p, _ordering);
+ bucket->parent = L;
+ _headManager->setHead(txn, L.toRecordId());
+ *txn->recoveryUnit()->writing(&getBucket(txn, rLoc)->parent) = bucket->parent;
+ } else {
+        // Set this before calling _insert: if it splits, it will run the fixParent() logic
+        // and change the value.
+ *txn->recoveryUnit()->writing(&getBucket(txn, rLoc)->parent) = bucket->parent;
+ _insert(txn,
+ getBucket(txn, bucket->parent),
+ bucket->parent,
+ splitkey.data,
+ splitkey.recordLoc,
+ true, // dupsallowed
+ bucketLoc,
+ rLoc);
+ }
+
+ int newpos = keypos;
+    // Note this may trash splitkey.key; thus we had to promote it before finishing up here.
+ truncateTo(bucket, split, newpos);
+
+    // Add our new key; there is room now.
+ if (keypos <= split) {
+ insertHere(txn, bucketLoc, newpos, key, recordLoc, lchild, rchild);
+ } else {
+ int kp = keypos - split - 1;
+ invariant(kp >= 0);
+ insertHere(txn, rLoc, kp, key, recordLoc, lchild, rchild);
+ }
+}
+
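
The split above packs a lot of bookkeeping around one simple idea: promote the median key, move the right half into a freshly allocated bucket, and re-point the moved children. A sketch under the same ToyNode assumptions as the earlier annotation (splitToy is a hypothetical name; the caller must insert the promoted key into the parent, exactly as _insert() does here):

    // Splits 'node' around its median; returns the new right sibling and writes
    // the promoted key to 'promoted'. Assumes node->keys is non-empty.
    ToyNode* splitToy(ToyNode* node, int* promoted) {
        const size_t mid = node->keys.size() / 2;  // crude stand-in for splitPos()
        *promoted = node->keys[mid];

        ToyNode* right = new ToyNode();
        right->parent = node->parent;
        right->keys.assign(node->keys.begin() + mid + 1, node->keys.end());
        if (!node->children.empty()) {
            right->children.assign(node->children.begin() + mid + 1, node->children.end());
            for (ToyNode* c : right->children) {
                c->parent = right;  // mirrors fixParentPtrs()
            }
            node->children.resize(mid + 1);
        }
        node->keys.resize(mid);  // mirrors truncateTo()
        return right;
    }
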
+class DummyDocWriter : public DocWriter {
+public:
+ DummyDocWriter(size_t sz) : _sz(sz) {}
+ virtual void writeDocument(char* buf) const { /* no-op */
+ }
+ virtual size_t documentSize() const {
+ return _sz;
+ }
+
+private:
+ size_t _sz;
+};
+
+template <class BtreeLayout>
+Status BtreeLogic<BtreeLayout>::initAsEmpty(OperationContext* txn) {
+ if (!_headManager->getHead(txn).isNull()) {
+ return Status(ErrorCodes::InternalError, "index already initialized");
+ }
+
+ _headManager->setHead(txn, _addBucket(txn).toRecordId());
+ return Status::OK();
+}
+
+template <class BtreeLayout>
+DiskLoc BtreeLogic<BtreeLayout>::_addBucket(OperationContext* txn) {
+ DummyDocWriter docWriter(BtreeLayout::BucketSize);
+ StatusWith<RecordId> loc = _recordStore->insertRecord(txn, &docWriter, false);
+ // XXX: remove this(?) or turn into massert or sanely bubble it back up.
+ uassertStatusOK(loc.getStatus());
+
+    // This is a new bucket, not referenced by anyone; we probably don't need this lock.
+ BucketType* b = btreemod(txn, getBucket(txn, loc.getValue()));
+ init(b);
+ return DiskLoc::fromRecordId(loc.getValue());
+}
+
+// static
+template <class BtreeLayout>
+void BtreeLogic<BtreeLayout>::dumpBucket(const BucketType* bucket, int indentLength) {
+ log() << "BUCKET n:" << bucket->n << ", parent:" << hex << bucket->parent.getOfs() << dec;
+
+ const string indent = string(indentLength, ' ');
+
+ for (int i = 0; i < bucket->n; i++) {
+ log() << '\n' << indent;
+ FullKey k = getFullKey(bucket, i);
+ string ks = k.data.toString();
+ log() << " " << hex << k.prevChildBucket.getOfs() << "<-- prevChildBucket for " << i
+ << '\n';
+ log() << indent << " " << i << ' ' << ks.substr(0, 30)
+ << " Loc:" << k.recordLoc.toString() << dec;
+ if (getKeyHeader(bucket, i).isUnused()) {
+ log() << " UNUSED";
+ }
+ }
+
+ log() << "\n" << indent << " " << hex << bucket->nextChild.getOfs() << dec << endl;
+}
+
+template <class BtreeLayout>
+DiskLoc BtreeLogic<BtreeLayout>::getDiskLoc(OperationContext* txn,
+ const DiskLoc& bucketLoc,
+ const int keyOffset) const {
+ invariant(!bucketLoc.isNull());
+ BucketType* bucket = getBucket(txn, bucketLoc);
+ return getKeyHeader(bucket, keyOffset).recordLoc;
+}
+
+template <class BtreeLayout>
+BSONObj BtreeLogic<BtreeLayout>::getKey(OperationContext* txn,
+ const DiskLoc& bucketLoc,
+ const int keyOffset) const {
+ invariant(!bucketLoc.isNull());
+ BucketType* bucket = getBucket(txn, bucketLoc);
+ int n = bucket->n;
+ invariant(n != BtreeLayout::INVALID_N_SENTINEL);
+ invariant(n >= 0);
+ invariant(n < 10000);
+ invariant(n != 0xffff);
+
+ invariant(keyOffset >= 0);
+ invariant(keyOffset < n);
+
+ // XXX: should we really return an empty obj if keyOffset>=n?
+ if (keyOffset >= n) {
+ return BSONObj();
+ } else {
+ return getFullKey(bucket, keyOffset).data.toBson();
+ }
+}
+
+template <class BtreeLayout>
+Status BtreeLogic<BtreeLayout>::touch(OperationContext* txn) const {
+ return _recordStore->touch(txn, NULL);
+}
+
+template <class BtreeLayout>
+long long BtreeLogic<BtreeLayout>::fullValidate(OperationContext* txn,
+ long long* unusedCount,
+ bool strict,
+ bool dumpBuckets,
+ unsigned depth) const {
+ return _fullValidate(txn, getRootLoc(txn), unusedCount, strict, dumpBuckets, depth);
+}
+
+template <class BtreeLayout>
+long long BtreeLogic<BtreeLayout>::_fullValidate(OperationContext* txn,
+ const DiskLoc bucketLoc,
+ long long* unusedCount,
+ bool strict,
+ bool dumpBuckets,
+ unsigned depth) const {
+ BucketType* bucket = getBucket(txn, bucketLoc);
+ assertValid(_indexName, bucket, _ordering, true);
- template <class BtreeLayout>
- Status BtreeLogic<BtreeLayout>::touch(OperationContext* txn) const {
- return _recordStore->touch( txn, NULL );
+ if (dumpBuckets) {
+ log() << bucketLoc.toString() << ' ';
+ dumpBucket(bucket, depth);
}
- template <class BtreeLayout>
- long long BtreeLogic<BtreeLayout>::fullValidate(OperationContext* txn,
- long long *unusedCount,
- bool strict,
- bool dumpBuckets,
- unsigned depth) const {
- return _fullValidate(txn, getRootLoc(txn), unusedCount, strict, dumpBuckets, depth);
- }
+ long long keyCount = 0;
- template <class BtreeLayout>
- long long BtreeLogic<BtreeLayout>::_fullValidate(OperationContext* txn,
- const DiskLoc bucketLoc,
- long long *unusedCount,
- bool strict,
- bool dumpBuckets,
- unsigned depth) const {
- BucketType* bucket = getBucket(txn, bucketLoc);
- assertValid(_indexName, bucket, _ordering, true);
+ for (int i = 0; i < bucket->n; i++) {
+ KeyHeaderType& kn = getKeyHeader(bucket, i);
- if (dumpBuckets) {
- log() << bucketLoc.toString() << ' ';
- dumpBucket(bucket, depth);
+ if (kn.isUsed()) {
+ keyCount++;
+ } else if (NULL != unusedCount) {
+ ++(*unusedCount);
}
- long long keyCount = 0;
-
- for (int i = 0; i < bucket->n; i++) {
- KeyHeaderType& kn = getKeyHeader(bucket, i);
-
- if (kn.isUsed()) {
- keyCount++;
- }
- else if (NULL != unusedCount) {
- ++(*unusedCount);
- }
-
- if (!kn.prevChildBucket.isNull()) {
- DiskLoc left = kn.prevChildBucket;
- BucketType* b = getBucket(txn, left);
-
- if (strict) {
- invariant(b->parent == bucketLoc);
- }
- else {
- wassert(b->parent == bucketLoc);
- }
-
- keyCount += _fullValidate(txn, left, unusedCount, strict, dumpBuckets, depth + 1);
- }
- }
+ if (!kn.prevChildBucket.isNull()) {
+ DiskLoc left = kn.prevChildBucket;
+ BucketType* b = getBucket(txn, left);
- if (!bucket->nextChild.isNull()) {
- BucketType* b = getBucket(txn, bucket->nextChild);
if (strict) {
invariant(b->parent == bucketLoc);
- }
- else {
+ } else {
wassert(b->parent == bucketLoc);
}
- keyCount += _fullValidate(txn, bucket->nextChild, unusedCount, strict, dumpBuckets, depth + 1);
+ keyCount += _fullValidate(txn, left, unusedCount, strict, dumpBuckets, depth + 1);
+ }
+ }
+
+ if (!bucket->nextChild.isNull()) {
+ BucketType* b = getBucket(txn, bucket->nextChild);
+ if (strict) {
+ invariant(b->parent == bucketLoc);
+ } else {
+ wassert(b->parent == bucketLoc);
}
- return keyCount;
+ keyCount +=
+ _fullValidate(txn, bucket->nextChild, unusedCount, strict, dumpBuckets, depth + 1);
}
- // XXX: remove this(?) used to not dump every key in assertValid.
- int nDumped = 0;
+ return keyCount;
+}
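
_fullValidate() above is, at heart, a recursive key count plus a parent-pointer check. A toy equivalent, reusing the hypothetical ToyNode from the earlier sketches (toyValidate is likewise a made-up name):

    #include <cassert>

    // Counts keys and checks that every child points back at its parent,
    // mirroring the invariant/wassert checks in _fullValidate().
    long long toyValidate(const ToyNode* node) {
        long long count = static_cast<long long>(node->keys.size());
        for (const ToyNode* child : node->children) {
            assert(child->parent == node);  // the parent-pointer check above
            count += toyValidate(child);
        }
        return count;
    }
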
- // static
- template <class BtreeLayout>
- void BtreeLogic<BtreeLayout>::assertValid(const std::string& ns,
- BucketType* bucket,
- const Ordering& ordering,
- bool force) {
- if (!force) {
- return;
- }
+// XXX: remove this(?) Used to avoid dumping every key in assertValid.
+int nDumped = 0;
- // this is very slow so don't do often
- {
- static int _k;
- if (++_k % 128) {
- return;
- }
+// static
+template <class BtreeLayout>
+void BtreeLogic<BtreeLayout>::assertValid(const std::string& ns,
+ BucketType* bucket,
+ const Ordering& ordering,
+ bool force) {
+ if (!force) {
+ return;
+ }
+
+    // This is very slow, so don't do it often.
+ {
+ static int _k;
+ if (++_k % 128) {
+ return;
}
+ }
- DEV {
- // slow:
- for (int i = 0; i < bucket->n - 1; i++) {
- FullKey firstKey = getFullKey(bucket, i);
- FullKey secondKey = getFullKey(bucket, i + 1);
- int z = firstKey.data.woCompare(secondKey.data, ordering);
- if (z > 0) {
- log() << "ERROR: btree key order corrupt. Keys:" << endl;
- if (++nDumped < 5) {
- for (int j = 0; j < bucket->n; j++) {
- log() << " " << getFullKey(bucket, j).data.toString() << endl;
- }
- dumpBucket(bucket);
+ DEV {
+ // slow:
+ for (int i = 0; i < bucket->n - 1; i++) {
+ FullKey firstKey = getFullKey(bucket, i);
+ FullKey secondKey = getFullKey(bucket, i + 1);
+ int z = firstKey.data.woCompare(secondKey.data, ordering);
+ if (z > 0) {
+ log() << "ERROR: btree key order corrupt. Keys:" << endl;
+ if (++nDumped < 5) {
+ for (int j = 0; j < bucket->n; j++) {
+ log() << " " << getFullKey(bucket, j).data.toString() << endl;
}
- wassert(false);
- break;
+ dumpBucket(bucket);
}
- else if (z == 0) {
- if (!(firstKey.header.recordLoc < secondKey.header.recordLoc)) {
- log() << "ERROR: btree key order corrupt (recordlocs wrong):" << endl;
- log() << " k(" << i << ")" << firstKey.data.toString()
- << " RL:" << firstKey.header.recordLoc.toString() << endl;
- log() << " k(" << i + 1 << ")" << secondKey.data.toString()
- << " RL:" << secondKey.header.recordLoc.toString() << endl;
- wassert(firstKey.header.recordLoc < secondKey.header.recordLoc);
- }
+ wassert(false);
+ break;
+ } else if (z == 0) {
+ if (!(firstKey.header.recordLoc < secondKey.header.recordLoc)) {
+ log() << "ERROR: btree key order corrupt (recordlocs wrong):" << endl;
+ log() << " k(" << i << ")" << firstKey.data.toString()
+ << " RL:" << firstKey.header.recordLoc.toString() << endl;
+ log() << " k(" << i + 1 << ")" << secondKey.data.toString()
+ << " RL:" << secondKey.header.recordLoc.toString() << endl;
+ wassert(firstKey.header.recordLoc < secondKey.header.recordLoc);
}
}
}
- else {
- //faster:
- if (bucket->n > 1) {
- FullKey k1 = getFullKey(bucket, 0);
- FullKey k2 = getFullKey(bucket, bucket->n - 1);
- int z = k1.data.woCompare(k2.data, ordering);
- //wassert( z <= 0 );
- if (z > 0) {
- log() << "Btree keys out of order in collection " << ns;
- ONCE {
- dumpBucket(bucket);
- }
- invariant(false);
+ }
+ else {
+ // faster:
+ if (bucket->n > 1) {
+ FullKey k1 = getFullKey(bucket, 0);
+ FullKey k2 = getFullKey(bucket, bucket->n - 1);
+ int z = k1.data.woCompare(k2.data, ordering);
+ // wassert( z <= 0 );
+ if (z > 0) {
+ log() << "Btree keys out of order in collection " << ns;
+ ONCE {
+ dumpBucket(bucket);
}
+ invariant(false);
}
}
}
+}
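
The DEV branch's slow check walks adjacent key pairs and asserts non-decreasing order (falling back to recordLoc order on ties). The same order check on the toy node type, keys only (toyKeysOrdered is a hypothetical name):

    // True when the bucket's keys are in non-decreasing order, the property
    // the "btree key order corrupt" check above enforces.
    bool toyKeysOrdered(const ToyNode* node) {
        for (size_t i = 0; i + 1 < node->keys.size(); i++) {
            if (node->keys[i] > node->keys[i + 1]) {
                return false;
            }
        }
        return true;
    }
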
- template <class BtreeLayout>
- Status BtreeLogic<BtreeLayout>::insert(OperationContext* txn,
- const BSONObj& rawKey,
- const DiskLoc& value,
- bool dupsAllowed) {
- KeyDataOwnedType key(rawKey);
+template <class BtreeLayout>
+Status BtreeLogic<BtreeLayout>::insert(OperationContext* txn,
+ const BSONObj& rawKey,
+ const DiskLoc& value,
+ bool dupsAllowed) {
+ KeyDataOwnedType key(rawKey);
- if (key.dataSize() > BtreeLayout::KeyMax) {
- string msg = str::stream() << "Btree::insert: key too large to index, failing "
- << _indexName << ' '
- << key.dataSize() << ' ' << key.toString();
- return Status(ErrorCodes::KeyTooLong, msg);
- }
-
- Status status = _insert(txn,
- getRoot(txn),
- getRootLoc(txn),
- key,
- value,
- dupsAllowed,
- DiskLoc(),
- DiskLoc());
-
- assertValid(_indexName, getRoot(txn), _ordering);
- return status;
+ if (key.dataSize() > BtreeLayout::KeyMax) {
+ string msg = str::stream() << "Btree::insert: key too large to index, failing "
+ << _indexName << ' ' << key.dataSize() << ' ' << key.toString();
+ return Status(ErrorCodes::KeyTooLong, msg);
}
- template <class BtreeLayout>
- Status BtreeLogic<BtreeLayout>::_insert(OperationContext* txn,
- BucketType* bucket,
- const DiskLoc bucketLoc,
- const KeyDataType& key,
- const DiskLoc recordLoc,
- bool dupsAllowed,
- const DiskLoc leftChild,
- const DiskLoc rightChild) {
- invariant( key.dataSize() > 0 );
-
- int pos;
- bool found;
- Status findStatus = _find(txn, bucket, key, recordLoc, !dupsAllowed, &pos, &found);
- if (!findStatus.isOK()) {
- return findStatus;
- }
+ Status status =
+ _insert(txn, getRoot(txn), getRootLoc(txn), key, value, dupsAllowed, DiskLoc(), DiskLoc());
- if (found) {
- KeyHeaderType& header = getKeyHeader(bucket, pos);
- if (header.isUnused()) {
- LOG(4) << "btree _insert: reusing unused key" << endl;
- massert(17433, "_insert: reuse key but lchild is not null", leftChild.isNull());
- massert(17434, "_insert: reuse key but rchild is not null", rightChild.isNull());
- txn->recoveryUnit()->writing(&header)->setUsed();
- return Status::OK();
- }
- // The logic in _find() prohibits finding and returning a position if the 'used' bit
- // in the header is set and dups are disallowed.
- invariant(dupsAllowed);
- return Status(ErrorCodes::DuplicateKeyValue, "key/value already in index");
- }
+ assertValid(_indexName, getRoot(txn), _ordering);
+ return status;
+}
- DiskLoc childLoc = childLocForPos(bucket, pos);
-
- // In current usage, rightChild is NULL for a new key and is not NULL when we are
- // promoting a split key. These are the only two cases where _insert() is called
- // currently.
- if (childLoc.isNull() || !rightChild.isNull()) {
- insertHere(txn, bucketLoc, pos, key, recordLoc, leftChild, rightChild);
+template <class BtreeLayout>
+Status BtreeLogic<BtreeLayout>::_insert(OperationContext* txn,
+ BucketType* bucket,
+ const DiskLoc bucketLoc,
+ const KeyDataType& key,
+ const DiskLoc recordLoc,
+ bool dupsAllowed,
+ const DiskLoc leftChild,
+ const DiskLoc rightChild) {
+ invariant(key.dataSize() > 0);
+
+ int pos;
+ bool found;
+ Status findStatus = _find(txn, bucket, key, recordLoc, !dupsAllowed, &pos, &found);
+ if (!findStatus.isOK()) {
+ return findStatus;
+ }
+
+ if (found) {
+ KeyHeaderType& header = getKeyHeader(bucket, pos);
+ if (header.isUnused()) {
+ LOG(4) << "btree _insert: reusing unused key" << endl;
+ massert(17433, "_insert: reuse key but lchild is not null", leftChild.isNull());
+ massert(17434, "_insert: reuse key but rchild is not null", rightChild.isNull());
+ txn->recoveryUnit()->writing(&header)->setUsed();
return Status::OK();
}
- else {
- return _insert(txn,
- getBucket(txn, childLoc),
- childLoc,
- key,
- recordLoc,
- dupsAllowed,
- DiskLoc(),
- DiskLoc());
- }
+ // The logic in _find() prohibits finding and returning a position if the 'used' bit
+ // in the header is set and dups are disallowed.
+ invariant(dupsAllowed);
+ return Status(ErrorCodes::DuplicateKeyValue, "key/value already in index");
}
- template <class BtreeLayout>
- DiskLoc BtreeLogic<BtreeLayout>::advance(OperationContext* txn,
- const DiskLoc& bucketLoc,
- int* posInOut,
- int direction) const {
- BucketType* bucket = getBucket(txn, bucketLoc);
-
- if (*posInOut < 0 || *posInOut >= bucket->n ) {
- log() << "ASSERT failure advancing btree bucket" << endl;
- log() << " thisLoc: " << bucketLoc.toString() << endl;
- log() << " keyOfs: " << *posInOut << " n:" << bucket->n << " direction: " << direction << endl;
- // log() << bucketSummary() << endl;
- invariant(false);
- }
+ DiskLoc childLoc = childLocForPos(bucket, pos);
- // XXX document
- int adj = direction < 0 ? 1 : 0;
- int ko = *posInOut + direction;
-
- // Look down if we need to.
- DiskLoc nextDownLoc = childLocForPos(bucket, ko + adj);
- BucketType* nextDown = getBucket(txn, nextDownLoc);
- if (NULL != nextDown) {
- for (;;) {
- if (direction > 0) {
- *posInOut = 0;
- }
- else {
- *posInOut = nextDown->n - 1;
- }
- DiskLoc newNextDownLoc = childLocForPos(nextDown, *posInOut + adj);
- BucketType* newNextDownBucket = getBucket(txn, newNextDownLoc);
- if (NULL == newNextDownBucket) {
- break;
- }
- nextDownLoc = newNextDownLoc;
- nextDown = newNextDownBucket;
- }
- return nextDownLoc;
- }
+    // In current usage, rightChild is NULL for a new key and non-NULL when we are promoting
+    // a split key. These are the only two cases in which _insert() is currently called.
+ if (childLoc.isNull() || !rightChild.isNull()) {
+ insertHere(txn, bucketLoc, pos, key, recordLoc, leftChild, rightChild);
+ return Status::OK();
+ } else {
+ return _insert(txn,
+ getBucket(txn, childLoc),
+ childLoc,
+ key,
+ recordLoc,
+ dupsAllowed,
+ DiskLoc(),
+ DiskLoc());
+ }
+}
+
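
_insert()'s common path (rightChild null) is a plain descend-to-leaf insertion. A toy analogue under the same assumptions as the sketches above (toyInsert is hypothetical and omits the split-on-overflow step the real code performs via insertHere()/split(), and the dup/unused-key handling):

    // Descend to the leaf where 'key' belongs and insert it there.
    void toyInsert(ToyNode* node, int key) {
        size_t pos = 0;
        while (pos < node->keys.size() && node->keys[pos] < key) {
            pos++;
        }
        if (node->children.empty()) {
            node->keys.insert(node->keys.begin() + pos, key);
            return;  // a full implementation would split here when over capacity
        }
        toyInsert(node->children[pos], key);
    }
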
+template <class BtreeLayout>
+DiskLoc BtreeLogic<BtreeLayout>::advance(OperationContext* txn,
+ const DiskLoc& bucketLoc,
+ int* posInOut,
+ int direction) const {
+ BucketType* bucket = getBucket(txn, bucketLoc);
+
+ if (*posInOut < 0 || *posInOut >= bucket->n) {
+ log() << "ASSERT failure advancing btree bucket" << endl;
+ log() << " thisLoc: " << bucketLoc.toString() << endl;
+ log() << " keyOfs: " << *posInOut << " n:" << bucket->n << " direction: " << direction
+ << endl;
+ // log() << bucketSummary() << endl;
+ invariant(false);
+ }
- // Looking down isn't the right choice, move forward.
- if (ko < bucket->n && ko >= 0) {
- *posInOut = ko;
- return bucketLoc;
- }
+ // XXX document
+ int adj = direction < 0 ? 1 : 0;
+ int ko = *posInOut + direction;
- // Hit the end of the bucket, move up and over.
- DiskLoc childLoc = bucketLoc;
- DiskLoc ancestor = getBucket(txn, bucketLoc)->parent;
+ // Look down if we need to.
+ DiskLoc nextDownLoc = childLocForPos(bucket, ko + adj);
+ BucketType* nextDown = getBucket(txn, nextDownLoc);
+ if (NULL != nextDown) {
for (;;) {
- if (ancestor.isNull()) {
- break;
+ if (direction > 0) {
+ *posInOut = 0;
+ } else {
+ *posInOut = nextDown->n - 1;
}
- BucketType* an = getBucket(txn, ancestor);
- for (int i = 0; i < an->n; i++) {
- if (childLocForPos(an, i + adj) == childLoc) {
- *posInOut = i;
- return ancestor;
- }
+ DiskLoc newNextDownLoc = childLocForPos(nextDown, *posInOut + adj);
+ BucketType* newNextDownBucket = getBucket(txn, newNextDownLoc);
+ if (NULL == newNextDownBucket) {
+ break;
}
- invariant(direction < 0 || an->nextChild == childLoc);
- // parent exhausted also, keep going up
- childLoc = ancestor;
- ancestor = an->parent;
+ nextDownLoc = newNextDownLoc;
+ nextDown = newNextDownBucket;
}
+ return nextDownLoc;
+ }
- return DiskLoc();
+ // Looking down isn't the right choice, move forward.
+ if (ko < bucket->n && ko >= 0) {
+ *posInOut = ko;
+ return bucketLoc;
}
- template <class BtreeLayout>
- bool BtreeLogic<BtreeLayout>::keyIsUsed(OperationContext* txn,
- const DiskLoc& loc,
- const int& pos) const {
- return getKeyHeader(getBucket(txn, loc), pos).isUsed();
+ // Hit the end of the bucket, move up and over.
+ DiskLoc childLoc = bucketLoc;
+ DiskLoc ancestor = getBucket(txn, bucketLoc)->parent;
+ for (;;) {
+ if (ancestor.isNull()) {
+ break;
+ }
+ BucketType* an = getBucket(txn, ancestor);
+ for (int i = 0; i < an->n; i++) {
+ if (childLocForPos(an, i + adj) == childLoc) {
+ *posInOut = i;
+ return ancestor;
+ }
+ }
+ invariant(direction < 0 || an->nextChild == childLoc);
+ // parent exhausted also, keep going up
+ childLoc = ancestor;
+ ancestor = an->parent;
}
- template <class BtreeLayout>
- bool BtreeLogic<BtreeLayout>::locate(OperationContext* txn,
- const BSONObj& key,
- const DiskLoc& recordLoc,
- const int direction,
- int* posOut,
- DiskLoc* bucketLocOut) const {
- // Clear out any data.
- *posOut = 0;
- *bucketLocOut = DiskLoc();
+ return DiskLoc();
+}
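
advance() with direction == 1 is the classic parent-pointer in-order successor: look down into the right subtree if there is one, else move forward within the bucket, else climb until we arrive from a left child. A sketch on the hypothetical ToyNode (toyAdvance is a made-up name):

    #include <utility>

    // Returns the next (node, index) pair in key order, or {nullptr, -1} at the end.
    std::pair<ToyNode*, int> toyAdvance(ToyNode* node, int pos) {
        if (!node->children.empty()) {
            // Look down: leftmost key of the subtree to the right of keys[pos].
            ToyNode* down = node->children[pos + 1];
            while (!down->children.empty()) {
                down = down->children.front();
            }
            return {down, 0};
        }
        if (pos + 1 < static_cast<int>(node->keys.size())) {
            return {node, pos + 1};  // move forward within the bucket
        }
        // Hit the end of the bucket: climb until we come up from a left child.
        ToyNode* child = node;
        ToyNode* ancestor = node->parent;
        while (ancestor) {
            for (size_t i = 0; i < ancestor->keys.size(); i++) {
                if (ancestor->children[i] == child) {
                    return {ancestor, static_cast<int>(i)};
                }
            }
            child = ancestor;  // parent exhausted too, keep going up
            ancestor = ancestor->parent;
        }
        return {nullptr, -1};
    }
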
- bool found = false;
- KeyDataOwnedType owned(key);
+template <class BtreeLayout>
+bool BtreeLogic<BtreeLayout>::keyIsUsed(OperationContext* txn,
+ const DiskLoc& loc,
+ const int& pos) const {
+ return getKeyHeader(getBucket(txn, loc), pos).isUsed();
+}
- *bucketLocOut = _locate(txn, getRootLoc(txn), owned, posOut, &found, recordLoc, direction);
+template <class BtreeLayout>
+bool BtreeLogic<BtreeLayout>::locate(OperationContext* txn,
+ const BSONObj& key,
+ const DiskLoc& recordLoc,
+ const int direction,
+ int* posOut,
+ DiskLoc* bucketLocOut) const {
+ // Clear out any data.
+ *posOut = 0;
+ *bucketLocOut = DiskLoc();
- if (!found) {
- return false;
- }
+ bool found = false;
+ KeyDataOwnedType owned(key);
- skipUnusedKeys(txn, bucketLocOut, posOut, direction);
+ *bucketLocOut = _locate(txn, getRootLoc(txn), owned, posOut, &found, recordLoc, direction);
- return found;
+ if (!found) {
+ return false;
}
- /**
- * Recursively walk down the btree, looking for a match of key and recordLoc.
- * Caller should have acquired lock on bucketLoc.
- */
- template <class BtreeLayout>
- DiskLoc BtreeLogic<BtreeLayout>::_locate(OperationContext* txn,
- const DiskLoc& bucketLoc,
- const KeyDataType& key,
- int* posOut,
- bool* foundOut,
- const DiskLoc& recordLoc,
- const int direction) const {
- int position;
- BucketType* bucket = getBucket(txn, bucketLoc);
- // XXX: owned to not owned conversion(?)
- _find(txn, bucket, key, recordLoc, false, &position, foundOut);
-
- // Look in our current bucket.
- if (*foundOut) {
- *posOut = position;
- return bucketLoc;
- }
+ skipUnusedKeys(txn, bucketLocOut, posOut, direction);
- // Not in our current bucket. 'position' tells us where there may be a child.
- DiskLoc childLoc = childLocForPos(bucket, position);
+ return found;
+}
- if (!childLoc.isNull()) {
- DiskLoc inChild = _locate(txn, childLoc, key, posOut, foundOut, recordLoc, direction);
- if (!inChild.isNull()) {
- return inChild;
- }
+/**
+ * Recursively walk down the btree, looking for a match of key and recordLoc.
+ * Caller should have acquired lock on bucketLoc.
+ */
+template <class BtreeLayout>
+DiskLoc BtreeLogic<BtreeLayout>::_locate(OperationContext* txn,
+ const DiskLoc& bucketLoc,
+ const KeyDataType& key,
+ int* posOut,
+ bool* foundOut,
+ const DiskLoc& recordLoc,
+ const int direction) const {
+ int position;
+ BucketType* bucket = getBucket(txn, bucketLoc);
+ // XXX: owned to not owned conversion(?)
+ _find(txn, bucket, key, recordLoc, false, &position, foundOut);
+
+ // Look in our current bucket.
+ if (*foundOut) {
+ *posOut = position;
+ return bucketLoc;
+ }
+
+ // Not in our current bucket. 'position' tells us where there may be a child.
+ DiskLoc childLoc = childLocForPos(bucket, position);
+
+ if (!childLoc.isNull()) {
+ DiskLoc inChild = _locate(txn, childLoc, key, posOut, foundOut, recordLoc, direction);
+ if (!inChild.isNull()) {
+ return inChild;
}
+ }
- *posOut = position;
+ *posOut = position;
- if (direction < 0) {
- // The key *would* go to our left.
- (*posOut)--;
- if (-1 == *posOut) {
- // But there's no space for that in our bucket.
- return DiskLoc();
- }
- else {
- return bucketLoc;
- }
+ if (direction < 0) {
+ // The key *would* go to our left.
+ (*posOut)--;
+ if (-1 == *posOut) {
+ // But there's no space for that in our bucket.
+ return DiskLoc();
+ } else {
+ return bucketLoc;
}
- else {
- // The key would go to our right...
- if (bucket->n == *posOut) {
- return DiskLoc();
- }
- else {
- // But only if there is space.
- return bucketLoc;
- }
+ } else {
+ // The key would go to our right...
+ if (bucket->n == *posOut) {
+ return DiskLoc();
+ } else {
+ // But only if there is space.
+ return bucketLoc;
}
}
+}
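
_locate() is a recursive search that also reports the would-be position on a miss. A toy version (toyLocate is a hypothetical name; on a miss it simply stops in the leaf where the key would go, whereas the real code also adjusts the position by direction):

    // Finds the (node, pos) of 'key'; returns false if absent.
    bool toyLocate(ToyNode* node, int key, ToyNode** nodeOut, int* posOut) {
        size_t pos = 0;
        while (pos < node->keys.size() && node->keys[pos] < key) {
            pos++;
        }
        if (pos < node->keys.size() && node->keys[pos] == key) {
            *nodeOut = node;
            *posOut = static_cast<int>(pos);
            return true;
        }
        if (!node->children.empty()) {
            return toyLocate(node->children[pos], key, nodeOut, posOut);
        }
        return false;  // not found; 'pos' is where it would sit in this leaf
    }
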
- // TODO relcoate
- template <class BtreeLayout>
- bool BtreeLogic<BtreeLayout>::isHead(BucketType* bucket) {
- return bucket->parent.isNull();
- }
+// TODO relocate
+template <class BtreeLayout>
+bool BtreeLogic<BtreeLayout>::isHead(BucketType* bucket) {
+ return bucket->parent.isNull();
+}
- template <class BtreeLayout>
- typename BtreeLogic<BtreeLayout>::BucketType*
- BtreeLogic<BtreeLayout>::getBucket(OperationContext* txn, const RecordId id) const {
- if (id.isNull()) {
- return NULL;
- }
+template <class BtreeLayout>
+typename BtreeLogic<BtreeLayout>::BucketType* BtreeLogic<BtreeLayout>::getBucket(
+ OperationContext* txn, const RecordId id) const {
+ if (id.isNull()) {
+ return NULL;
+ }
- RecordData recordData = _recordStore->dataFor(txn, id);
+ RecordData recordData = _recordStore->dataFor(txn, id);
- // we need to be working on the raw bytes, not a transient copy
- invariant(!recordData.isOwned());
+ // we need to be working on the raw bytes, not a transient copy
+ invariant(!recordData.isOwned());
- return reinterpret_cast<BucketType*>(const_cast<char*>(recordData.data()));
- }
+ return reinterpret_cast<BucketType*>(const_cast<char*>(recordData.data()));
+}
- template <class BtreeLayout>
- typename BtreeLogic<BtreeLayout>::BucketType*
- BtreeLogic<BtreeLayout>::getRoot(OperationContext* txn) const {
- return getBucket(txn, _headManager->getHead(txn));
- }
+template <class BtreeLayout>
+typename BtreeLogic<BtreeLayout>::BucketType* BtreeLogic<BtreeLayout>::getRoot(
+ OperationContext* txn) const {
+ return getBucket(txn, _headManager->getHead(txn));
+}
- template <class BtreeLayout>
- DiskLoc
- BtreeLogic<BtreeLayout>::getRootLoc(OperationContext* txn) const {
- return DiskLoc::fromRecordId(_headManager->getHead(txn));
- }
+template <class BtreeLayout>
+DiskLoc BtreeLogic<BtreeLayout>::getRootLoc(OperationContext* txn) const {
+ return DiskLoc::fromRecordId(_headManager->getHead(txn));
+}
- template <class BtreeLayout>
- typename BtreeLogic<BtreeLayout>::BucketType*
- BtreeLogic<BtreeLayout>::childForPos(OperationContext* txn, BucketType* bucket, int pos) const {
- DiskLoc loc = childLocForPos(bucket, pos);
- return getBucket(txn, loc);
- }
+template <class BtreeLayout>
+typename BtreeLogic<BtreeLayout>::BucketType* BtreeLogic<BtreeLayout>::childForPos(
+ OperationContext* txn, BucketType* bucket, int pos) const {
+ DiskLoc loc = childLocForPos(bucket, pos);
+ return getBucket(txn, loc);
+}
- template <class BtreeLayout>
- typename BtreeLogic<BtreeLayout>::LocType&
- BtreeLogic<BtreeLayout>::childLocForPos(BucketType* bucket, int pos) {
- if (bucket->n == pos) {
- return bucket->nextChild;
- }
- else {
- return getKeyHeader(bucket, pos).prevChildBucket;
- }
+template <class BtreeLayout>
+typename BtreeLogic<BtreeLayout>::LocType& BtreeLogic<BtreeLayout>::childLocForPos(
+ BucketType* bucket, int pos) {
+ if (bucket->n == pos) {
+ return bucket->nextChild;
+ } else {
+ return getKeyHeader(bucket, pos).prevChildBucket;
}
+}
- //
- // And, template stuff.
- //
+//
+// And, template stuff.
+//
- // V0 format.
- template struct FixedWidthKey<DiskLoc>;
- template class BtreeLogic<BtreeLayoutV0>;
+// V0 format.
+template struct FixedWidthKey<DiskLoc>;
+template class BtreeLogic<BtreeLayoutV0>;
- // V1 format.
- template struct FixedWidthKey<DiskLoc56Bit>;
- template class BtreeLogic<BtreeLayoutV1>;
+// V1 format.
+template struct FixedWidthKey<DiskLoc56Bit>;
+template class BtreeLogic<BtreeLayoutV1>;
} // namespace mongo
diff --git a/src/mongo/db/storage/mmap_v1/btree/btree_logic.h b/src/mongo/db/storage/mmap_v1/btree/btree_logic.h
index 48a307f3b4d..3c742170bcd 100644
--- a/src/mongo/db/storage/mmap_v1/btree/btree_logic.h
+++ b/src/mongo/db/storage/mmap_v1/btree/btree_logic.h
@@ -41,539 +41,522 @@
namespace mongo {
- class RecordStore;
- class SavedCursorRegistry;
+class RecordStore;
+class SavedCursorRegistry;
- // Used for unit-testing only
- template <class BtreeLayout> class BtreeLogicTestBase;
- template <class BtreeLayout> class ArtificialTreeBuilder;
-
- /**
- * This is the logic for manipulating the Btree. It is (mostly) independent of the on-disk
- * format.
- */
- template <class BtreeLayout>
- class BtreeLogic {
- public:
- // AKA _keyNode
- typedef typename BtreeLayout::FixedWidthKeyType KeyHeaderType;
-
- // AKA Key
- typedef typename BtreeLayout::KeyType KeyDataType;
+// Used for unit-testing only
+template <class BtreeLayout>
+class BtreeLogicTestBase;
+template <class BtreeLayout>
+class ArtificialTreeBuilder;
- // AKA KeyOwned
- typedef typename BtreeLayout::KeyOwnedType KeyDataOwnedType;
+/**
+ * This is the logic for manipulating the Btree. It is (mostly) independent of the on-disk
+ * format.
+ */
+template <class BtreeLayout>
+class BtreeLogic {
+public:
+ // AKA _keyNode
+ typedef typename BtreeLayout::FixedWidthKeyType KeyHeaderType;
- // AKA Loc
- typedef typename BtreeLayout::LocType LocType;
+ // AKA Key
+ typedef typename BtreeLayout::KeyType KeyDataType;
- // AKA BucketBasics or BtreeBucket, either one.
- typedef typename BtreeLayout::BucketType BucketType;
+ // AKA KeyOwned
+ typedef typename BtreeLayout::KeyOwnedType KeyDataOwnedType;
- /**
- * 'head' manages the catalog information.
- * 'store' allocates and frees buckets.
- * 'ordering' is meta-information we store in the catalog.
- * 'indexName' is a string identifying the index that we use to print errors with.
- */
- BtreeLogic(HeadManager* head,
- RecordStore* store,
- SavedCursorRegistry* cursors,
- const Ordering& ordering,
- const std::string& indexName)
- : _headManager(head),
- _recordStore(store),
- _cursorRegistry(cursors),
- _ordering(ordering),
- _indexName(indexName) {
- }
+ // AKA Loc
+ typedef typename BtreeLayout::LocType LocType;
- //
- // Public-facing
- //
+ // AKA BucketBasics or BtreeBucket, either one.
+ typedef typename BtreeLayout::BucketType BucketType;
- class Builder {
- public:
- typedef typename BtreeLayout::KeyOwnedType KeyDataOwnedType;
- typedef typename BtreeLayout::KeyType KeyDataType;
+ /**
+ * 'head' manages the catalog information.
+ * 'store' allocates and frees buckets.
+ * 'ordering' is meta-information we store in the catalog.
+ * 'indexName' is a string identifying the index that we use to print errors with.
+ */
+ BtreeLogic(HeadManager* head,
+ RecordStore* store,
+ SavedCursorRegistry* cursors,
+ const Ordering& ordering,
+ const std::string& indexName)
+ : _headManager(head),
+ _recordStore(store),
+ _cursorRegistry(cursors),
+ _ordering(ordering),
+ _indexName(indexName) {}
+
+ //
+ // Public-facing
+ //
+
+ class Builder {
+ public:
+ typedef typename BtreeLayout::KeyOwnedType KeyDataOwnedType;
+ typedef typename BtreeLayout::KeyType KeyDataType;
- Status addKey(const BSONObj& key, const DiskLoc& loc);
+ Status addKey(const BSONObj& key, const DiskLoc& loc);
- private:
- friend class BtreeLogic;
+ private:
+ friend class BtreeLogic;
- class SetRightLeafLocChange;
+ class SetRightLeafLocChange;
- Builder(BtreeLogic* logic, OperationContext* txn, bool dupsAllowed);
+ Builder(BtreeLogic* logic, OperationContext* txn, bool dupsAllowed);
- /**
- * Creates and returns a new empty bucket to the right of leftSib, maintaining the
- * internal consistency of the tree. leftSib must be the right-most child of its parent
- * or it must be the root.
- */
- DiskLoc newBucket(BucketType* leftSib, DiskLoc leftSibLoc);
+ /**
+ * Creates and returns a new empty bucket to the right of leftSib, maintaining the
+ * internal consistency of the tree. leftSib must be the right-most child of its parent
+ * or it must be the root.
+ */
+ DiskLoc newBucket(BucketType* leftSib, DiskLoc leftSibLoc);
- BucketType* _getModifiableBucket(DiskLoc loc);
- BucketType* _getBucket(DiskLoc loc);
+ BucketType* _getModifiableBucket(DiskLoc loc);
+ BucketType* _getBucket(DiskLoc loc);
- // Not owned.
- BtreeLogic* _logic;
+ // Not owned.
+ BtreeLogic* _logic;
- DiskLoc _rightLeafLoc; // DiskLoc of right-most (highest) leaf bucket.
- bool _dupsAllowed;
- std::unique_ptr<KeyDataOwnedType> _keyLast;
+ DiskLoc _rightLeafLoc; // DiskLoc of right-most (highest) leaf bucket.
+ bool _dupsAllowed;
+ std::unique_ptr<KeyDataOwnedType> _keyLast;
- // Not owned.
- OperationContext* _txn;
- };
+ // Not owned.
+ OperationContext* _txn;
+ };
- /**
- * Caller owns the returned pointer.
- * 'this' must outlive the returned pointer.
- */
- Builder* newBuilder(OperationContext* txn, bool dupsAllowed);
+ /**
+ * Caller owns the returned pointer.
+ * 'this' must outlive the returned pointer.
+ */
+ Builder* newBuilder(OperationContext* txn, bool dupsAllowed);
- Status dupKeyCheck(OperationContext* txn,
- const BSONObj& key,
- const DiskLoc& loc) const;
+ Status dupKeyCheck(OperationContext* txn, const BSONObj& key, const DiskLoc& loc) const;
- Status insert(OperationContext* txn,
- const BSONObj& rawKey,
- const DiskLoc& value,
- bool dupsAllowed);
+ Status insert(OperationContext* txn,
+ const BSONObj& rawKey,
+ const DiskLoc& value,
+ bool dupsAllowed);
- /**
- * Navigates down the tree and locates the bucket and position containing a record with
- * the specified <key, recordLoc> combination.
- *
- * @return true if the exact <key, recordLoc> was found. Otherwise, false and the
- * bucketLocOut would contain the bucket containing key which is before or after the
- * searched one (dependent on the direction).
- */
- bool locate(OperationContext* txn,
- const BSONObj& key,
- const DiskLoc& recordLoc,
- const int direction,
- int* posOut,
- DiskLoc* bucketLocOut) const;
+ /**
+ * Navigates down the tree and locates the bucket and position containing a record with
+ * the specified <key, recordLoc> combination.
+ *
+     * @return true if the exact <key, recordLoc> was found. Otherwise returns false, and
+     * bucketLocOut contains the bucket holding the key immediately before or after the
+     * searched one (depending on the direction).
+ */
+ bool locate(OperationContext* txn,
+ const BSONObj& key,
+ const DiskLoc& recordLoc,
+ const int direction,
+ int* posOut,
+ DiskLoc* bucketLocOut) const;
- void advance(OperationContext* txn,
- DiskLoc* bucketLocInOut,
- int* posInOut,
- int direction) const;
+ void advance(OperationContext* txn,
+ DiskLoc* bucketLocInOut,
+ int* posInOut,
+ int direction) const;
- bool exists(OperationContext* txn, const KeyDataType& key) const;
+ bool exists(OperationContext* txn, const KeyDataType& key) const;
- bool unindex(OperationContext* txn,
- const BSONObj& key,
- const DiskLoc& recordLoc);
+ bool unindex(OperationContext* txn, const BSONObj& key, const DiskLoc& recordLoc);
- bool isEmpty(OperationContext* txn) const;
+ bool isEmpty(OperationContext* txn) const;
- long long fullValidate(OperationContext*,
- long long *unusedCount,
- bool strict,
- bool dumpBuckets,
- unsigned depth) const;
+ long long fullValidate(OperationContext*,
+ long long* unusedCount,
+ bool strict,
+ bool dumpBuckets,
+ unsigned depth) const;
- DiskLoc getDiskLoc(OperationContext* txn,
- const DiskLoc& bucketLoc,
- const int keyOffset) const;
+ DiskLoc getDiskLoc(OperationContext* txn, const DiskLoc& bucketLoc, const int keyOffset) const;
- BSONObj getKey(OperationContext* txn,
- const DiskLoc& bucketLoc,
- const int keyOffset) const;
+ BSONObj getKey(OperationContext* txn, const DiskLoc& bucketLoc, const int keyOffset) const;
- DiskLoc getHead(OperationContext* txn) const {
- return DiskLoc::fromRecordId(_headManager->getHead(txn));
- }
+ DiskLoc getHead(OperationContext* txn) const {
+ return DiskLoc::fromRecordId(_headManager->getHead(txn));
+ }
- Status touch(OperationContext* txn) const;
+ Status touch(OperationContext* txn) const;
- //
- // Composite key navigation methods
- //
+ //
+ // Composite key navigation methods
+ //
- void customLocate(OperationContext* txn,
- DiskLoc* locInOut,
- int* keyOfsInOut,
- const IndexSeekPoint& seekPoint,
- int direction) const;
+ void customLocate(OperationContext* txn,
+ DiskLoc* locInOut,
+ int* keyOfsInOut,
+ const IndexSeekPoint& seekPoint,
+ int direction) const;
- void advanceTo(OperationContext*,
- DiskLoc* thisLocInOut,
- int* keyOfsInOut,
- const IndexSeekPoint& seekPoint,
- int direction) const;
+ void advanceTo(OperationContext*,
+ DiskLoc* thisLocInOut,
+ int* keyOfsInOut,
+ const IndexSeekPoint& seekPoint,
+ int direction) const;
- void restorePosition(OperationContext* txn,
- const BSONObj& savedKey,
- const DiskLoc& savedLoc,
- int direction,
- DiskLoc* bucketInOut,
- int* keyOffsetInOut) const;
+ void restorePosition(OperationContext* txn,
+ const BSONObj& savedKey,
+ const DiskLoc& savedLoc,
+ int direction,
+ DiskLoc* bucketInOut,
+ int* keyOffsetInOut) const;
- //
- // Creation and deletion
- //
+ //
+ // Creation and deletion
+ //
- /**
- * Returns OK if the index was uninitialized before, error status otherwise.
- */
- Status initAsEmpty(OperationContext* txn);
+ /**
+ * Returns OK if the index was uninitialized before, error status otherwise.
+ */
+ Status initAsEmpty(OperationContext* txn);
- //
- // Size constants
- //
+ //
+ // Size constants
+ //
- const RecordStore* getRecordStore() const { return _recordStore; }
+ const RecordStore* getRecordStore() const {
+ return _recordStore;
+ }
- SavedCursorRegistry* savedCursors() const { return _cursorRegistry; }
+ SavedCursorRegistry* savedCursors() const {
+ return _cursorRegistry;
+ }
- static int lowWaterMark();
-
- Ordering ordering() const { return _ordering; }
+ static int lowWaterMark();
- int customBSONCmp(const BSONObj& inIndex_left,
- const IndexSeekPoint& seekPoint_right,
- int direction) const;
+ Ordering ordering() const {
+ return _ordering;
+ }
- private:
- friend class BtreeLogic::Builder;
+ int customBSONCmp(const BSONObj& inIndex_left,
+ const IndexSeekPoint& seekPoint_right,
+ int direction) const;
- // Used for unit-testing only
- friend class BtreeLogicTestBase<BtreeLayout>;
- friend class ArtificialTreeBuilder<BtreeLayout>;
+private:
+ friend class BtreeLogic::Builder;
- /**
- * This is an in memory wrapper for the variable length data associated with a
- * KeyHeaderType. It points to on-disk data but is not itself on-disk data.
- *
- * This object and its BSONObj 'key' will become invalid if the KeyHeaderType data that owns
- * this it is moved within the btree. In general, a KeyWrapper should not be expected to be
- * valid after a write.
- */
- struct FullKey {
- FullKey(const BucketType* bucket, int i)
- : header(getKeyHeader(bucket, i)),
- prevChildBucket(header.prevChildBucket),
- recordLoc(header.recordLoc),
- data(bucket->data + header.keyDataOfs()) { }
+ // Used for unit-testing only
+ friend class BtreeLogicTestBase<BtreeLayout>;
+ friend class ArtificialTreeBuilder<BtreeLayout>;
- // This is actually a reference to something on-disk.
- const KeyHeaderType& header;
+ /**
+ * This is an in memory wrapper for the variable length data associated with a
+ * KeyHeaderType. It points to on-disk data but is not itself on-disk data.
+ *
+     * This object and its BSONObj 'key' will become invalid if the KeyHeaderType data that
+     * owns it is moved within the btree. In general, a KeyWrapper should not be expected to be
+ * valid after a write.
+ */
+ struct FullKey {
+ FullKey(const BucketType* bucket, int i)
+ : header(getKeyHeader(bucket, i)),
+ prevChildBucket(header.prevChildBucket),
+ recordLoc(header.recordLoc),
+ data(bucket->data + header.keyDataOfs()) {}
+
+ // This is actually a reference to something on-disk.
+ const KeyHeaderType& header;
+
+ // These are actually in 'header'.
+ const LocType& prevChildBucket;
+ const LocType& recordLoc;
+
+ // This is *not* memory-mapped but its members point to something on-disk.
+ KeyDataType data;
+ };
- // These are actually in 'header'.
- const LocType& prevChildBucket;
- const LocType& recordLoc;
+ //
+ // Functions that depend on the templated type info but nothing in 'this'.
+ //
- // This is *not* memory-mapped but its members point to something on-disk.
- KeyDataType data;
- };
+ static LocType& childLocForPos(BucketType* bucket, int pos);
- //
- // Functions that depend on the templated type info but nothing in 'this'.
- //
+ static FullKey getFullKey(const BucketType* bucket, int i);
- static LocType& childLocForPos(BucketType* bucket, int pos);
+ static KeyHeaderType& getKeyHeader(BucketType* bucket, int i);
- static FullKey getFullKey(const BucketType* bucket, int i);
+ static const KeyHeaderType& getKeyHeader(const BucketType* bucket, int i);
- static KeyHeaderType& getKeyHeader(BucketType* bucket, int i);
+ static char* dataAt(BucketType* bucket, short ofs);
- static const KeyHeaderType& getKeyHeader(const BucketType* bucket, int i);
+ static void markUnused(BucketType* bucket, int keypos);
- static char* dataAt(BucketType* bucket, short ofs);
+ static int totalDataSize(BucketType* bucket);
- static void markUnused(BucketType* bucket, int keypos);
+ static void init(BucketType* bucket);
- static int totalDataSize(BucketType* bucket);
+ static int _alloc(BucketType* bucket, int bytes);
- static void init(BucketType* bucket);
+ static void _unalloc(BucketType* bucket, int bytes);
- static int _alloc(BucketType* bucket, int bytes);
+ static void _delKeyAtPos(BucketType* bucket, int keypos, bool mayEmpty = false);
- static void _unalloc(BucketType* bucket, int bytes);
+ static void popBack(BucketType* bucket, DiskLoc* recordLocOut, KeyDataType* keyDataOut);
- static void _delKeyAtPos(BucketType* bucket, int keypos, bool mayEmpty = false);
+ static bool mayDropKey(BucketType* bucket, int index, int refPos);
- static void popBack(BucketType* bucket, DiskLoc* recordLocOut, KeyDataType *keyDataOut);
+ static int _packedDataSize(BucketType* bucket, int refPos);
- static bool mayDropKey(BucketType* bucket, int index, int refPos);
+ static void setPacked(BucketType* bucket);
- static int _packedDataSize(BucketType* bucket, int refPos);
+ static void setNotPacked(BucketType* bucket);
- static void setPacked(BucketType* bucket);
+ static BucketType* btreemod(OperationContext* txn, BucketType* bucket);
- static void setNotPacked(BucketType* bucket);
+ static int splitPos(BucketType* bucket, int keypos);
- static BucketType* btreemod(OperationContext* txn, BucketType* bucket);
+ static void reserveKeysFront(BucketType* bucket, int nAdd);
- static int splitPos(BucketType* bucket, int keypos);
+ static void setKey(BucketType* bucket,
+ int i,
+ const DiskLoc recordLoc,
+ const KeyDataType& key,
+ const DiskLoc prevChildBucket);
- static void reserveKeysFront(BucketType* bucket, int nAdd);
+ static bool isHead(BucketType* bucket);
- static void setKey(BucketType* bucket,
- int i,
- const DiskLoc recordLoc,
- const KeyDataType &key,
- const DiskLoc prevChildBucket);
+ static void dumpBucket(const BucketType* bucket, int indentLength = 0);
- static bool isHead(BucketType* bucket);
+ static void assertValid(const std::string& ns,
+ BucketType* bucket,
+ const Ordering& ordering,
+ bool force = false);
- static void dumpBucket(const BucketType* bucket, int indentLength = 0);
+ //
+ // 'this'-specific helpers (require record store, catalog information, or ordering, or type
+ // information).
+ //
- static void assertValid(const std::string& ns,
- BucketType* bucket,
- const Ordering& ordering,
- bool force = false);
+ bool basicInsert(OperationContext* txn,
+ BucketType* bucket,
+ const DiskLoc bucketLoc,
+ int& keypos,
+ const KeyDataType& key,
+ const DiskLoc recordLoc);
+
+ void dropFront(BucketType* bucket, int nDrop, int& refpos);
+
+ void _pack(OperationContext* txn, BucketType* bucket, const DiskLoc thisLoc, int& refPos);
+
+ void customLocate(OperationContext* txn,
+ DiskLoc* locInOut,
+ int* keyOfsInOut,
+ const IndexSeekPoint& seekPoint,
+ int direction,
+ std::pair<DiskLoc, int>& bestParent) const;
+
+ Status _find(OperationContext* txn,
+ BucketType* bucket,
+ const KeyDataType& key,
+ const DiskLoc& recordLoc,
+ bool errorIfDup,
+ int* keyPositionOut,
+ bool* foundOut) const;
+
+ bool customFind(OperationContext* txn,
+ int low,
+ int high,
+ const IndexSeekPoint& seekPoint,
+ int direction,
+ DiskLoc* thisLocInOut,
+ int* keyOfsInOut,
+ std::pair<DiskLoc, int>& bestParent) const;
+
+ void advanceToImpl(OperationContext* txn,
+ DiskLoc* thisLocInOut,
+ int* keyOfsInOut,
+ const IndexSeekPoint& seekPoint,
+ int direction) const;
- //
- // 'this'-specific helpers (require record store, catalog information, or ordering, or type
- // information).
- //
+ bool wouldCreateDup(OperationContext* txn, const KeyDataType& key, const DiskLoc self) const;
- bool basicInsert(OperationContext* txn,
- BucketType* bucket,
- const DiskLoc bucketLoc,
- int& keypos,
- const KeyDataType& key,
- const DiskLoc recordLoc);
+ bool keyIsUsed(OperationContext* txn, const DiskLoc& loc, const int& pos) const;
- void dropFront(BucketType* bucket, int nDrop, int& refpos);
+ void skipUnusedKeys(OperationContext* txn, DiskLoc* loc, int* pos, int direction) const;
- void _pack(OperationContext* txn, BucketType* bucket, const DiskLoc thisLoc, int &refPos);
+ DiskLoc advance(OperationContext* txn,
+ const DiskLoc& bucketLoc,
+ int* posInOut,
+ int direction) const;
- void customLocate(OperationContext* txn,
- DiskLoc* locInOut,
- int* keyOfsInOut,
- const IndexSeekPoint& seekPoint,
- int direction,
- std::pair<DiskLoc, int>& bestParent) const;
+ DiskLoc _locate(OperationContext* txn,
+ const DiskLoc& bucketLoc,
+ const KeyDataType& key,
+ int* posOut,
+ bool* foundOut,
+ const DiskLoc& recordLoc,
+ const int direction) const;
- Status _find(OperationContext* txn,
- BucketType* bucket,
- const KeyDataType& key,
- const DiskLoc& recordLoc,
- bool errorIfDup,
- int* keyPositionOut,
- bool* foundOut) const;
-
- bool customFind(OperationContext* txn,
- int low,
- int high,
- const IndexSeekPoint& seekPoint,
- int direction,
- DiskLoc* thisLocInOut,
- int* keyOfsInOut,
- std::pair<DiskLoc, int>& bestParent) const;
-
- void advanceToImpl(OperationContext* txn,
- DiskLoc* thisLocInOut,
- int* keyOfsInOut,
- const IndexSeekPoint& seekPoint,
- int direction) const;
-
- bool wouldCreateDup(OperationContext* txn,
- const KeyDataType& key,
- const DiskLoc self) const;
-
- bool keyIsUsed(OperationContext* txn, const DiskLoc& loc, const int& pos) const;
-
- void skipUnusedKeys(OperationContext* txn,
- DiskLoc* loc,
- int* pos,
- int direction) const;
-
- DiskLoc advance(OperationContext* txn,
- const DiskLoc& bucketLoc,
- int* posInOut,
- int direction) const;
-
- DiskLoc _locate(OperationContext* txn,
- const DiskLoc& bucketLoc,
- const KeyDataType& key,
- int* posOut,
- bool* foundOut,
- const DiskLoc& recordLoc,
- const int direction) const;
+ long long _fullValidate(OperationContext* txn,
+ const DiskLoc bucketLoc,
+ long long* unusedCount,
+ bool strict,
+ bool dumpBuckets,
+ unsigned depth) const;
- long long _fullValidate(OperationContext* txn,
- const DiskLoc bucketLoc,
- long long *unusedCount,
- bool strict,
- bool dumpBuckets,
- unsigned depth) const ;
+ DiskLoc _addBucket(OperationContext* txn);
- DiskLoc _addBucket(OperationContext* txn);
+ bool canMergeChildren(OperationContext* txn,
+ BucketType* bucket,
+ const DiskLoc bucketLoc,
+ const int leftIndex);
- bool canMergeChildren(OperationContext* txn,
- BucketType* bucket,
- const DiskLoc bucketLoc,
- const int leftIndex);
+ // has to look in children of 'bucket' and requires record store
+ int _rebalancedSeparatorPos(OperationContext* txn, BucketType* bucket, int leftIndex);
- // has to look in children of 'bucket' and requires record store
- int _rebalancedSeparatorPos(OperationContext* txn,
- BucketType* bucket,
- int leftIndex);
+ void _packReadyForMod(BucketType* bucket, int& refPos);
- void _packReadyForMod(BucketType* bucket, int &refPos);
+ void truncateTo(BucketType* bucket, int N, int& refPos);
- void truncateTo(BucketType* bucket, int N, int &refPos);
+ void split(OperationContext* txn,
+ BucketType* bucket,
+ const DiskLoc bucketLoc,
+ int keypos,
+ const DiskLoc recordLoc,
+ const KeyDataType& key,
+ const DiskLoc lchild,
+ const DiskLoc rchild);
- void split(OperationContext* txn,
+ Status _insert(OperationContext* txn,
BucketType* bucket,
const DiskLoc bucketLoc,
- int keypos,
- const DiskLoc recordLoc,
const KeyDataType& key,
- const DiskLoc lchild,
- const DiskLoc rchild);
-
- Status _insert(OperationContext* txn,
- BucketType* bucket,
- const DiskLoc bucketLoc,
- const KeyDataType& key,
- const DiskLoc recordLoc,
- bool dupsAllowed,
- const DiskLoc leftChild,
- const DiskLoc rightChild);
-
- // TODO take a BucketType*?
- void insertHere(OperationContext* txn,
+ const DiskLoc recordLoc,
+ bool dupsAllowed,
+ const DiskLoc leftChild,
+ const DiskLoc rightChild);
+
+ // TODO take a BucketType*?
+ void insertHere(OperationContext* txn,
+ const DiskLoc bucketLoc,
+ int pos,
+ const KeyDataType& key,
+ const DiskLoc recordLoc,
+ const DiskLoc leftChild,
+ const DiskLoc rightChild);
+
+ std::string dupKeyError(const KeyDataType& key) const;
+
+ void setInternalKey(OperationContext* txn,
+ BucketType* bucket,
const DiskLoc bucketLoc,
- int pos,
- const KeyDataType& key,
+ int keypos,
const DiskLoc recordLoc,
- const DiskLoc leftChild,
- const DiskLoc rightChild);
+ const KeyDataType& key,
+ const DiskLoc lchild,
+ const DiskLoc rchild);
- std::string dupKeyError(const KeyDataType& key) const;
+ void fixParentPtrs(OperationContext* trans,
+ BucketType* bucket,
+ const DiskLoc bucketLoc,
+ int firstIndex = 0,
+ int lastIndex = -1);
- void setInternalKey(OperationContext* txn,
- BucketType* bucket,
- const DiskLoc bucketLoc,
- int keypos,
- const DiskLoc recordLoc,
- const KeyDataType& key,
- const DiskLoc lchild,
- const DiskLoc rchild);
+ bool mayBalanceWithNeighbors(OperationContext* txn,
+ BucketType* bucket,
+ const DiskLoc bucketLoc);
- void fixParentPtrs(OperationContext* trans,
+ void doBalanceChildren(OperationContext* txn,
BucketType* bucket,
const DiskLoc bucketLoc,
- int firstIndex = 0,
- int lastIndex = -1);
-
- bool mayBalanceWithNeighbors(OperationContext* txn, BucketType* bucket, const DiskLoc bucketLoc);
-
- void doBalanceChildren(OperationContext* txn,
- BucketType* bucket,
- const DiskLoc bucketLoc,
- int leftIndex);
-
- void doBalanceLeftToRight(OperationContext* txn,
- BucketType* bucket,
- const DiskLoc thisLoc,
- int leftIndex,
- int split,
- BucketType* l,
- const DiskLoc lchild,
- BucketType* r,
- const DiskLoc rchild);
-
- void doBalanceRightToLeft(OperationContext* txn,
- BucketType* bucket,
- const DiskLoc thisLoc,
- int leftIndex,
- int split,
- BucketType* l,
- const DiskLoc lchild,
- BucketType* r,
- const DiskLoc rchild);
-
- bool tryBalanceChildren(OperationContext* txn,
- BucketType* bucket,
- const DiskLoc bucketLoc,
- int leftIndex);
-
- int indexInParent(OperationContext* txn,
- BucketType* bucket,
- const DiskLoc bucketLoc) const;
-
- void doMergeChildren(OperationContext* txn,
- BucketType* bucket,
- const DiskLoc bucketLoc,
- int leftIndex);
+ int leftIndex);
- void replaceWithNextChild(OperationContext* txn,
- BucketType* bucket,
- const DiskLoc bucketLoc);
+ void doBalanceLeftToRight(OperationContext* txn,
+ BucketType* bucket,
+ const DiskLoc thisLoc,
+ int leftIndex,
+ int split,
+ BucketType* l,
+ const DiskLoc lchild,
+ BucketType* r,
+ const DiskLoc rchild);
+
+ void doBalanceRightToLeft(OperationContext* txn,
+ BucketType* bucket,
+ const DiskLoc thisLoc,
+ int leftIndex,
+ int split,
+ BucketType* l,
+ const DiskLoc lchild,
+ BucketType* r,
+ const DiskLoc rchild);
+
+ bool tryBalanceChildren(OperationContext* txn,
+ BucketType* bucket,
+ const DiskLoc bucketLoc,
+ int leftIndex);
- void deleteInternalKey(OperationContext* txn,
- BucketType* bucket,
- const DiskLoc bucketLoc,
- int keypos);
+ int indexInParent(OperationContext* txn, BucketType* bucket, const DiskLoc bucketLoc) const;
- void delKeyAtPos(OperationContext* txn,
+ void doMergeChildren(OperationContext* txn,
BucketType* bucket,
const DiskLoc bucketLoc,
- int p);
+ int leftIndex);
- void delBucket(OperationContext* txn,
- BucketType* bucket,
- const DiskLoc bucketLoc);
+ void replaceWithNextChild(OperationContext* txn, BucketType* bucket, const DiskLoc bucketLoc);
- void deallocBucket(OperationContext* txn,
+ void deleteInternalKey(OperationContext* txn,
BucketType* bucket,
- const DiskLoc bucketLoc);
+ const DiskLoc bucketLoc,
+ int keypos);
- bool _keyIsAt(const BSONObj& savedKey,
- const DiskLoc& savedLoc,
- BucketType* bucket,
- int keyPos) const;
+ void delKeyAtPos(OperationContext* txn, BucketType* bucket, const DiskLoc bucketLoc, int p);
- /**
-     * Tries to push key into bucket. Returns false if it can't because the key doesn't fit.
- *
- * bucket must be declared as writable by the caller.
- * The new key/recordLoc pair must be higher than any others in bucket.
- *
- * TODO needs 'this' for _ordering for sanity check
- */
- bool pushBack(BucketType* bucket,
- const DiskLoc recordLoc,
- const KeyDataType& key,
- const DiskLoc prevChild);
+ void delBucket(OperationContext* txn, BucketType* bucket, const DiskLoc bucketLoc);
+ void deallocBucket(OperationContext* txn, BucketType* bucket, const DiskLoc bucketLoc);
- BucketType* childForPos(OperationContext* txn, BucketType* bucket, int pos) const;
+ bool _keyIsAt(const BSONObj& savedKey,
+ const DiskLoc& savedLoc,
+ BucketType* bucket,
+ int keyPos) const;
- BucketType* getBucket(OperationContext* txn, const DiskLoc dl) const {
- return getBucket(txn, dl.toRecordId());
- }
- BucketType* getBucket(OperationContext* txn, const RecordId dl) const;
+ /**
+     * Tries to push key into bucket. Returns false if it can't because the key doesn't fit.
+ *
+ * bucket must be declared as writable by the caller.
+ * The new key/recordLoc pair must be higher than any others in bucket.
+ *
+ * TODO needs 'this' for _ordering for sanity check
+ */
+ bool pushBack(BucketType* bucket,
+ const DiskLoc recordLoc,
+ const KeyDataType& key,
+ const DiskLoc prevChild);
- BucketType* getRoot(OperationContext* txn) const;
- DiskLoc getRootLoc(OperationContext* txn) const;
+ BucketType* childForPos(OperationContext* txn, BucketType* bucket, int pos) const;
- //
- // Data
- //
+ BucketType* getBucket(OperationContext* txn, const DiskLoc dl) const {
+ return getBucket(txn, dl.toRecordId());
+ }
+ BucketType* getBucket(OperationContext* txn, const RecordId dl) const;
- // Not owned here.
- HeadManager* _headManager;
+ BucketType* getRoot(OperationContext* txn) const;
- // Not owned here.
- RecordStore* _recordStore;
+ DiskLoc getRootLoc(OperationContext* txn) const;
-    // Not owned here.
- SavedCursorRegistry* _cursorRegistry;
+ //
+ // Data
+ //
- Ordering _ordering;
+ // Not owned here.
+ HeadManager* _headManager;
- std::string _indexName;
- };
+ // Not owned here.
+ RecordStore* _recordStore;
+
+    // Not owned here.
+ SavedCursorRegistry* _cursorRegistry;
+
+ Ordering _ordering;
+
+ std::string _indexName;
+};
} // namespace mongo
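To make the reformatted header above easier to follow: _fullValidate takes a bucket location, an optional out-counter for keys marked unused, and a depth, which suggests a recursive count-and-tally walk over the bucket tree. Below is a minimal, self-contained sketch of that traversal. It is not MongoDB code; ToyKey, ToyBucket, and the free function fullValidate are hypothetical stand-ins, and the real method also consults the record store and the validation flags (strict, dumpBuckets) omitted here.

// Minimal sketch (hypothetical types, not MongoDB code): a recursive
// _fullValidate-style walk that returns the number of used keys in a
// bucket subtree and tallies keys flagged as unused.
#include <iostream>
#include <vector>

struct ToyKey {
    bool used;  // stands in for the on-disk "unused" key flag
};

struct ToyBucket {
    std::vector<ToyKey> keys;
    std::vector<ToyBucket*> children;  // empty for leaf buckets
};

long long fullValidate(const ToyBucket* bucket, long long* unusedCount, unsigned depth = 0) {
    if (!bucket)
        return 0;
    long long keyCount = 0;
    for (const ToyKey& key : bucket->keys) {
        if (key.used)
            ++keyCount;
        else if (unusedCount)
            ++*unusedCount;
    }
    // Recurse into every child bucket; 'depth' only mirrors the real
    // signature (used there when dumping buckets).
    for (const ToyBucket* child : bucket->children)
        keyCount += fullValidate(child, unusedCount, depth + 1);
    return keyCount;
}

int main() {
    ToyBucket left{{{true}, {false}}, {}};
    ToyBucket right{{{true}}, {}};
    ToyBucket root{{{true}}, {&left, &right}};

    long long unused = 0;
    std::cout << fullValidate(&root, &unused) << " used, " << unused << " unused\n";
    // Prints: 3 used, 1 unused
}

The test diff that follows exercises the real entry point as fullValidate(&txn, NULL, true, false, 0) (see checkValidNumKeys).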
diff --git a/src/mongo/db/storage/mmap_v1/btree/btree_logic_test.cpp b/src/mongo/db/storage/mmap_v1/btree/btree_logic_test.cpp
index 1c0bd1c1505..b4e42196c99 100644
--- a/src/mongo/db/storage/mmap_v1/btree/btree_logic_test.cpp
+++ b/src/mongo/db/storage/mmap_v1/btree/btree_logic_test.cpp
@@ -43,2070 +43,2244 @@
namespace mongo {
- using std::string;
+using std::string;
+
+/**
+ * This class is made a friend of BtreeLogic so we can add whatever private method accesses we
+ * need to it, to be used by the tests.
+ */
+template <class BtreeLayoutType>
+class BtreeLogicTestBase {
+public:
+ typedef typename BtreeLayoutType::BucketType BucketType;
+ typedef typename BtreeLayoutType::FixedWidthKeyType FixedWidthKeyType;
+
+ typedef typename BtreeLogic<BtreeLayoutType>::FullKey FullKey;
+ typedef typename BtreeLogic<BtreeLayoutType>::KeyDataOwnedType KeyDataOwnedType;
+
+ BtreeLogicTestBase() : _helper(BSON("TheKey" << 1)) {}
+
+ virtual ~BtreeLogicTestBase() {}
+
+protected:
+ void checkValidNumKeys(int nKeys) {
+ OperationContextNoop txn;
+ ASSERT_EQUALS(nKeys, _helper.btree.fullValidate(&txn, NULL, true, false, 0));
+ }
+
+ Status insert(const BSONObj& key, const DiskLoc dl, bool dupsAllowed = true) {
+ OperationContextNoop txn;
+ return _helper.btree.insert(&txn, key, dl, dupsAllowed);
+ }
+
+ bool unindex(const BSONObj& key) {
+ OperationContextNoop txn;
+ return _helper.btree.unindex(&txn, key, _helper.dummyDiskLoc);
+ }
+
+ void locate(const BSONObj& key,
+ int expectedPos,
+ bool expectedFound,
+ const RecordId& expectedLocation,
+ int direction) {
+ return locate(
+ key, expectedPos, expectedFound, DiskLoc::fromRecordId(expectedLocation), direction);
+ }
+ void locate(const BSONObj& key,
+ int expectedPos,
+ bool expectedFound,
+ const DiskLoc& expectedLocation,
+ int direction) {
+ int pos;
+ DiskLoc loc;
+ OperationContextNoop txn;
+ ASSERT_EQUALS(expectedFound,
+ _helper.btree.locate(&txn, key, _helper.dummyDiskLoc, direction, &pos, &loc));
+ ASSERT_EQUALS(expectedLocation, loc);
+ ASSERT_EQUALS(expectedPos, pos);
+ }
+
+ const BucketType* child(const BucketType* bucket, int i) const {
+ verify(i <= bucket->n);
+
+ DiskLoc diskLoc;
+ if (i == bucket->n) {
+ diskLoc = bucket->nextChild;
+ } else {
+ FullKey fullKey = BtreeLogic<BtreeLayoutType>::getFullKey(bucket, i);
+ diskLoc = fullKey.prevChildBucket;
+ }
+
+ verify(!diskLoc.isNull());
+
+ return _helper.btree.getBucket(NULL, diskLoc);
+ }
+
+ BucketType* head() const {
+ OperationContextNoop txn;
+ return _helper.btree.getBucket(&txn, _helper.headManager.getHead(&txn));
+ }
+
+ void forcePackBucket(const RecordId bucketLoc) {
+ BucketType* bucket = _helper.btree.getBucket(NULL, bucketLoc);
+
+ bucket->topSize += bucket->emptySize;
+ bucket->emptySize = 0;
+ BtreeLogic<BtreeLayoutType>::setNotPacked(bucket);
+ }
+
+ void truncateBucket(BucketType* bucket, int N, int& refPos) {
+ _helper.btree.truncateTo(bucket, N, refPos);
+ }
+
+ int bucketPackedDataSize(BucketType* bucket, int refPos) {
+ return _helper.btree._packedDataSize(bucket, refPos);
+ }
+
+ int bucketRebalancedSeparatorPos(const RecordId bucketLoc, int leftIndex) {
+ BucketType* bucket = _helper.btree.getBucket(NULL, bucketLoc);
+ OperationContextNoop txn;
+ return _helper.btree._rebalancedSeparatorPos(&txn, bucket, leftIndex);
+ }
+
+ FullKey getKey(const RecordId bucketLoc, int pos) const {
+ const BucketType* bucket = _helper.btree.getBucket(NULL, bucketLoc);
+ return BtreeLogic<BtreeLayoutType>::getFullKey(bucket, pos);
+ }
+
+ void markKeyUnused(const DiskLoc bucketLoc, int keyPos) {
+ BucketType* bucket = _helper.btree.getBucket(NULL, bucketLoc);
+ invariant(keyPos >= 0 && keyPos < bucket->n);
+
+ _helper.btree.getKeyHeader(bucket, keyPos).setUnused();
+ }
+
+ DiskLoc newBucket() {
+ OperationContextNoop txn;
+ return _helper.btree._addBucket(&txn);
+ }
/**
-     * This class is made a friend of BtreeLogic so we can add whatever private method accesses we
- * need to it, to be used by the tests.
+ * Sets the nextChild pointer for the bucket at the specified location.
*/
- template<class BtreeLayoutType>
- class BtreeLogicTestBase {
- public:
- typedef typename BtreeLayoutType::BucketType BucketType;
- typedef typename BtreeLayoutType::FixedWidthKeyType FixedWidthKeyType;
+ void setBucketNextChild(const DiskLoc bucketLoc, const DiskLoc nextChild) {
+ OperationContextNoop txn;
- typedef typename BtreeLogic<BtreeLayoutType>::FullKey FullKey;
- typedef typename BtreeLogic<BtreeLayoutType>::KeyDataOwnedType KeyDataOwnedType;
+ BucketType* bucket = _helper.btree.getBucket(&txn, bucketLoc);
+ bucket->nextChild = nextChild;
- BtreeLogicTestBase() : _helper(BSON("TheKey" << 1)) {
+ _helper.btree.fixParentPtrs(&txn, bucket, bucketLoc);
+ }
- }
+protected:
+ BtreeLogicTestHelper<BtreeLayoutType> _helper;
+};
- virtual ~BtreeLogicTestBase() {
+//
+// TESTS
+//
- }
+template <class OnDiskFormat>
+class SimpleCreate : public BtreeLogicTestBase<OnDiskFormat> {
+public:
+ void run() {
+ OperationContextNoop txn;
+ this->_helper.btree.initAsEmpty(&txn);
+
+ this->checkValidNumKeys(0);
+ }
+};
+
+template <class OnDiskFormat>
+class SimpleInsertDelete : public BtreeLogicTestBase<OnDiskFormat> {
+public:
+ void run() {
+ OperationContextNoop txn;
+ this->_helper.btree.initAsEmpty(&txn);
+
+ BSONObj key = simpleKey('z');
+ this->insert(key, this->_helper.dummyDiskLoc);
+
+ this->checkValidNumKeys(1);
+ this->locate(key, 0, true, this->_helper.headManager.getHead(&txn), 1);
+
+ this->unindex(key);
+
+ this->checkValidNumKeys(0);
+ this->locate(key, 0, false, DiskLoc(), 1);
+ }
+};
+
+template <class OnDiskFormat>
+class SplitUnevenBucketBase : public BtreeLogicTestBase<OnDiskFormat> {
+public:
+ void run() {
+ OperationContextNoop txn;
+ this->_helper.btree.initAsEmpty(&txn);
+
+ for (int i = 0; i < 10; ++i) {
+ BSONObj shortKey = simpleKey(shortToken(i), 1);
+ this->insert(shortKey, this->_helper.dummyDiskLoc);
+
+ BSONObj longKey = simpleKey(longToken(i), 800);
+ this->insert(longKey, this->_helper.dummyDiskLoc);
+ }
+
+ this->checkValidNumKeys(20);
+ ASSERT_EQUALS(1, this->head()->n);
+ checkSplit();
+ }
+
+protected:
+ virtual char shortToken(int i) const = 0;
+ virtual char longToken(int i) const = 0;
+ virtual void checkSplit() = 0;
+
+ static char leftToken(int i) {
+ return 'a' + i;
+ }
+
+ static char rightToken(int i) {
+ return 'z' - i;
+ }
+};
+
+template <class OnDiskFormat>
+class SplitRightHeavyBucket : public SplitUnevenBucketBase<OnDiskFormat> {
+private:
+ virtual char shortToken(int i) const {
+ return this->leftToken(i);
+ }
+ virtual char longToken(int i) const {
+ return this->rightToken(i);
+ }
+ virtual void checkSplit() {
+ ASSERT_EQUALS(15, this->child(this->head(), 0)->n);
+ ASSERT_EQUALS(4, this->child(this->head(), 1)->n);
+ }
+};
+
+template <class OnDiskFormat>
+class SplitLeftHeavyBucket : public SplitUnevenBucketBase<OnDiskFormat> {
+private:
+ virtual char shortToken(int i) const {
+ return this->rightToken(i);
+ }
+ virtual char longToken(int i) const {
+ return this->leftToken(i);
+ }
+ virtual void checkSplit() {
+ ASSERT_EQUALS(4, this->child(this->head(), 0)->n);
+ ASSERT_EQUALS(15, this->child(this->head(), 1)->n);
+ }
+};
+
+template <class OnDiskFormat>
+class MissingLocate : public BtreeLogicTestBase<OnDiskFormat> {
+public:
+ void run() {
+ OperationContextNoop txn;
+ this->_helper.btree.initAsEmpty(&txn);
+
+ for (int i = 0; i < 3; ++i) {
+ BSONObj k = simpleKey('b' + 2 * i);
+ this->insert(k, this->_helper.dummyDiskLoc);
+ }
+
+ locateExtended(1, 'a', 'b', this->_helper.headManager.getHead(&txn));
+ locateExtended(1, 'c', 'd', this->_helper.headManager.getHead(&txn));
+ locateExtended(1, 'e', 'f', this->_helper.headManager.getHead(&txn));
+ locateExtended(1, 'g', 'g' + 1, RecordId()); // of course, 'h' isn't in the index.
+
+ // old behavior
+ // locateExtended( -1, 'a', 'b', dl() );
+ // locateExtended( -1, 'c', 'd', dl() );
+ // locateExtended( -1, 'e', 'f', dl() );
+ // locateExtended( -1, 'g', 'f', dl() );
+
+ locateExtended(-1, 'a', 'a' - 1, RecordId()); // of course, 'a' - 1 isn't in the index
+ locateExtended(-1, 'c', 'b', this->_helper.headManager.getHead(&txn));
+ locateExtended(-1, 'e', 'd', this->_helper.headManager.getHead(&txn));
+ locateExtended(-1, 'g', 'f', this->_helper.headManager.getHead(&txn));
+ }
+
+private:
+ void locateExtended(int direction, char token, char expectedMatch, RecordId expectedLocation) {
+ const BSONObj k = simpleKey(token);
+ int expectedPos = (expectedMatch - 'b') / 2;
+
+ this->locate(k, expectedPos, false, expectedLocation, direction);
+ }
+};
+
+template <class OnDiskFormat>
+class MissingLocateMultiBucket : public BtreeLogicTestBase<OnDiskFormat> {
+public:
+ void run() {
+ OperationContextNoop txn;
+ this->_helper.btree.initAsEmpty(&txn);
+
+ this->insert(simpleKey('A', 800), this->_helper.dummyDiskLoc);
+ this->insert(simpleKey('B', 800), this->_helper.dummyDiskLoc);
+ this->insert(simpleKey('C', 800), this->_helper.dummyDiskLoc);
+ this->insert(simpleKey('D', 800), this->_helper.dummyDiskLoc);
+ this->insert(simpleKey('E', 800), this->_helper.dummyDiskLoc);
+ this->insert(simpleKey('F', 800), this->_helper.dummyDiskLoc);
+ this->insert(simpleKey('G', 800), this->_helper.dummyDiskLoc);
+ this->insert(simpleKey('H', 800), this->_helper.dummyDiskLoc);
+ this->insert(simpleKey('J', 800), this->_helper.dummyDiskLoc);
+
+        // This causes a split
+ this->insert(simpleKey('I', 800), this->_helper.dummyDiskLoc);
+
+ int pos;
+ DiskLoc loc;
+
+        // 'E' is the split point and should be in the head; the rest should be ~50/50
+ const BSONObj splitPoint = simpleKey('E', 800);
+ this->_helper.btree.locate(&txn, splitPoint, this->_helper.dummyDiskLoc, 1, &pos, &loc);
+ ASSERT_EQUALS(this->_helper.headManager.getHead(&txn), loc.toRecordId());
+ ASSERT_EQUALS(0, pos);
+
+ // Find the one before 'E'
+ int largePos;
+ DiskLoc largeLoc;
+ this->_helper.btree.locate(
+ &txn, splitPoint, this->_helper.dummyDiskLoc, 1, &largePos, &largeLoc);
+ this->_helper.btree.advance(&txn, &largeLoc, &largePos, -1);
+
+ // Find the one after 'E'
+ int smallPos;
+ DiskLoc smallLoc;
+ this->_helper.btree.locate(
+ &txn, splitPoint, this->_helper.dummyDiskLoc, 1, &smallPos, &smallLoc);
+ this->_helper.btree.advance(&txn, &smallLoc, &smallPos, 1);
+
+ ASSERT_NOT_EQUALS(smallLoc, largeLoc);
+ ASSERT_NOT_EQUALS(smallLoc, loc);
+ ASSERT_NOT_EQUALS(largeLoc, loc);
+ }
+};
- protected:
- void checkValidNumKeys(int nKeys) {
- OperationContextNoop txn;
- ASSERT_EQUALS(nKeys, _helper.btree.fullValidate(&txn, NULL, true, false, 0));
- }
+/**
+ * Validates that adding keys incrementally produces buckets that are 90%/10% full.
+ */
+template <class OnDiskFormat>
+class SERVER983 : public BtreeLogicTestBase<OnDiskFormat> {
+public:
+ void run() {
+ OperationContextNoop txn;
+ this->_helper.btree.initAsEmpty(&txn);
+
+ this->insert(simpleKey('A', 800), this->_helper.dummyDiskLoc);
+ this->insert(simpleKey('B', 800), this->_helper.dummyDiskLoc);
+ this->insert(simpleKey('C', 800), this->_helper.dummyDiskLoc);
+ this->insert(simpleKey('D', 800), this->_helper.dummyDiskLoc);
+ this->insert(simpleKey('E', 800), this->_helper.dummyDiskLoc);
+ this->insert(simpleKey('F', 800), this->_helper.dummyDiskLoc);
+ this->insert(simpleKey('G', 800), this->_helper.dummyDiskLoc);
+ this->insert(simpleKey('H', 800), this->_helper.dummyDiskLoc);
+ this->insert(simpleKey('I', 800), this->_helper.dummyDiskLoc);
+
+        // This will cause a split
+ this->insert(simpleKey('J', 800), this->_helper.dummyDiskLoc);
+
+ int pos;
+ DiskLoc loc;
+
+        // 'H' is the maximum 'large' interval key; 90% should be < 'H' and 10% larger
+ const BSONObj splitPoint = simpleKey('H', 800);
+ this->_helper.btree.locate(&txn, splitPoint, this->_helper.dummyDiskLoc, 1, &pos, &loc);
+ ASSERT_EQUALS(this->_helper.headManager.getHead(&txn), loc.toRecordId());
+ ASSERT_EQUALS(0, pos);
+
+ // Find the one before 'H'
+ int largePos;
+ DiskLoc largeLoc;
+ this->_helper.btree.locate(
+ &txn, splitPoint, this->_helper.dummyDiskLoc, 1, &largePos, &largeLoc);
+ this->_helper.btree.advance(&txn, &largeLoc, &largePos, -1);
+
+ // Find the one after 'H'
+ int smallPos;
+ DiskLoc smallLoc;
+ this->_helper.btree.locate(
+ &txn, splitPoint, this->_helper.dummyDiskLoc, 1, &smallPos, &smallLoc);
+ this->_helper.btree.advance(&txn, &smallLoc, &smallPos, 1);
+
+ ASSERT_NOT_EQUALS(smallLoc, largeLoc);
+ ASSERT_NOT_EQUALS(smallLoc, loc);
+ ASSERT_NOT_EQUALS(largeLoc, loc);
+ }
+};
+
+template <class OnDiskFormat>
+class DontReuseUnused : public BtreeLogicTestBase<OnDiskFormat> {
+public:
+ void run() {
+ OperationContextNoop txn;
+ this->_helper.btree.initAsEmpty(&txn);
+
+ for (int i = 0; i < 10; ++i) {
+ const BSONObj k = simpleKey('b' + 2 * i, 800);
+ this->insert(k, this->_helper.dummyDiskLoc);
+ }
+
+ const BSONObj root = simpleKey('p', 800);
+ this->unindex(root);
+
+ this->insert(root, this->_helper.dummyDiskLoc);
+ this->locate(root, 0, true, this->head()->nextChild, 1);
+ }
+};
+
+template <class OnDiskFormat>
+class MergeBucketsTestBase : public BtreeLogicTestBase<OnDiskFormat> {
+public:
+ void run() {
+ OperationContextNoop txn;
+ this->_helper.btree.initAsEmpty(&txn);
+
+ for (int i = 0; i < 10; ++i) {
+ const BSONObj k = simpleKey('b' + 2 * i, 800);
+ this->insert(k, this->_helper.dummyDiskLoc);
+ }
+
+ // numRecords() - 1, because this->_helper.dummyDiskLoc is actually in the record store too
+ ASSERT_EQUALS(3, this->_helper.recordStore.numRecords(NULL) - 1);
+
+ long long expectedCount = 10 - unindexKeys();
+ ASSERT_EQUALS(1, this->_helper.recordStore.numRecords(NULL) - 1);
+
+ long long unusedCount = 0;
+ ASSERT_EQUALS(expectedCount,
+ this->_helper.btree.fullValidate(&txn, &unusedCount, true, false, 0));
+ ASSERT_EQUALS(0, unusedCount);
+ }
+
+protected:
+ virtual int unindexKeys() = 0;
+};
+
+template <class OnDiskFormat>
+class MergeBucketsLeft : public MergeBucketsTestBase<OnDiskFormat> {
+ virtual int unindexKeys() {
+ BSONObj k = simpleKey('b', 800);
+ this->unindex(k);
+
+ k = simpleKey('b' + 2, 800);
+ this->unindex(k);
+
+ k = simpleKey('b' + 4, 800);
+ this->unindex(k);
+
+ k = simpleKey('b' + 6, 800);
+ this->unindex(k);
+
+ return 4;
+ }
+};
+
+template <class OnDiskFormat>
+class MergeBucketsRight : public MergeBucketsTestBase<OnDiskFormat> {
+ virtual int unindexKeys() {
+ const BSONObj k = simpleKey('b' + 2 * 9, 800);
+ this->unindex(k);
+ return 1;
+ }
+};
+
+template <class OnDiskFormat>
+class MergeBucketsDontReplaceHead : public BtreeLogicTestBase<OnDiskFormat> {
+public:
+ void run() {
+ OperationContextNoop txn;
+ this->_helper.btree.initAsEmpty(&txn);
+
+ for (int i = 0; i < 18; ++i) {
+ const BSONObj k = simpleKey('a' + i, 800);
+ this->insert(k, this->_helper.dummyDiskLoc);
+ }
+
+        // numRecords(NULL) - 1, because dummyDiskLoc is actually in the record store too
+ ASSERT_EQUALS(4, this->_helper.recordStore.numRecords(NULL) - 1);
+
+ const BSONObj k = simpleKey('a' + 17, 800);
+ this->unindex(k);
+ ASSERT_EQUALS(3, this->_helper.recordStore.numRecords(NULL) - 1);
+
+ long long unusedCount = 0;
+ ASSERT_EQUALS(17, this->_helper.btree.fullValidate(&txn, &unusedCount, true, false, 0));
+ ASSERT_EQUALS(0, unusedCount);
+ }
+};
- Status insert(const BSONObj &key, const DiskLoc dl, bool dupsAllowed = true) {
- OperationContextNoop txn;
- return _helper.btree.insert(&txn, key, dl, dupsAllowed);
- }
+template <class OnDiskFormat>
+class MergeBucketsDelInternal : public BtreeLogicTestBase<OnDiskFormat> {
+public:
+ void run() {
+ OperationContextNoop txn;
+ ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
- bool unindex(const BSONObj &key) {
- OperationContextNoop txn;
- return _helper.btree.unindex(&txn, key, _helper.dummyDiskLoc);
- }
+ builder.makeTree("{d:{b:{a:null},bb:null,_:{c:null}},_:{f:{e:null},_:{g:null}}}");
+ ASSERT_EQUALS(8, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
- void locate(const BSONObj &key,
- int expectedPos,
- bool expectedFound,
- const RecordId &expectedLocation,
- int direction) {
- return locate(key, expectedPos, expectedFound, DiskLoc::fromRecordId(expectedLocation),
- direction);
- }
- void locate(const BSONObj &key,
- int expectedPos,
- bool expectedFound,
- const DiskLoc &expectedLocation,
- int direction) {
- int pos;
- DiskLoc loc;
- OperationContextNoop txn;
- ASSERT_EQUALS(expectedFound,
- _helper.btree.locate(&txn, key, _helper.dummyDiskLoc, direction, &pos, &loc));
- ASSERT_EQUALS(expectedLocation, loc);
- ASSERT_EQUALS(expectedPos, pos);
- }
+ // The tree has 7 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(8, this->_helper.recordStore.numRecords(NULL));
- const BucketType* child(const BucketType* bucket, int i) const {
- verify(i <= bucket->n);
+ const BSONObj k = BSON(""
+ << "bb");
+ verify(this->unindex(k));
- DiskLoc diskLoc;
- if (i == bucket->n) {
- diskLoc = bucket->nextChild;
- }
- else {
- FullKey fullKey = BtreeLogic<BtreeLayoutType>::getFullKey(bucket, i);
- diskLoc = fullKey.prevChildBucket;
- }
+ ASSERT_EQUALS(7, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
- verify(!diskLoc.isNull());
+ // The tree has 5 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(6, this->_helper.recordStore.numRecords(NULL));
- return _helper.btree.getBucket(NULL, diskLoc);
- }
+ builder.checkStructure("{b:{a:null},d:{c:null},f:{e:null},_:{g:null}}");
+ }
+};
- BucketType* head() const {
- OperationContextNoop txn;
- return _helper.btree.getBucket(&txn, _helper.headManager.getHead(&txn));
- }
+template <class OnDiskFormat>
+class MergeBucketsRightNull : public BtreeLogicTestBase<OnDiskFormat> {
+public:
+ void run() {
+ OperationContextNoop txn;
+ ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
- void forcePackBucket(const RecordId bucketLoc) {
- BucketType* bucket = _helper.btree.getBucket(NULL, bucketLoc);
+ builder.makeTree("{d:{b:{a:null},bb:null,cc:{c:null}},_:{f:{e:null},h:{g:null}}}");
+ ASSERT_EQUALS(10, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
- bucket->topSize += bucket->emptySize;
- bucket->emptySize = 0;
- BtreeLogic<BtreeLayoutType>::setNotPacked(bucket);
- }
+ // The tree has 7 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(8, this->_helper.recordStore.numRecords(NULL));
- void truncateBucket(BucketType* bucket, int N, int &refPos) {
- _helper.btree.truncateTo(bucket, N, refPos);
- }
+ const BSONObj k = BSON(""
+ << "bb");
+ verify(this->unindex(k));
- int bucketPackedDataSize(BucketType* bucket, int refPos) {
- return _helper.btree._packedDataSize(bucket, refPos);
- }
+ ASSERT_EQUALS(9, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
- int bucketRebalancedSeparatorPos(const RecordId bucketLoc, int leftIndex) {
- BucketType* bucket = _helper.btree.getBucket(NULL, bucketLoc);
- OperationContextNoop txn;
- return _helper.btree._rebalancedSeparatorPos(&txn, bucket, leftIndex);
- }
+ // The tree has 5 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(6, this->_helper.recordStore.numRecords(NULL));
- FullKey getKey(const RecordId bucketLoc, int pos) const {
- const BucketType* bucket = _helper.btree.getBucket(NULL, bucketLoc);
- return BtreeLogic<BtreeLayoutType>::getFullKey(bucket, pos);
- }
+ builder.checkStructure("{b:{a:null},cc:{c:null},d:null,f:{e:null},h:{g:null}}");
+ }
+};
- void markKeyUnused(const DiskLoc bucketLoc, int keyPos) {
- BucketType* bucket = _helper.btree.getBucket(NULL, bucketLoc);
- invariant(keyPos >= 0 && keyPos < bucket->n);
+// This comment was here during porting, not sure what it means:
+//
+// "Not yet handling this case"
+template <class OnDiskFormat>
+class DontMergeSingleBucket : public BtreeLogicTestBase<OnDiskFormat> {
+public:
+ void run() {
+ OperationContextNoop txn;
+ ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
- _helper.btree.getKeyHeader(bucket, keyPos).setUnused();
- }
+ builder.makeTree("{d:{b:{a:null},c:null}}");
- DiskLoc newBucket() {
- OperationContextNoop txn;
- return _helper.btree._addBucket(&txn);
- }
+ ASSERT_EQUALS(4, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
- /**
- * Sets the nextChild pointer for the bucket at the specified location.
- */
- void setBucketNextChild(const DiskLoc bucketLoc, const DiskLoc nextChild) {
- OperationContextNoop txn;
+ // The tree has 3 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(4, this->_helper.recordStore.numRecords(NULL));
- BucketType* bucket = _helper.btree.getBucket(&txn, bucketLoc);
- bucket->nextChild = nextChild;
+ const BSONObj k = BSON(""
+ << "c");
+ verify(this->unindex(k));
- _helper.btree.fixParentPtrs(&txn, bucket, bucketLoc);
- }
+ ASSERT_EQUALS(3, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
- protected:
- BtreeLogicTestHelper<BtreeLayoutType> _helper;
- };
+ // The tree has 3 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(4, this->_helper.recordStore.numRecords(NULL));
- //
- // TESTS
- //
+ builder.checkStructure("{d:{b:{a:null}}}");
+ }
+};
- template<class OnDiskFormat>
- class SimpleCreate : public BtreeLogicTestBase<OnDiskFormat> {
- public:
- void run() {
- OperationContextNoop txn;
- this->_helper.btree.initAsEmpty(&txn);
+template <class OnDiskFormat>
+class ParentMergeNonRightToLeft : public BtreeLogicTestBase<OnDiskFormat> {
+public:
+ void run() {
+ OperationContextNoop txn;
+ ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
- this->checkValidNumKeys(0);
- }
- };
+ builder.makeTree("{d:{b:{a:null},bb:null,cc:{c:null}},i:{f:{e:null},h:{g:null}}}");
- template<class OnDiskFormat>
- class SimpleInsertDelete : public BtreeLogicTestBase<OnDiskFormat> {
- public:
- void run() {
- OperationContextNoop txn;
- this->_helper.btree.initAsEmpty(&txn);
+ ASSERT_EQUALS(11, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
- BSONObj key = simpleKey('z');
- this->insert(key, this->_helper.dummyDiskLoc);
+ // The tree has 7 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(8, this->_helper.recordStore.numRecords(NULL));
- this->checkValidNumKeys(1);
- this->locate(key, 0, true, this->_helper.headManager.getHead(&txn), 1);
+ const BSONObj k = BSON(""
+ << "bb");
+ verify(this->unindex(k));
- this->unindex(key);
+ ASSERT_EQUALS(10, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
- this->checkValidNumKeys(0);
- this->locate(key, 0, false, DiskLoc(), 1);
- }
- };
+ // Child does not currently replace parent in this case. Also, the tree
+ // has 6 buckets + 1 for the this->_helper.dummyDiskLoc.
+ ASSERT_EQUALS(7, this->_helper.recordStore.numRecords(NULL));
- template<class OnDiskFormat>
- class SplitUnevenBucketBase : public BtreeLogicTestBase<OnDiskFormat> {
- public:
- void run() {
- OperationContextNoop txn;
- this->_helper.btree.initAsEmpty(&txn);
+ builder.checkStructure("{i:{b:{a:null},cc:{c:null},d:null,f:{e:null},h:{g:null}}}");
+ }
+};
- for (int i = 0; i < 10; ++i) {
- BSONObj shortKey = simpleKey(shortToken(i), 1);
- this->insert(shortKey, this->_helper.dummyDiskLoc);
+template <class OnDiskFormat>
+class ParentMergeNonRightToRight : public BtreeLogicTestBase<OnDiskFormat> {
+public:
+ void run() {
+ OperationContextNoop txn;
+ ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
- BSONObj longKey = simpleKey(longToken(i), 800);
- this->insert(longKey, this->_helper.dummyDiskLoc);
- }
+ builder.makeTree("{d:{b:{a:null},cc:{c:null}},i:{f:{e:null},ff:null,h:{g:null}}}");
- this->checkValidNumKeys(20);
- ASSERT_EQUALS(1, this->head()->n);
- checkSplit();
- }
+ ASSERT_EQUALS(11, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
- protected:
- virtual char shortToken(int i) const = 0;
- virtual char longToken(int i) const = 0;
- virtual void checkSplit() = 0;
+ // The tree has 7 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(8, this->_helper.recordStore.numRecords(NULL));
- static char leftToken(int i) {
- return 'a' + i;
- }
+ const BSONObj k = BSON(""
+ << "ff");
+ verify(this->unindex(k));
- static char rightToken(int i) {
- return 'z' - i;
- }
- };
+ ASSERT_EQUALS(10, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
- template<class OnDiskFormat>
- class SplitRightHeavyBucket : public SplitUnevenBucketBase<OnDiskFormat> {
- private:
- virtual char shortToken(int i) const {
- return this->leftToken(i);
- }
- virtual char longToken(int i) const {
- return this->rightToken(i);
- }
- virtual void checkSplit() {
- ASSERT_EQUALS(15, this->child(this->head(), 0)->n);
- ASSERT_EQUALS(4, this->child(this->head(), 1)->n);
- }
- };
+ // Child does not currently replace parent in this case. Also, the tree
+ // has 6 buckets + 1 for the this->_helper.dummyDiskLoc.
+ ASSERT_EQUALS(7, this->_helper.recordStore.numRecords(NULL));
- template<class OnDiskFormat>
- class SplitLeftHeavyBucket : public SplitUnevenBucketBase<OnDiskFormat> {
- private:
- virtual char shortToken(int i) const {
- return this->rightToken(i);
- }
- virtual char longToken(int i) const {
- return this->leftToken(i);
- }
- virtual void checkSplit() {
- ASSERT_EQUALS(4, this->child(this->head(), 0)->n);
- ASSERT_EQUALS(15, this->child(this->head(), 1)->n);
- }
- };
+ builder.checkStructure("{i:{b:{a:null},cc:{c:null},d:null,f:{e:null},h:{g:null}}}");
+ }
+};
- template<class OnDiskFormat>
- class MissingLocate : public BtreeLogicTestBase<OnDiskFormat> {
- public:
- void run() {
- OperationContextNoop txn;
- this->_helper.btree.initAsEmpty(&txn);
+template <class OnDiskFormat>
+class CantMergeRightNoMerge : public BtreeLogicTestBase<OnDiskFormat> {
+public:
+ void run() {
+ OperationContextNoop txn;
+ ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
- for (int i = 0; i < 3; ++i) {
- BSONObj k = simpleKey('b' + 2 * i);
- this->insert(k, this->_helper.dummyDiskLoc);
- }
-
- locateExtended(1, 'a', 'b', this->_helper.headManager.getHead(&txn));
- locateExtended(1, 'c', 'd', this->_helper.headManager.getHead(&txn));
- locateExtended(1, 'e', 'f', this->_helper.headManager.getHead(&txn));
- locateExtended(1, 'g', 'g' + 1, RecordId()); // of course, 'h' isn't in the index.
-
- // old behavior
- // locateExtended( -1, 'a', 'b', dl() );
- // locateExtended( -1, 'c', 'd', dl() );
- // locateExtended( -1, 'e', 'f', dl() );
- // locateExtended( -1, 'g', 'f', dl() );
-
- locateExtended(-1, 'a', 'a' - 1, RecordId()); // of course, 'a' - 1 isn't in the index
- locateExtended(-1, 'c', 'b', this->_helper.headManager.getHead(&txn));
- locateExtended(-1, 'e', 'd', this->_helper.headManager.getHead(&txn));
- locateExtended(-1, 'g', 'f', this->_helper.headManager.getHead(&txn));
- }
+ builder.makeTree(
+ "{d:{b:{a:null},bb:null,cc:{c:null}},"
+ "dd:null,"
+ "_:{f:{e:null},h:{g:null}}}");
- private:
- void locateExtended(
- int direction, char token, char expectedMatch, RecordId expectedLocation) {
- const BSONObj k = simpleKey(token);
- int expectedPos = (expectedMatch - 'b') / 2;
+ ASSERT_EQUALS(11, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
- this->locate(k, expectedPos, false, expectedLocation, direction);
- }
- };
+ // The tree has 7 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(8, this->_helper.recordStore.numRecords(NULL));
- template<class OnDiskFormat>
- class MissingLocateMultiBucket : public BtreeLogicTestBase<OnDiskFormat> {
- public:
- void run() {
- OperationContextNoop txn;
- this->_helper.btree.initAsEmpty(&txn);
-
- this->insert(simpleKey('A', 800), this->_helper.dummyDiskLoc);
- this->insert(simpleKey('B', 800), this->_helper.dummyDiskLoc);
- this->insert(simpleKey('C', 800), this->_helper.dummyDiskLoc);
- this->insert(simpleKey('D', 800), this->_helper.dummyDiskLoc);
- this->insert(simpleKey('E', 800), this->_helper.dummyDiskLoc);
- this->insert(simpleKey('F', 800), this->_helper.dummyDiskLoc);
- this->insert(simpleKey('G', 800), this->_helper.dummyDiskLoc);
- this->insert(simpleKey('H', 800), this->_helper.dummyDiskLoc);
- this->insert(simpleKey('J', 800), this->_helper.dummyDiskLoc);
-
-            // This causes a split
- this->insert(simpleKey('I', 800), this->_helper.dummyDiskLoc);
-
- int pos;
- DiskLoc loc;
-
-            // 'E' is the split point and should be in the head; the rest should be ~50/50
- const BSONObj splitPoint = simpleKey('E', 800);
- this->_helper.btree.locate(&txn, splitPoint, this->_helper.dummyDiskLoc, 1, &pos, &loc);
- ASSERT_EQUALS(this->_helper.headManager.getHead(&txn), loc.toRecordId());
- ASSERT_EQUALS(0, pos);
-
- // Find the one before 'E'
- int largePos;
- DiskLoc largeLoc;
- this->_helper.btree.locate(&txn, splitPoint, this->_helper.dummyDiskLoc, 1, &largePos, &largeLoc);
- this->_helper.btree.advance(&txn, &largeLoc, &largePos, -1);
-
- // Find the one after 'E'
- int smallPos;
- DiskLoc smallLoc;
- this->_helper.btree.locate(&txn, splitPoint, this->_helper.dummyDiskLoc, 1, &smallPos, &smallLoc);
- this->_helper.btree.advance(&txn, &smallLoc, &smallPos, 1);
-
- ASSERT_NOT_EQUALS(smallLoc, largeLoc);
- ASSERT_NOT_EQUALS(smallLoc, loc);
- ASSERT_NOT_EQUALS(largeLoc, loc);
- }
- };
+ const BSONObj k = BSON(""
+ << "bb");
+ verify(this->unindex(k));
- /**
-     * Validates that adding keys incrementally produces buckets that are 90%/10% full.
- */
- template<class OnDiskFormat>
- class SERVER983 : public BtreeLogicTestBase<OnDiskFormat> {
- public:
- void run() {
- OperationContextNoop txn;
- this->_helper.btree.initAsEmpty(&txn);
-
- this->insert(simpleKey('A', 800), this->_helper.dummyDiskLoc);
- this->insert(simpleKey('B', 800), this->_helper.dummyDiskLoc);
- this->insert(simpleKey('C', 800), this->_helper.dummyDiskLoc);
- this->insert(simpleKey('D', 800), this->_helper.dummyDiskLoc);
- this->insert(simpleKey('E', 800), this->_helper.dummyDiskLoc);
- this->insert(simpleKey('F', 800), this->_helper.dummyDiskLoc);
- this->insert(simpleKey('G', 800), this->_helper.dummyDiskLoc);
- this->insert(simpleKey('H', 800), this->_helper.dummyDiskLoc);
- this->insert(simpleKey('I', 800), this->_helper.dummyDiskLoc);
-
-            // This will cause a split
- this->insert(simpleKey('J', 800), this->_helper.dummyDiskLoc);
-
- int pos;
- DiskLoc loc;
-
-            // 'H' is the maximum 'large' interval key; 90% should be < 'H' and 10% larger
- const BSONObj splitPoint = simpleKey('H', 800);
- this->_helper.btree.locate(&txn, splitPoint, this->_helper.dummyDiskLoc, 1, &pos, &loc);
- ASSERT_EQUALS(this->_helper.headManager.getHead(&txn), loc.toRecordId());
- ASSERT_EQUALS(0, pos);
-
- // Find the one before 'H'
- int largePos;
- DiskLoc largeLoc;
- this->_helper.btree.locate(&txn,
- splitPoint, this->_helper.dummyDiskLoc, 1, &largePos, &largeLoc);
- this->_helper.btree.advance(&txn, &largeLoc, &largePos, -1);
-
- // Find the one after 'H'
- int smallPos;
- DiskLoc smallLoc;
- this->_helper.btree.locate(&txn,
- splitPoint, this->_helper.dummyDiskLoc, 1, &smallPos, &smallLoc);
- this->_helper.btree.advance(&txn, &smallLoc, &smallPos, 1);
-
- ASSERT_NOT_EQUALS(smallLoc, largeLoc);
- ASSERT_NOT_EQUALS(smallLoc, loc);
- ASSERT_NOT_EQUALS(largeLoc, loc);
- }
- };
+ ASSERT_EQUALS(10, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
- template<class OnDiskFormat>
- class DontReuseUnused : public BtreeLogicTestBase<OnDiskFormat> {
- public:
- void run() {
- OperationContextNoop txn;
- this->_helper.btree.initAsEmpty(&txn);
+ // The tree has 7 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(8, this->_helper.recordStore.numRecords(NULL));
- for (int i = 0; i < 10; ++i) {
- const BSONObj k = simpleKey('b' + 2 * i, 800);
- this->insert(k, this->_helper.dummyDiskLoc);
- }
+ builder.checkStructure(
+ "{d:{b:{a:null},cc:{c:null}},"
+ "dd:null,"
+ "_:{f:{e:null},h:{g:null}}}");
+ }
+};
- const BSONObj root = simpleKey('p', 800);
- this->unindex(root);
+template <class OnDiskFormat>
+class CantMergeLeftNoMerge : public BtreeLogicTestBase<OnDiskFormat> {
+public:
+ void run() {
+ OperationContextNoop txn;
+ ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
- this->insert(root, this->_helper.dummyDiskLoc);
- this->locate(root, 0, true, this->head()->nextChild, 1);
- }
- };
+ builder.makeTree("{c:{b:{a:null}},d:null,_:{f:{e:null},g:null}}");
- template<class OnDiskFormat>
- class MergeBucketsTestBase : public BtreeLogicTestBase<OnDiskFormat> {
- public:
- void run() {
- OperationContextNoop txn;
- this->_helper.btree.initAsEmpty(&txn);
+ ASSERT_EQUALS(7, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
- for (int i = 0; i < 10; ++i) {
- const BSONObj k = simpleKey('b' + 2 * i, 800);
- this->insert(k, this->_helper.dummyDiskLoc);
- }
-
- // numRecords() - 1, because this->_helper.dummyDiskLoc is actually in the record store too
- ASSERT_EQUALS(3, this->_helper.recordStore.numRecords(NULL) - 1);
+ // The tree has 5 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(6, this->_helper.recordStore.numRecords(NULL));
- long long expectedCount = 10 - unindexKeys();
- ASSERT_EQUALS(1, this->_helper.recordStore.numRecords(NULL) - 1);
+ const BSONObj k = BSON(""
+ << "g");
+ verify(this->unindex(k));
- long long unusedCount = 0;
- ASSERT_EQUALS(expectedCount, this->_helper.btree.fullValidate(&txn, &unusedCount, true, false, 0));
- ASSERT_EQUALS(0, unusedCount);
- }
+ ASSERT_EQUALS(6, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
- protected:
- virtual int unindexKeys() = 0;
- };
+ // The tree has 5 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(6, this->_helper.recordStore.numRecords(NULL));
- template<class OnDiskFormat>
- class MergeBucketsLeft : public MergeBucketsTestBase<OnDiskFormat> {
- virtual int unindexKeys() {
- BSONObj k = simpleKey('b', 800);
- this->unindex(k);
+ builder.checkStructure("{c:{b:{a:null}},d:null,_:{f:{e:null}}}");
+ }
+};
- k = simpleKey('b' + 2, 800);
- this->unindex(k);
+template <class OnDiskFormat>
+class MergeOption : public BtreeLogicTestBase<OnDiskFormat> {
+public:
+ void run() {
+ OperationContextNoop txn;
+ ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
- k = simpleKey('b' + 4, 800);
- this->unindex(k);
+ builder.makeTree("{c:{b:{a:null}},f:{e:{d:null},ee:null},_:{h:{g:null}}}");
- k = simpleKey('b' + 6, 800);
- this->unindex(k);
+ ASSERT_EQUALS(9, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
- return 4;
- }
- };
+ // The tree has 7 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(8, this->_helper.recordStore.numRecords(NULL));
- template<class OnDiskFormat>
- class MergeBucketsRight : public MergeBucketsTestBase<OnDiskFormat> {
- virtual int unindexKeys() {
- const BSONObj k = simpleKey('b' + 2 * 9, 800);
- this->unindex(k);
- return 1;
- }
- };
+ const BSONObj k = BSON(""
+ << "ee");
+ verify(this->unindex(k));
- template<class OnDiskFormat>
- class MergeBucketsDontReplaceHead : public BtreeLogicTestBase<OnDiskFormat> {
- public:
- void run() {
- OperationContextNoop txn;
- this->_helper.btree.initAsEmpty(&txn);
+ ASSERT_EQUALS(8, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
- for (int i = 0; i < 18; ++i) {
- const BSONObj k = simpleKey('a' + i, 800);
- this->insert(k, this->_helper.dummyDiskLoc);
- }
+ // The tree has 6 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(7, this->_helper.recordStore.numRecords(NULL));
-            // numRecords(NULL) - 1, because dummyDiskLoc is actually in the record store too
- ASSERT_EQUALS(4, this->_helper.recordStore.numRecords(NULL) - 1);
+ builder.checkStructure("{c:{b:{a:null}},_:{e:{d:null},f:null,h:{g:null}}}");
+ }
+};
- const BSONObj k = simpleKey('a' + 17, 800);
- this->unindex(k);
- ASSERT_EQUALS(3, this->_helper.recordStore.numRecords(NULL) - 1);
+template <class OnDiskFormat>
+class ForceMergeLeft : public BtreeLogicTestBase<OnDiskFormat> {
+public:
+ void run() {
+ OperationContextNoop txn;
+ ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
- long long unusedCount = 0;
- ASSERT_EQUALS(17, this->_helper.btree.fullValidate(&txn, &unusedCount, true, false, 0));
- ASSERT_EQUALS(0, unusedCount);
- }
- };
+ builder.makeTree("{c:{b:{a:null}},f:{e:{d:null},ee:null},ff:null,_:{h:{g:null}}}");
- template<class OnDiskFormat>
- class MergeBucketsDelInternal : public BtreeLogicTestBase<OnDiskFormat> {
- public:
- void run() {
- OperationContextNoop txn;
- ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
+ ASSERT_EQUALS(10, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
- builder.makeTree("{d:{b:{a:null},bb:null,_:{c:null}},_:{f:{e:null},_:{g:null}}}");
- ASSERT_EQUALS(8, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
+ // The tree has 7 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(8, this->_helper.recordStore.numRecords(NULL));
- // The tree has 7 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(8, this->_helper.recordStore.numRecords(NULL));
+ const BSONObj k = BSON(""
+ << "ee");
+ verify(this->unindex(k));
- const BSONObj k = BSON("" << "bb");
- verify(this->unindex(k));
+ ASSERT_EQUALS(9, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
- ASSERT_EQUALS(7, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
+ // The tree has 6 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(7, this->_helper.recordStore.numRecords(NULL));
- // The tree has 5 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(6, this->_helper.recordStore.numRecords(NULL));
+ builder.checkStructure("{f:{b:{a:null},c:null,e:{d:null}},ff:null,_:{h:{g:null}}}");
+ }
+};
- builder.checkStructure("{b:{a:null},d:{c:null},f:{e:null},_:{g:null}}");
- }
- };
+template <class OnDiskFormat>
+class ForceMergeRight : public BtreeLogicTestBase<OnDiskFormat> {
+public:
+ void run() {
+ OperationContextNoop txn;
+ ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
- template<class OnDiskFormat>
- class MergeBucketsRightNull : public BtreeLogicTestBase<OnDiskFormat> {
- public:
- void run() {
- OperationContextNoop txn;
- ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
+ builder.makeTree("{c:{b:{a:null}},cc:null,f:{e:{d:null},ee:null},_:{h:{g:null}}}");
- builder.makeTree("{d:{b:{a:null},bb:null,cc:{c:null}},_:{f:{e:null},h:{g:null}}}");
- ASSERT_EQUALS(10, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
+ ASSERT_EQUALS(10, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
- // The tree has 7 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(8, this->_helper.recordStore.numRecords(NULL));
+ // The tree has 7 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(8, this->_helper.recordStore.numRecords(NULL));
- const BSONObj k = BSON("" << "bb");
- verify(this->unindex(k));
+ const BSONObj k = BSON(""
+ << "ee");
+ verify(this->unindex(k));
- ASSERT_EQUALS(9, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
+ ASSERT_EQUALS(9, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
- // The tree has 5 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(6, this->_helper.recordStore.numRecords(NULL));
+ // The tree has 6 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(7, this->_helper.recordStore.numRecords(NULL));
- builder.checkStructure("{b:{a:null},cc:{c:null},d:null,f:{e:null},h:{g:null}}");
- }
- };
+ builder.checkStructure("{c:{b:{a:null}},cc:null,_:{e:{d:null},f:null,h:{g:null}}}");
+ }
+};
- // This comment was here during porting, not sure what it means:
- //
- // "Not yet handling this case"
- template<class OnDiskFormat>
- class DontMergeSingleBucket : public BtreeLogicTestBase<OnDiskFormat> {
- public:
- void run() {
- OperationContextNoop txn;
- ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
+template <class OnDiskFormat>
+class RecursiveMerge : public BtreeLogicTestBase<OnDiskFormat> {
+public:
+ void run() {
+ OperationContextNoop txn;
+ ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
- builder.makeTree("{d:{b:{a:null},c:null}}");
+ builder.makeTree("{h:{e:{b:{a:null},c:null,d:null},g:{f:null}},j:{i:null}}");
- ASSERT_EQUALS(4, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
+ ASSERT_EQUALS(10, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
- // The tree has 3 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(4, this->_helper.recordStore.numRecords(NULL));
+ // The tree has 6 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(7, this->_helper.recordStore.numRecords(NULL));
- const BSONObj k = BSON("" << "c");
- verify(this->unindex(k));
+ const BSONObj k = BSON(""
+ << "c");
+ verify(this->unindex(k));
- ASSERT_EQUALS(3, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
+ ASSERT_EQUALS(9, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
- // The tree has 3 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(4, this->_helper.recordStore.numRecords(NULL));
+ // The tree has 4 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(5, this->_helper.recordStore.numRecords(NULL));
- builder.checkStructure("{d:{b:{a:null}}}");
- }
- };
+ // Height is not currently reduced in this case
+ builder.checkStructure("{j:{g:{b:{a:null},d:null,e:null,f:null},h:null,i:null}}");
+ }
+};
- template<class OnDiskFormat>
- class ParentMergeNonRightToLeft : public BtreeLogicTestBase<OnDiskFormat> {
- public:
- void run() {
- OperationContextNoop txn;
- ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
+template <class OnDiskFormat>
+class RecursiveMergeRightBucket : public BtreeLogicTestBase<OnDiskFormat> {
+public:
+ void run() {
+ OperationContextNoop txn;
+ ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
- builder.makeTree("{d:{b:{a:null},bb:null,cc:{c:null}},i:{f:{e:null},h:{g:null}}}");
+ builder.makeTree("{h:{e:{b:{a:null},c:null,d:null},g:{f:null}},_:{i:null}}");
- ASSERT_EQUALS(11, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
+ ASSERT_EQUALS(9, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
- // The tree has 7 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(8, this->_helper.recordStore.numRecords(NULL));
+ // The tree has 6 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(7, this->_helper.recordStore.numRecords(NULL));
- const BSONObj k = BSON("" << "bb");
- verify(this->unindex(k));
+ const BSONObj k = BSON(""
+ << "c");
+ verify(this->unindex(k));
- ASSERT_EQUALS(10, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
+ ASSERT_EQUALS(8, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
- // Child does not currently replace parent in this case. Also, the tree
- // has 6 buckets + 1 for the this->_helper.dummyDiskLoc.
- ASSERT_EQUALS(7, this->_helper.recordStore.numRecords(NULL));
+ // The tree has 3 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(4, this->_helper.recordStore.numRecords(NULL));
- builder.checkStructure("{i:{b:{a:null},cc:{c:null},d:null,f:{e:null},h:{g:null}}}");
- }
- };
+ builder.checkStructure("{g:{b:{a:null},d:null,e:null,f:null},h:null,i:null}");
+ }
+};
- template<class OnDiskFormat>
- class ParentMergeNonRightToRight : public BtreeLogicTestBase<OnDiskFormat> {
- public:
- void run() {
- OperationContextNoop txn;
- ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
+template <class OnDiskFormat>
+class RecursiveMergeDoubleRightBucket : public BtreeLogicTestBase<OnDiskFormat> {
+public:
+ void run() {
+ OperationContextNoop txn;
+ ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
- builder.makeTree("{d:{b:{a:null},cc:{c:null}},i:{f:{e:null},ff:null,h:{g:null}}}");
+ builder.makeTree("{h:{e:{b:{a:null},c:null,d:null},_:{f:null}},_:{i:null}}");
- ASSERT_EQUALS(11, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
+ ASSERT_EQUALS(8, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
- // The tree has 7 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(8, this->_helper.recordStore.numRecords(NULL));
+ // The tree has 6 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(7, this->_helper.recordStore.numRecords(NULL));
- const BSONObj k = BSON("" << "ff");
- verify(this->unindex(k));
+ const BSONObj k = BSON(""
+ << "c");
+ verify(this->unindex(k));
- ASSERT_EQUALS(10, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
+ ASSERT_EQUALS(7, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
- // Child does not currently replace parent in this case. Also, the tree
- // has 6 buckets + 1 for the this->_helper.dummyDiskLoc.
- ASSERT_EQUALS(7, this->_helper.recordStore.numRecords(NULL));
+ // The tree has 4 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(5, this->_helper.recordStore.numRecords(NULL));
- builder.checkStructure("{i:{b:{a:null},cc:{c:null},d:null,f:{e:null},h:{g:null}}}");
- }
- };
+ // no recursion currently in this case
+ builder.checkStructure("{h:{b:{a:null},d:null,e:null,f:null},_:{i:null}}");
+ }
+};
- template<class OnDiskFormat>
- class CantMergeRightNoMerge : public BtreeLogicTestBase<OnDiskFormat> {
- public:
- void run() {
- OperationContextNoop txn;
- ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
+template <class OnDiskFormat>
+class MergeSizeTestBase : public BtreeLogicTestBase<OnDiskFormat> {
+public:
+ MergeSizeTestBase() : _count(0) {}
- builder.makeTree("{d:{b:{a:null},bb:null,cc:{c:null}},"
- "dd:null,"
- "_:{f:{e:null},h:{g:null}}}");
+ void run() {
+ OperationContextNoop txn;
+ this->_helper.btree.initAsEmpty(&txn);
- ASSERT_EQUALS(11, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
+ ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
- // The tree has 7 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(8, this->_helper.recordStore.numRecords(NULL));
+ const BSONObj& topKey = biggestKey('m');
- const BSONObj k = BSON("" << "bb");
- verify(this->unindex(k));
+ DiskLoc leftChild = this->newBucket();
+ builder.push(
+ DiskLoc::fromRecordId(this->_helper.headManager.getHead(&txn)), topKey, leftChild);
+ _count++;
- ASSERT_EQUALS(10, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
+ DiskLoc rightChild = this->newBucket();
+ this->setBucketNextChild(DiskLoc::fromRecordId(this->_helper.headManager.getHead(&txn)),
+ rightChild);
- // The tree has 7 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(8, this->_helper.recordStore.numRecords(NULL));
+ _count += builder.fillBucketToExactSize(leftChild, leftSize(), 'a');
+ _count += builder.fillBucketToExactSize(rightChild, rightSize(), 'n');
- builder.checkStructure("{d:{b:{a:null},cc:{c:null}},"
- "dd:null,"
- "_:{f:{e:null},h:{g:null}}}");
+ ASSERT(leftAdditional() <= 2);
+ if (leftAdditional() >= 2) {
+ builder.push(leftChild, bigKey('k'), DiskLoc());
}
- };
-
- template<class OnDiskFormat>
- class CantMergeLeftNoMerge : public BtreeLogicTestBase<OnDiskFormat> {
- public:
- void run() {
- OperationContextNoop txn;
- ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
-
- builder.makeTree("{c:{b:{a:null}},d:null,_:{f:{e:null},g:null}}");
-
- ASSERT_EQUALS(7, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
-
- // The tree has 5 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(6, this->_helper.recordStore.numRecords(NULL));
-
- const BSONObj k = BSON("" << "g");
- verify(this->unindex(k));
-
- ASSERT_EQUALS(6, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
-
- // The tree has 5 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(6, this->_helper.recordStore.numRecords(NULL));
-
- builder.checkStructure("{c:{b:{a:null}},d:null,_:{f:{e:null}}}");
+ if (leftAdditional() >= 1) {
+ builder.push(leftChild, bigKey('l'), DiskLoc());
}
- };
-
- template<class OnDiskFormat>
- class MergeOption : public BtreeLogicTestBase<OnDiskFormat> {
- public:
- void run() {
- OperationContextNoop txn;
- ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
-
- builder.makeTree("{c:{b:{a:null}},f:{e:{d:null},ee:null},_:{h:{g:null}}}");
-
- ASSERT_EQUALS(9, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
- // The tree has 7 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(8, this->_helper.recordStore.numRecords(NULL));
-
- const BSONObj k = BSON("" << "ee");
- verify(this->unindex(k));
-
- ASSERT_EQUALS(8, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
-
- // The tree has 6 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(7, this->_helper.recordStore.numRecords(NULL));
-
- builder.checkStructure("{c:{b:{a:null}},_:{e:{d:null},f:null,h:{g:null}}}");
+ ASSERT(rightAdditional() <= 2);
+ if (rightAdditional() >= 2) {
+ builder.push(rightChild, bigKey('y'), DiskLoc());
}
- };
-
- template<class OnDiskFormat>
- class ForceMergeLeft : public BtreeLogicTestBase<OnDiskFormat> {
- public:
- void run() {
- OperationContextNoop txn;
- ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
-
- builder.makeTree("{c:{b:{a:null}},f:{e:{d:null},ee:null},ff:null,_:{h:{g:null}}}");
-
- ASSERT_EQUALS(10, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
-
- // The tree has 7 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(8, this->_helper.recordStore.numRecords(NULL));
-
- const BSONObj k = BSON("" << "ee");
- verify(this->unindex(k));
-
- ASSERT_EQUALS(9, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
-
- // The tree has 6 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(7, this->_helper.recordStore.numRecords(NULL));
-
- builder.checkStructure("{f:{b:{a:null},c:null,e:{d:null}},ff:null,_:{h:{g:null}}}");
- }
- };
-
- template<class OnDiskFormat>
- class ForceMergeRight : public BtreeLogicTestBase<OnDiskFormat> {
- public:
- void run() {
- OperationContextNoop txn;
- ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
-
- builder.makeTree("{c:{b:{a:null}},cc:null,f:{e:{d:null},ee:null},_:{h:{g:null}}}");
-
- ASSERT_EQUALS(10, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
-
- // The tree has 7 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(8, this->_helper.recordStore.numRecords(NULL));
-
- const BSONObj k = BSON("" << "ee");
- verify(this->unindex(k));
-
- ASSERT_EQUALS(9, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
-
- // The tree has 6 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(7, this->_helper.recordStore.numRecords(NULL));
-
- builder.checkStructure("{c:{b:{a:null}},cc:null,_:{e:{d:null},f:null,h:{g:null}}}");
- }
- };
-
- template<class OnDiskFormat>
- class RecursiveMerge : public BtreeLogicTestBase<OnDiskFormat> {
- public:
- void run() {
- OperationContextNoop txn;
- ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
-
- builder.makeTree("{h:{e:{b:{a:null},c:null,d:null},g:{f:null}},j:{i:null}}");
-
- ASSERT_EQUALS(10, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
-
- // The tree has 6 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(7, this->_helper.recordStore.numRecords(NULL));
-
- const BSONObj k = BSON("" << "c");
- verify(this->unindex(k));
-
- ASSERT_EQUALS(9, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
-
- // The tree has 4 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(5, this->_helper.recordStore.numRecords(NULL));
-
- // Height is not currently reduced in this case
- builder.checkStructure("{j:{g:{b:{a:null},d:null,e:null,f:null},h:null,i:null}}");
+ if (rightAdditional() >= 1) {
+ builder.push(rightChild, bigKey('z'), DiskLoc());
}
- };
-
- template<class OnDiskFormat>
- class RecursiveMergeRightBucket : public BtreeLogicTestBase<OnDiskFormat> {
- public:
- void run() {
- OperationContextNoop txn;
- ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
-
- builder.makeTree("{h:{e:{b:{a:null},c:null,d:null},g:{f:null}},_:{i:null}}");
-
- ASSERT_EQUALS(9, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
-
- // The tree has 6 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(7, this->_helper.recordStore.numRecords(NULL));
-
- const BSONObj k = BSON("" << "c");
- verify(this->unindex(k));
-
- ASSERT_EQUALS(8, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
-
- // The tree has 3 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(4, this->_helper.recordStore.numRecords(NULL));
-
- builder.checkStructure("{g:{b:{a:null},d:null,e:null,f:null},h:null,i:null}");
- }
- };
-
- template<class OnDiskFormat>
- class RecursiveMergeDoubleRightBucket : public BtreeLogicTestBase<OnDiskFormat> {
- public:
- void run() {
- OperationContextNoop txn;
- ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
-
- builder.makeTree("{h:{e:{b:{a:null},c:null,d:null},_:{f:null}},_:{i:null}}");
-
- ASSERT_EQUALS(8, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
-
- // The tree has 6 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(7, this->_helper.recordStore.numRecords(NULL));
-
- const BSONObj k = BSON("" << "c");
- verify(this->unindex(k));
-
- ASSERT_EQUALS(7, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
-
- // The tree has 4 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(5, this->_helper.recordStore.numRecords(NULL));
-
- // no recursion currently in this case
- builder.checkStructure("{h:{b:{a:null},d:null,e:null,f:null},_:{i:null}}");
- }
- };
-
- template<class OnDiskFormat>
- class MergeSizeTestBase : public BtreeLogicTestBase<OnDiskFormat> {
- public:
- MergeSizeTestBase() : _count(0) {
-
- }
-
- void run() {
- OperationContextNoop txn;
- this->_helper.btree.initAsEmpty(&txn);
-
- ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
-
- const BSONObj& topKey = biggestKey('m');
-
- DiskLoc leftChild = this->newBucket();
- builder.push(DiskLoc::fromRecordId(this->_helper.headManager.getHead(&txn)), topKey,
- leftChild);
- _count++;
-
- DiskLoc rightChild = this->newBucket();
- this->setBucketNextChild(DiskLoc::fromRecordId(this->_helper.headManager.getHead(&txn)),
- rightChild);
-
- _count += builder.fillBucketToExactSize(leftChild, leftSize(), 'a');
- _count += builder.fillBucketToExactSize(rightChild, rightSize(), 'n');
-
- ASSERT(leftAdditional() <= 2);
- if (leftAdditional() >= 2) {
- builder.push(leftChild, bigKey('k'), DiskLoc());
- }
- if (leftAdditional() >= 1) {
- builder.push(leftChild, bigKey('l'), DiskLoc());
- }
-
- ASSERT(rightAdditional() <= 2);
- if (rightAdditional() >= 2) {
- builder.push(rightChild, bigKey('y'), DiskLoc());
- }
- if (rightAdditional() >= 1) {
- builder.push(rightChild, bigKey('z'), DiskLoc());
- }
-
- _count += leftAdditional() + rightAdditional();
-
- initCheck();
-
- const char *keys = delKeys();
- for (const char *i = keys; *i; ++i) {
- long long unused = 0;
- ASSERT_EQUALS(_count, this->_helper.btree.fullValidate(&txn, &unused, true, false, 0));
- ASSERT_EQUALS(0, unused);
- // The tree has 3 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(4, this->_helper.recordStore.numRecords(NULL));
+ _count += leftAdditional() + rightAdditional();
- const BSONObj k = bigKey(*i);
- this->unindex(k);
-
- --_count;
- }
+ initCheck();
+ const char* keys = delKeys();
+ for (const char* i = keys; *i; ++i) {
long long unused = 0;
ASSERT_EQUALS(_count, this->_helper.btree.fullValidate(&txn, &unused, true, false, 0));
ASSERT_EQUALS(0, unused);
- validate();
-
- if (!merge()) {
- // The tree has 3 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(4, this->_helper.recordStore.numRecords(NULL));
- }
- else {
- // The tree has 1 bucket + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(2, this->_helper.recordStore.numRecords(NULL));
- }
- }
-
- protected:
- virtual int leftAdditional() const { return 2; }
- virtual int rightAdditional() const { return 2; }
- virtual void initCheck() {}
- virtual void validate() {}
- virtual int leftSize() const = 0;
- virtual int rightSize() const = 0;
- virtual const char * delKeys() const { return "klyz"; }
- virtual bool merge() const { return true; }
-
- static BSONObj bigKey(char a) {
- return simpleKey(a, 801);
- }
-
- static BSONObj biggestKey(char a) {
- int size = OnDiskFormat::KeyMax - bigSize() + 801;
- return simpleKey(a, size);
- }
-
- static int bigSize() {
- return typename BtreeLogicTestBase<OnDiskFormat>::KeyDataOwnedType(bigKey('a')).dataSize();
- }
-
- static int biggestSize() {
- return typename BtreeLogicTestBase<OnDiskFormat>::KeyDataOwnedType(biggestKey('a')).dataSize();
- }
-
- int _count;
- };
-
- template<class OnDiskFormat>
- class MergeSizeJustRightRight : public MergeSizeTestBase<OnDiskFormat> {
- protected:
- virtual int rightSize() const {
- return BtreeLogic<OnDiskFormat>::lowWaterMark() - 1;
- }
-
- virtual int leftSize() const {
- return OnDiskFormat::BucketBodySize -
- MergeSizeTestBase<OnDiskFormat>::biggestSize() -
- sizeof(typename BtreeLogicTestBase<OnDiskFormat>::FixedWidthKeyType) -
- (BtreeLogic<OnDiskFormat>::lowWaterMark() - 1);
- }
- };
-
- template<class OnDiskFormat>
- class MergeSizeJustRightLeft : public MergeSizeTestBase<OnDiskFormat> {
- protected:
- virtual int leftSize() const {
- return BtreeLogic<OnDiskFormat>::lowWaterMark() - 1;
- }
-
- virtual int rightSize() const {
- return OnDiskFormat::BucketBodySize -
- MergeSizeTestBase<OnDiskFormat>::biggestSize() -
- sizeof(typename BtreeLogicTestBase<OnDiskFormat>::FixedWidthKeyType) -
- (BtreeLogic<OnDiskFormat>::lowWaterMark() - 1);
- }
-
- virtual const char * delKeys() const { return "yzkl"; }
- };
-
- template<class OnDiskFormat>
- class MergeSizeRight : public MergeSizeJustRightRight<OnDiskFormat> {
- virtual int rightSize() const { return MergeSizeJustRightRight<OnDiskFormat>::rightSize() - 1; }
- virtual int leftSize() const { return MergeSizeJustRightRight<OnDiskFormat>::leftSize() + 1; }
- };
-
- template<class OnDiskFormat>
- class MergeSizeLeft : public MergeSizeJustRightLeft<OnDiskFormat> {
- virtual int rightSize() const { return MergeSizeJustRightLeft<OnDiskFormat>::rightSize() + 1; }
- virtual int leftSize() const { return MergeSizeJustRightLeft<OnDiskFormat>::leftSize() - 1; }
- };
-
- template<class OnDiskFormat>
- class NoMergeBelowMarkRight : public MergeSizeJustRightRight<OnDiskFormat> {
- virtual int rightSize() const { return MergeSizeJustRightRight<OnDiskFormat>::rightSize() + 1; }
- virtual int leftSize() const { return MergeSizeJustRightRight<OnDiskFormat>::leftSize() - 1; }
- virtual bool merge() const { return false; }
- };
-
- template<class OnDiskFormat>
- class NoMergeBelowMarkLeft : public MergeSizeJustRightLeft<OnDiskFormat> {
- virtual int rightSize() const { return MergeSizeJustRightLeft<OnDiskFormat>::rightSize() - 1; }
- virtual int leftSize() const { return MergeSizeJustRightLeft<OnDiskFormat>::leftSize() + 1; }
- virtual bool merge() const { return false; }
- };
-
- template<class OnDiskFormat>
- class MergeSizeRightTooBig : public MergeSizeJustRightLeft<OnDiskFormat> {
- virtual int rightSize() const { return MergeSizeJustRightLeft<OnDiskFormat>::rightSize() + 1; }
- virtual bool merge() const { return false; }
- };
-
- template<class OnDiskFormat>
- class MergeSizeLeftTooBig : public MergeSizeJustRightRight<OnDiskFormat> {
- virtual int leftSize() const { return MergeSizeJustRightRight<OnDiskFormat>::leftSize() + 1; }
- virtual bool merge() const { return false; }
- };
-
- template<class OnDiskFormat>
- class MergeRightEmpty : public MergeSizeTestBase<OnDiskFormat> {
- protected:
- virtual int rightAdditional() const { return 1; }
- virtual int leftAdditional() const { return 1; }
- virtual const char * delKeys() const { return "lz"; }
- virtual int rightSize() const { return 0; }
- virtual int leftSize() const {
- return OnDiskFormat::BucketBodySize -
- MergeSizeTestBase<OnDiskFormat>::biggestSize() -
- sizeof(typename BtreeLogicTestBase<OnDiskFormat>::FixedWidthKeyType);
- }
- };
-
- template<class OnDiskFormat>
- class MergeMinRightEmpty : public MergeSizeTestBase<OnDiskFormat> {
- protected:
- virtual int rightAdditional() const { return 1; }
- virtual int leftAdditional() const { return 0; }
- virtual const char * delKeys() const { return "z"; }
- virtual int rightSize() const { return 0; }
- virtual int leftSize() const {
- return MergeSizeTestBase<OnDiskFormat>::bigSize() +
- sizeof(typename BtreeLogicTestBase<OnDiskFormat>::FixedWidthKeyType);
- }
- };
-
- template<class OnDiskFormat>
- class MergeLeftEmpty : public MergeSizeTestBase<OnDiskFormat> {
- protected:
- virtual int rightAdditional() const { return 1; }
- virtual int leftAdditional() const { return 1; }
- virtual const char * delKeys() const { return "zl"; }
- virtual int leftSize() const { return 0; }
- virtual int rightSize() const {
- return OnDiskFormat::BucketBodySize -
- MergeSizeTestBase<OnDiskFormat>::biggestSize() -
- sizeof(typename BtreeLogicTestBase<OnDiskFormat>::FixedWidthKeyType);
- }
- };
-
- template<class OnDiskFormat>
- class MergeMinLeftEmpty : public MergeSizeTestBase<OnDiskFormat> {
- protected:
- virtual int leftAdditional() const { return 1; }
- virtual int rightAdditional() const { return 0; }
- virtual const char * delKeys() const { return "l"; }
- virtual int leftSize() const { return 0; }
- virtual int rightSize() const {
- return MergeSizeTestBase<OnDiskFormat>::bigSize() +
- sizeof(typename BtreeLogicTestBase<OnDiskFormat>::FixedWidthKeyType);
- }
- };
-
- template<class OnDiskFormat>
- class BalanceRightEmpty : public MergeRightEmpty<OnDiskFormat> {
- protected:
- virtual int leftSize() const {
- return OnDiskFormat::BucketBodySize -
- MergeSizeTestBase<OnDiskFormat>::biggestSize() -
- sizeof(typename BtreeLogicTestBase<OnDiskFormat>::FixedWidthKeyType) + 1;
- }
-
- virtual bool merge() const { return false; }
-
- virtual void initCheck() {
- OperationContextNoop txn;
- _oldTop = this->getKey(this->_helper.headManager.getHead(&txn), 0).data.toBson();
- }
-
- virtual void validate() {
- OperationContextNoop txn;
- ASSERT_NOT_EQUALS(_oldTop,
- this->getKey(this->_helper.headManager.getHead(&txn), 0).data.toBson());
- }
-
- private:
- BSONObj _oldTop;
- };
-
- template<class OnDiskFormat>
- class BalanceLeftEmpty : public MergeLeftEmpty<OnDiskFormat> {
- protected:
- virtual int rightSize() const {
- return OnDiskFormat::BucketBodySize -
- MergeSizeTestBase<OnDiskFormat>::biggestSize() -
- sizeof(typename BtreeLogicTestBase<OnDiskFormat>::FixedWidthKeyType) + 1;
- }
-
- virtual bool merge() const { return false; }
-
- virtual void initCheck() {
- OperationContextNoop txn;
- _oldTop = this->getKey(this->_helper.headManager.getHead(&txn), 0).data.toBson();
- }
-
- virtual void validate() {
- OperationContextNoop txn;
- ASSERT_TRUE(_oldTop != this->getKey(this->_helper.headManager.getHead(&txn), 0).data.toBson());
- }
-
- private:
- BSONObj _oldTop;
- };
-
- template<class OnDiskFormat>
- class BalanceOneLeftToRight : public BtreeLogicTestBase<OnDiskFormat> {
- public:
- void run() {
- OperationContextNoop txn;
- ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
-
- builder.makeTree("{$10:{$1:null,$2:null,$3:null,$4:null,$5:null,$6:null},"
- "b:{$20:null,$30:null,$40:null,$50:null,a:null},"
- "_:{c:null}}");
-
- ASSERT_EQUALS(14, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
-
- // The tree has 4 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(5, this->_helper.recordStore.numRecords(NULL));
-
- const BSONObj k = BSON("" << bigNumString(0x40, 800));
- ASSERT(this->unindex(k));
-
- ASSERT_EQUALS(13, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
-
- // The tree has 4 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(5, this->_helper.recordStore.numRecords(NULL));
-
- builder.checkStructure("{$6:{$1:null,$2:null,$3:null,$4:null,$5:null},"
- "b:{$10:null,$20:null,$30:null,$50:null,a:null},"
- "_:{c:null}}");
- }
- };
-
- template<class OnDiskFormat>
- class BalanceOneRightToLeft : public BtreeLogicTestBase<OnDiskFormat> {
- public:
- void run() {
- OperationContextNoop txn;
- ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
-
- builder.makeTree("{$10:{$1:null,$2:null,$3:null,$4:null},"
- "b:{$20:null,$30:null,$40:null,$50:null,$60:null,$70:null},"
- "_:{c:null}}");
-
- ASSERT_EQUALS(13, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
-
- // The tree has 4 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(5, this->_helper.recordStore.numRecords(NULL));
-
- const BSONObj k = BSON("" << bigNumString(0x3, 800));
- ASSERT(this->unindex(k));
-
- ASSERT_EQUALS(12, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
-
- // The tree has 4 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(5, this->_helper.recordStore.numRecords(NULL));
-
- builder.checkStructure("{$20:{$1:null,$2:null,$4:null,$10:null},"
- "b:{$30:null,$40:null,$50:null,$60:null,$70:null},"
- "_:{c:null}}");
- }
- };
-
- template<class OnDiskFormat>
- class BalanceThreeLeftToRight : public BtreeLogicTestBase<OnDiskFormat> {
- public:
- void run() {
- OperationContextNoop txn;
- ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
-
- builder.makeTree("{$20:{$1:{$0:null},$3:{$2:null},$5:{$4:null},$7:{$6:null},"
- "$9:{$8:null},$11:{$10:null},$13:{$12:null},_:{$14:null}},"
- "b:{$30:null,$40:{$35:null},$50:{$45:null}},"
- "_:{c:null}}");
-
- ASSERT_EQUALS(23, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
-
- // The tree has 14 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(15, this->_helper.recordStore.numRecords(NULL));
-
- const BSONObj k = BSON("" << bigNumString(0x30, 800));
- ASSERT(this->unindex(k));
-
- ASSERT_EQUALS(22, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
-
- // The tree has 14 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(15, this->_helper.recordStore.numRecords(NULL));
-
- builder.checkStructure("{$9:{$1:{$0:null},$3:{$2:null},"
- "$5:{$4:null},$7:{$6:null},_:{$8:null}},"
- "b:{$11:{$10:null},$13:{$12:null},$20:{$14:null},"
- "$40:{$35:null},$50:{$45:null}},"
- "_:{c:null}}");
- }
- };
-
- template<class OnDiskFormat>
- class BalanceThreeRightToLeft : public BtreeLogicTestBase<OnDiskFormat> {
- public:
- void run() {
- OperationContextNoop txn;
- ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
-
- builder.makeTree("{$20:{$1:{$0:null},$3:{$2:null},$5:null,_:{$14:null}},"
- "b:{$30:{$25:null},$40:{$35:null},$50:{$45:null},$60:{$55:null},"
- "$70:{$65:null},$80:{$75:null},"
- "$90:{$85:null},$100:{$95:null}},"
- "_:{c:null}}");
-
- ASSERT_EQUALS(25, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
-
- // The tree has 15 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(16, this->_helper.recordStore.numRecords(NULL));
-
- const BSONObj k = BSON("" << bigNumString(0x5, 800));
- ASSERT(this->unindex(k));
-
- ASSERT_EQUALS(24, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
-
- // The tree has 15 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(16, this->_helper.recordStore.numRecords(NULL));
-
- builder.checkStructure("{$50:{$1:{$0:null},$3:{$2:null},$20:{$14:null},"
- "$30:{$25:null},$40:{$35:null},_:{$45:null}},"
- "b:{$60:{$55:null},$70:{$65:null},$80:{$75:null},"
- "$90:{$85:null},$100:{$95:null}},"
- "_:{c:null}}");
- }
- };
-
- template<class OnDiskFormat>
- class BalanceSingleParentKey : public BtreeLogicTestBase<OnDiskFormat> {
- public:
- void run() {
- OperationContextNoop txn;
- ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
-
- builder.makeTree("{$10:{$1:null,$2:null,$3:null,$4:null,$5:null,$6:null},"
- "_:{$20:null,$30:null,$40:null,$50:null,a:null}}");
-
- ASSERT_EQUALS(12, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
-
// The tree has 3 buckets + 1 for the this->_helper.dummyDiskLoc
ASSERT_EQUALS(4, this->_helper.recordStore.numRecords(NULL));
- const BSONObj k = BSON("" << bigNumString(0x40, 800));
- ASSERT(this->unindex(k));
-
- ASSERT_EQUALS(11, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
-
- // The tree has 3 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(4, this->_helper.recordStore.numRecords(NULL));
-
- builder.checkStructure("{$6:{$1:null,$2:null,$3:null,$4:null,$5:null},"
- "_:{$10:null,$20:null,$30:null,$50:null,a:null}}");
- }
- };
-
- template<class OnDiskFormat>
- class PackEmptyBucket : public BtreeLogicTestBase<OnDiskFormat> {
- public:
- void run() {
- OperationContextNoop txn;
- ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
-
- builder.makeTree("{a:null}");
-
- const BSONObj k = BSON("" << "a");
- ASSERT(this->unindex(k));
-
- this->forcePackBucket(this->_helper.headManager.getHead(&txn));
-
- typename BtreeLogicTestBase<OnDiskFormat>::BucketType* headBucket = this->head();
-
- ASSERT_EQUALS(0, headBucket->n);
- ASSERT_FALSE(headBucket->flags & Packed);
-
- int unused = 0;
- this->truncateBucket(headBucket, 0, unused);
-
- ASSERT_EQUALS(0, headBucket->n);
- ASSERT_EQUALS(0, headBucket->topSize);
- ASSERT_EQUALS((int)OnDiskFormat::BucketBodySize, headBucket->emptySize);
- ASSERT_TRUE(headBucket->flags & Packed);
- }
- };
-
- template<class OnDiskFormat>
- class PackedDataSizeEmptyBucket : public BtreeLogicTestBase<OnDiskFormat> {
- public:
- void run() {
- OperationContextNoop txn;
- ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
-
- builder.makeTree("{a:null}");
-
- const BSONObj k = BSON("" << "a");
- ASSERT(this->unindex(k));
-
- this->forcePackBucket(this->_helper.headManager.getHead(&txn));
-
- typename BtreeLogicTestBase<OnDiskFormat>::BucketType* headBucket = this->head();
+ const BSONObj k = bigKey(*i);
+ this->unindex(k);
- ASSERT_EQUALS(0, headBucket->n);
- ASSERT_FALSE(headBucket->flags & Packed);
- ASSERT_EQUALS(0, this->bucketPackedDataSize(headBucket, 0));
- ASSERT_FALSE(headBucket->flags & Packed);
+ --_count;
}
- };
- template<class OnDiskFormat>
- class BalanceSingleParentKeyPackParent : public BtreeLogicTestBase<OnDiskFormat> {
- public:
- void run() {
- OperationContextNoop txn;
- ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
+ long long unused = 0;
+ ASSERT_EQUALS(_count, this->_helper.btree.fullValidate(&txn, &unused, true, false, 0));
+ ASSERT_EQUALS(0, unused);
- builder.makeTree("{$10:{$1:null,$2:null,$3:null,$4:null,$5:null,$6:null},"
- "_:{$20:null,$30:null,$40:null,$50:null,a:null}}");
-
- ASSERT_EQUALS(12, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
+ validate();
+ if (!merge()) {
// The tree has 3 buckets + 1 for the this->_helper.dummyDiskLoc
ASSERT_EQUALS(4, this->_helper.recordStore.numRecords(NULL));
-
- // force parent pack
- this->forcePackBucket(this->_helper.headManager.getHead(&txn));
-
- const BSONObj k = BSON("" << bigNumString(0x40, 800));
- ASSERT(this->unindex(k));
-
- ASSERT_EQUALS(11, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
-
- // The tree has 3 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(4, this->_helper.recordStore.numRecords(NULL));
-
- builder.checkStructure("{$6:{$1:null,$2:null,$3:null,$4:null,$5:null},"
- "_:{$10:null,$20:null,$30:null,$50:null,a:null}}");
- }
- };
-
- template<class OnDiskFormat>
- class BalanceSplitParent : public BtreeLogicTestBase<OnDiskFormat> {
- public:
- void run() {
- OperationContextNoop txn;
- ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
-
- builder.makeTree(
- "{$10$10:{$1:null,$2:null,$3:null,$4:null},"
- "$100:{$20:null,$30:null,$40:null,$50:null,$60:null,$70:null,$80:null},"
- "$200:null,$300:null,$400:null,$500:null,$600:null,"
- "$700:null,$800:null,$900:null,_:{c:null}}");
-
- ASSERT_EQUALS(22, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
-
- // The tree has 4 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(5, this->_helper.recordStore.numRecords(NULL));
-
- const BSONObj k = BSON("" << bigNumString(0x3, 800));
- ASSERT(this->unindex(k));
-
- ASSERT_EQUALS(21, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
-
- // The tree has 6 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(7, this->_helper.recordStore.numRecords(NULL));
-
- builder.checkStructure("{$500:{ $30:{$1:null,$2:null,$4:null,$10$10:null,$20:null},"
- "$100:{$40:null,$50:null,$60:null,$70:null,$80:null},"
- "$200:null,$300:null,$400:null},"
- "_:{$600:null,$700:null,$800:null,$900:null,_:{c:null}}}");
- }
- };
-
- template<class OnDiskFormat>
- class RebalancedSeparatorBase : public BtreeLogicTestBase<OnDiskFormat> {
- public:
- void run() {
- OperationContextNoop txn;
- ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
-
- builder.makeTree(treeSpec());
- modTree();
-
- ASSERT_EQUALS(expectedSeparator(),
- this->bucketRebalancedSeparatorPos(
- this->_helper.headManager.getHead(&txn), 0));
- }
-
- virtual string treeSpec() const = 0;
- virtual int expectedSeparator() const = 0;
- virtual void modTree() {}
- };
-
- template<class OnDiskFormat>
- class EvenRebalanceLeft : public RebalancedSeparatorBase<OnDiskFormat> {
- virtual string treeSpec() const { return "{$7:{$1:null,$2$31f:null,$3:null,"
- "$4$31f:null,$5:null,$6:null},"
- "_:{$8:null,$9:null,$10$31e:null}}"; }
- virtual int expectedSeparator() const { return 4; }
- };
-
- template<class OnDiskFormat>
- class EvenRebalanceLeftCusp : public RebalancedSeparatorBase<OnDiskFormat> {
- virtual string treeSpec() const {
- return "{$6:{$1:null,$2$31f:null,$3:null,$4$31f:null,$5:null},"
- "_:{$7:null,$8:null,$9$31e:null,$10:null}}";
- }
- virtual int expectedSeparator() const { return 4; }
- };
-
- template<class OnDiskFormat>
- class EvenRebalanceRight : public RebalancedSeparatorBase<OnDiskFormat> {
- virtual string treeSpec() const { return "{$3:{$1:null,$2$31f:null},_:{$4$31f:null,$5:null,$6:null,$7:null,$8$31e:null,$9:null,$10:null}}"; }
- virtual int expectedSeparator() const { return 4; }
- };
-
- template<class OnDiskFormat>
- class EvenRebalanceRightCusp : public RebalancedSeparatorBase<OnDiskFormat> {
- virtual string treeSpec() const { return "{$4$31f:{$1:null,$2$31f:null,$3:null},_:{$5:null,$6:null,$7$31e:null,$8:null,$9:null,$10:null}}"; }
- virtual int expectedSeparator() const { return 4; }
- };
-
- template<class OnDiskFormat>
- class EvenRebalanceCenter : public RebalancedSeparatorBase<OnDiskFormat> {
- virtual string treeSpec() const { return "{$5:{$1:null,$2$31f:null,$3:null,$4$31f:null},_:{$6:null,$7$31e:null,$8:null,$9:null,$10:null}}"; }
- virtual int expectedSeparator() const { return 4; }
- };
-
- template<class OnDiskFormat>
- class OddRebalanceLeft : public RebalancedSeparatorBase<OnDiskFormat> {
- virtual string treeSpec() const { return "{$6$31f:{$1:null,$2:null,$3:null,$4:null,$5:null},_:{$7:null,$8:null,$9:null,$10:null}}"; }
- virtual int expectedSeparator() const { return 4; }
- };
-
- template<class OnDiskFormat>
- class OddRebalanceRight : public RebalancedSeparatorBase<OnDiskFormat> {
- virtual string treeSpec() const { return "{$4:{$1:null,$2:null,$3:null},_:{$5:null,$6:null,$7:null,$8$31f:null,$9:null,$10:null}}"; }
- virtual int expectedSeparator() const { return 4; }
- };
-
- template<class OnDiskFormat>
- class OddRebalanceCenter : public RebalancedSeparatorBase<OnDiskFormat> {
- virtual string treeSpec() const { return "{$5:{$1:null,$2:null,$3:null,$4:null},_:{$6:null,$7:null,$8:null,$9:null,$10$31f:null}}"; }
- virtual int expectedSeparator() const { return 4; }
- };
-
- template<class OnDiskFormat>
- class RebalanceEmptyRight : public RebalancedSeparatorBase<OnDiskFormat> {
- virtual string treeSpec() const { return "{$a:{$1:null,$2:null,$3:null,$4:null,$5:null,$6:null,$7:null,$8:null,$9:null},_:{$b:null}}"; }
- virtual void modTree() {
- BSONObj k = BSON("" << bigNumString(0xb, 800));
- ASSERT(this->unindex(k));
- }
- virtual int expectedSeparator() const { return 4; }
- };
-
- template<class OnDiskFormat>
- class RebalanceEmptyLeft : public RebalancedSeparatorBase<OnDiskFormat> {
- virtual string treeSpec() const { return "{$a:{$1:null},_:{$11:null,$12:null,$13:null,$14:null,$15:null,$16:null,$17:null,$18:null,$19:null}}"; }
- virtual void modTree() {
- BSONObj k = BSON("" << bigNumString(0x1, 800));
- ASSERT(this->unindex(k));
- }
- virtual int expectedSeparator() const { return 4; }
- };
-
- template<class OnDiskFormat>
- class NoMoveAtLowWaterMarkRight : public MergeSizeJustRightRight<OnDiskFormat> {
- virtual int rightSize() const { return MergeSizeJustRightRight<OnDiskFormat>::rightSize() + 1; }
-
- virtual void initCheck() {
- OperationContextNoop txn;
- _oldTop = this->getKey(this->_helper.headManager.getHead(&txn), 0).data.toBson();
- }
-
- virtual void validate() {
- OperationContextNoop txn;
- ASSERT_EQUALS(_oldTop, this->getKey(this->_helper.headManager.getHead(&txn), 0).data.toBson());
- }
-
- virtual bool merge() const { return false; }
-
- protected:
- BSONObj _oldTop;
- };
-
- template<class OnDiskFormat>
- class MoveBelowLowWaterMarkRight : public NoMoveAtLowWaterMarkRight<OnDiskFormat> {
- virtual int rightSize() const { return MergeSizeJustRightRight<OnDiskFormat>::rightSize(); }
- virtual int leftSize() const { return MergeSizeJustRightRight<OnDiskFormat>::leftSize() + 1; }
-
- virtual void validate() {
- OperationContextNoop txn;
- // Different top means we rebalanced
- ASSERT_NOT_EQUALS(this->_oldTop,
- this->getKey(this->_helper.headManager.getHead(&txn), 0).data.toBson());
- }
- };
-
- template<class OnDiskFormat>
- class NoMoveAtLowWaterMarkLeft : public MergeSizeJustRightLeft<OnDiskFormat> {
- virtual int leftSize() const { return MergeSizeJustRightLeft<OnDiskFormat>::leftSize() + 1; }
- virtual void initCheck() {
- OperationContextNoop txn;
- this->_oldTop = this->getKey(this->_helper.headManager.getHead(&txn), 0).data.toBson();
+ } else {
+ // The tree has 1 bucket + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(2, this->_helper.recordStore.numRecords(NULL));
}
-
- virtual void validate() {
- OperationContextNoop txn;
- ASSERT_EQUALS(this->_oldTop,
+ }
+
+protected:
+ virtual int leftAdditional() const {
+ return 2;
+ }
+ virtual int rightAdditional() const {
+ return 2;
+ }
+ virtual void initCheck() {}
+ virtual void validate() {}
+ virtual int leftSize() const = 0;
+ virtual int rightSize() const = 0;
+ virtual const char* delKeys() const {
+ return "klyz";
+ }
+ virtual bool merge() const {
+ return true;
+ }
+
+ static BSONObj bigKey(char a) {
+ return simpleKey(a, 801);
+ }
+
+ static BSONObj biggestKey(char a) {
+ int size = OnDiskFormat::KeyMax - bigSize() + 801;
+ return simpleKey(a, size);
+ }
+
+ static int bigSize() {
+ return typename BtreeLogicTestBase<OnDiskFormat>::KeyDataOwnedType(bigKey('a')).dataSize();
+ }
+
+ static int biggestSize() {
+ return
+ typename BtreeLogicTestBase<OnDiskFormat>::KeyDataOwnedType(biggestKey('a')).dataSize();
+ }
+
+ int _count;
+};
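
MergeSizeTestBase is the workhorse for the size-threshold cases that follow: it builds a root holding one separator key over two children filled to exact byte sizes via fillBucketToExactSize(), deletes the keys named by delKeys(), and decides from the record count whether the children merged back into the root. A standalone sketch of that record bookkeeping, using only the counts visible in the asserts (the extra record is the fixture's dummyDiskLoc):

    #include <cassert>

    // Record-count model: a two-level tree is 3 buckets (root + 2 children);
    // a fully merged tree is 1 bucket. The fixture always holds one extra
    // record for this->_helper.dummyDiskLoc.
    int expectedRecords(bool merged) {
        const int dummyDiskLoc = 1;
        const int buckets = merged ? 1 : 3;
        return buckets + dummyDiskLoc;
    }

    int main() {
        assert(expectedRecords(false) == 4);  // pre-merge: ASSERT_EQUALS(4, numRecords)
        assert(expectedRecords(true) == 2);   // post-merge: ASSERT_EQUALS(2, numRecords)
        return 0;
    }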
+
+template <class OnDiskFormat>
+class MergeSizeJustRightRight : public MergeSizeTestBase<OnDiskFormat> {
+protected:
+ virtual int rightSize() const {
+ return BtreeLogic<OnDiskFormat>::lowWaterMark() - 1;
+ }
+
+ virtual int leftSize() const {
+ return OnDiskFormat::BucketBodySize - MergeSizeTestBase<OnDiskFormat>::biggestSize() -
+ sizeof(typename BtreeLogicTestBase<OnDiskFormat>::FixedWidthKeyType) -
+ (BtreeLogic<OnDiskFormat>::lowWaterMark() - 1);
+ }
+};
+
+template <class OnDiskFormat>
+class MergeSizeJustRightLeft : public MergeSizeTestBase<OnDiskFormat> {
+protected:
+ virtual int leftSize() const {
+ return BtreeLogic<OnDiskFormat>::lowWaterMark() - 1;
+ }
+
+ virtual int rightSize() const {
+ return OnDiskFormat::BucketBodySize - MergeSizeTestBase<OnDiskFormat>::biggestSize() -
+ sizeof(typename BtreeLogicTestBase<OnDiskFormat>::FixedWidthKeyType) -
+ (BtreeLogic<OnDiskFormat>::lowWaterMark() - 1);
+ }
+
+ virtual const char* delKeys() const {
+ return "yzkl";
+ }
+};
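
The MergeSizeJustRight* pair brackets the merge threshold exactly: one sibling is filled to lowWaterMark() - 1 bytes (just eligible to merge), and the other receives the complement so that both children plus the separator key and its fixed-width header would just fill one bucket body; the MergeSize*/NoMergeBelowMark*/MergeSize*TooBig variants then shift one byte in either direction. A sketch of the sizing identity, with stand-in constants (all four values are assumptions, not the real on-disk numbers):

    #include <cassert>

    // Illustrative stand-ins only; none of these are the real on-disk values.
    const int kBucketBodySize = 8100;  // stands in for OnDiskFormat::BucketBodySize
    const int kBiggestKeySize = 1000;  // stands in for biggestSize()
    const int kFixedWidthKey = 24;     // stands in for sizeof(FixedWidthKeyType)
    const int kLowWaterMark = 1500;    // stands in for BtreeLogic<...>::lowWaterMark()

    // Whatever one child holds, the other is filled so that both children plus
    // the separator key would exactly occupy a single bucket body.
    int complementSize(int siblingSize) {
        return kBucketBodySize - kBiggestKeySize - kFixedWidthKey - siblingSize;
    }

    int main() {
        const int right = kLowWaterMark - 1;  // one byte under the merge threshold
        const int left = complementSize(right);
        assert(left + right + kBiggestKeySize + kFixedWidthKey == kBucketBodySize);
        return 0;
    }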
+
+template <class OnDiskFormat>
+class MergeSizeRight : public MergeSizeJustRightRight<OnDiskFormat> {
+ virtual int rightSize() const {
+ return MergeSizeJustRightRight<OnDiskFormat>::rightSize() - 1;
+ }
+ virtual int leftSize() const {
+ return MergeSizeJustRightRight<OnDiskFormat>::leftSize() + 1;
+ }
+};
+
+template <class OnDiskFormat>
+class MergeSizeLeft : public MergeSizeJustRightLeft<OnDiskFormat> {
+ virtual int rightSize() const {
+ return MergeSizeJustRightLeft<OnDiskFormat>::rightSize() + 1;
+ }
+ virtual int leftSize() const {
+ return MergeSizeJustRightLeft<OnDiskFormat>::leftSize() - 1;
+ }
+};
+
+template <class OnDiskFormat>
+class NoMergeBelowMarkRight : public MergeSizeJustRightRight<OnDiskFormat> {
+ virtual int rightSize() const {
+ return MergeSizeJustRightRight<OnDiskFormat>::rightSize() + 1;
+ }
+ virtual int leftSize() const {
+ return MergeSizeJustRightRight<OnDiskFormat>::leftSize() - 1;
+ }
+ virtual bool merge() const {
+ return false;
+ }
+};
+
+template <class OnDiskFormat>
+class NoMergeBelowMarkLeft : public MergeSizeJustRightLeft<OnDiskFormat> {
+ virtual int rightSize() const {
+ return MergeSizeJustRightLeft<OnDiskFormat>::rightSize() - 1;
+ }
+ virtual int leftSize() const {
+ return MergeSizeJustRightLeft<OnDiskFormat>::leftSize() + 1;
+ }
+ virtual bool merge() const {
+ return false;
+ }
+};
+
+template <class OnDiskFormat>
+class MergeSizeRightTooBig : public MergeSizeJustRightLeft<OnDiskFormat> {
+ virtual int rightSize() const {
+ return MergeSizeJustRightLeft<OnDiskFormat>::rightSize() + 1;
+ }
+ virtual bool merge() const {
+ return false;
+ }
+};
+
+template <class OnDiskFormat>
+class MergeSizeLeftTooBig : public MergeSizeJustRightRight<OnDiskFormat> {
+ virtual int leftSize() const {
+ return MergeSizeJustRightRight<OnDiskFormat>::leftSize() + 1;
+ }
+ virtual bool merge() const {
+ return false;
+ }
+};
+
+template <class OnDiskFormat>
+class MergeRightEmpty : public MergeSizeTestBase<OnDiskFormat> {
+protected:
+ virtual int rightAdditional() const {
+ return 1;
+ }
+ virtual int leftAdditional() const {
+ return 1;
+ }
+ virtual const char* delKeys() const {
+ return "lz";
+ }
+ virtual int rightSize() const {
+ return 0;
+ }
+ virtual int leftSize() const {
+ return OnDiskFormat::BucketBodySize - MergeSizeTestBase<OnDiskFormat>::biggestSize() -
+ sizeof(typename BtreeLogicTestBase<OnDiskFormat>::FixedWidthKeyType);
+ }
+};
+
+template <class OnDiskFormat>
+class MergeMinRightEmpty : public MergeSizeTestBase<OnDiskFormat> {
+protected:
+ virtual int rightAdditional() const {
+ return 1;
+ }
+ virtual int leftAdditional() const {
+ return 0;
+ }
+ virtual const char* delKeys() const {
+ return "z";
+ }
+ virtual int rightSize() const {
+ return 0;
+ }
+ virtual int leftSize() const {
+ return MergeSizeTestBase<OnDiskFormat>::bigSize() +
+ sizeof(typename BtreeLogicTestBase<OnDiskFormat>::FixedWidthKeyType);
+ }
+};
+
+template <class OnDiskFormat>
+class MergeLeftEmpty : public MergeSizeTestBase<OnDiskFormat> {
+protected:
+ virtual int rightAdditional() const {
+ return 1;
+ }
+ virtual int leftAdditional() const {
+ return 1;
+ }
+ virtual const char* delKeys() const {
+ return "zl";
+ }
+ virtual int leftSize() const {
+ return 0;
+ }
+ virtual int rightSize() const {
+ return OnDiskFormat::BucketBodySize - MergeSizeTestBase<OnDiskFormat>::biggestSize() -
+ sizeof(typename BtreeLogicTestBase<OnDiskFormat>::FixedWidthKeyType);
+ }
+};
+
+template <class OnDiskFormat>
+class MergeMinLeftEmpty : public MergeSizeTestBase<OnDiskFormat> {
+protected:
+ virtual int leftAdditional() const {
+ return 1;
+ }
+ virtual int rightAdditional() const {
+ return 0;
+ }
+ virtual const char* delKeys() const {
+ return "l";
+ }
+ virtual int leftSize() const {
+ return 0;
+ }
+ virtual int rightSize() const {
+ return MergeSizeTestBase<OnDiskFormat>::bigSize() +
+ sizeof(typename BtreeLogicTestBase<OnDiskFormat>::FixedWidthKeyType);
+ }
+};
+
+template <class OnDiskFormat>
+class BalanceRightEmpty : public MergeRightEmpty<OnDiskFormat> {
+protected:
+ virtual int leftSize() const {
+ return OnDiskFormat::BucketBodySize - MergeSizeTestBase<OnDiskFormat>::biggestSize() -
+ sizeof(typename BtreeLogicTestBase<OnDiskFormat>::FixedWidthKeyType) + 1;
+ }
+
+ virtual bool merge() const {
+ return false;
+ }
+
+ virtual void initCheck() {
+ OperationContextNoop txn;
+ _oldTop = this->getKey(this->_helper.headManager.getHead(&txn), 0).data.toBson();
+ }
+
+ virtual void validate() {
+ OperationContextNoop txn;
+ ASSERT_NOT_EQUALS(_oldTop,
this->getKey(this->_helper.headManager.getHead(&txn), 0).data.toBson());
- }
- virtual bool merge() const { return false; }
+ }
+
+private:
+ BSONObj _oldTop;
+};
+
+template <class OnDiskFormat>
+class BalanceLeftEmpty : public MergeLeftEmpty<OnDiskFormat> {
+protected:
+ virtual int rightSize() const {
+ return OnDiskFormat::BucketBodySize - MergeSizeTestBase<OnDiskFormat>::biggestSize() -
+ sizeof(typename BtreeLogicTestBase<OnDiskFormat>::FixedWidthKeyType) + 1;
+ }
+
+ virtual bool merge() const {
+ return false;
+ }
+
+ virtual void initCheck() {
+ OperationContextNoop txn;
+ _oldTop = this->getKey(this->_helper.headManager.getHead(&txn), 0).data.toBson();
+ }
- protected:
- BSONObj _oldTop;
- };
+ virtual void validate() {
+ OperationContextNoop txn;
+ ASSERT_TRUE(_oldTop !=
+ this->getKey(this->_helper.headManager.getHead(&txn), 0).data.toBson());
+ }
+
+private:
+ BSONObj _oldTop;
+};
+
+template <class OnDiskFormat>
+class BalanceOneLeftToRight : public BtreeLogicTestBase<OnDiskFormat> {
+public:
+ void run() {
+ OperationContextNoop txn;
+ ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
+
+ builder.makeTree(
+ "{$10:{$1:null,$2:null,$3:null,$4:null,$5:null,$6:null},"
+ "b:{$20:null,$30:null,$40:null,$50:null,a:null},"
+ "_:{c:null}}");
+
+ ASSERT_EQUALS(14, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
+
+ // The tree has 4 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(5, this->_helper.recordStore.numRecords(NULL));
+
+ const BSONObj k = BSON("" << bigNumString(0x40, 800));
+ ASSERT(this->unindex(k));
+
+ ASSERT_EQUALS(13, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
+
+ // The tree has 4 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(5, this->_helper.recordStore.numRecords(NULL));
+
+ builder.checkStructure(
+ "{$6:{$1:null,$2:null,$3:null,$4:null,$5:null},"
+ "b:{$10:null,$20:null,$30:null,$50:null,a:null},"
+ "_:{c:null}}");
+ }
+};
+
+template <class OnDiskFormat>
+class BalanceOneRightToLeft : public BtreeLogicTestBase<OnDiskFormat> {
+public:
+ void run() {
+ OperationContextNoop txn;
+ ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
+
+ builder.makeTree(
+ "{$10:{$1:null,$2:null,$3:null,$4:null},"
+ "b:{$20:null,$30:null,$40:null,$50:null,$60:null,$70:null},"
+ "_:{c:null}}");
+
+ ASSERT_EQUALS(13, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
+
+ // The tree has 4 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(5, this->_helper.recordStore.numRecords(NULL));
+
+ const BSONObj k = BSON("" << bigNumString(0x3, 800));
+ ASSERT(this->unindex(k));
+
+ ASSERT_EQUALS(12, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
+
+ // The tree has 4 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(5, this->_helper.recordStore.numRecords(NULL));
+
+ builder.checkStructure(
+ "{$20:{$1:null,$2:null,$4:null,$10:null},"
+ "b:{$30:null,$40:null,$50:null,$60:null,$70:null},"
+ "_:{c:null}}");
+ }
+};
+
+template <class OnDiskFormat>
+class BalanceThreeLeftToRight : public BtreeLogicTestBase<OnDiskFormat> {
+public:
+ void run() {
+ OperationContextNoop txn;
+ ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
+
+ builder.makeTree(
+ "{$20:{$1:{$0:null},$3:{$2:null},$5:{$4:null},$7:{$6:null},"
+ "$9:{$8:null},$11:{$10:null},$13:{$12:null},_:{$14:null}},"
+ "b:{$30:null,$40:{$35:null},$50:{$45:null}},"
+ "_:{c:null}}");
+
+ ASSERT_EQUALS(23, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
+
+ // The tree has 14 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(15, this->_helper.recordStore.numRecords(NULL));
+
+ const BSONObj k = BSON("" << bigNumString(0x30, 800));
+ ASSERT(this->unindex(k));
- template<class OnDiskFormat>
- class MoveBelowLowWaterMarkLeft : public NoMoveAtLowWaterMarkLeft<OnDiskFormat> {
- virtual int leftSize() const { return MergeSizeJustRightLeft<OnDiskFormat>::leftSize(); }
- virtual int rightSize() const { return MergeSizeJustRightLeft<OnDiskFormat>::rightSize() + 1; }
+ ASSERT_EQUALS(22, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
+
+ // The tree has 14 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(15, this->_helper.recordStore.numRecords(NULL));
- virtual void validate() {
- OperationContextNoop txn;
- // Different top means we rebalanced
- ASSERT_NOT_EQUALS(this->_oldTop,
- this->getKey(this->_helper.headManager.getHead(&txn), 0).data.toBson());
- }
- };
+ builder.checkStructure(
+ "{$9:{$1:{$0:null},$3:{$2:null},"
+ "$5:{$4:null},$7:{$6:null},_:{$8:null}},"
+ "b:{$11:{$10:null},$13:{$12:null},$20:{$14:null},"
+ "$40:{$35:null},$50:{$45:null}},"
+ "_:{c:null}}");
+ }
+};
+
+template <class OnDiskFormat>
+class BalanceThreeRightToLeft : public BtreeLogicTestBase<OnDiskFormat> {
+public:
+ void run() {
+ OperationContextNoop txn;
+ ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
- template<class OnDiskFormat>
- class PreferBalanceLeft : public BtreeLogicTestBase<OnDiskFormat> {
- public:
- void run() {
- OperationContextNoop txn;
- ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
+ builder.makeTree(
+ "{$20:{$1:{$0:null},$3:{$2:null},$5:null,_:{$14:null}},"
+ "b:{$30:{$25:null},$40:{$35:null},$50:{$45:null},$60:{$55:null},"
+ "$70:{$65:null},$80:{$75:null},"
+ "$90:{$85:null},$100:{$95:null}},"
+ "_:{c:null}}");
- builder.makeTree("{$10:{$1:null,$2:null,$3:null,$4:null,$5:null,$6:null},"
- "$20:{$11:null,$12:null,$13:null,$14:null},"
- "_:{$30:null}}");
+ ASSERT_EQUALS(25, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
- ASSERT_EQUALS(13, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
+ // The tree has 15 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(16, this->_helper.recordStore.numRecords(NULL));
+
+ const BSONObj k = BSON("" << bigNumString(0x5, 800));
+ ASSERT(this->unindex(k));
+
+ ASSERT_EQUALS(24, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
+
+ // The tree has 15 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(16, this->_helper.recordStore.numRecords(NULL));
+
+ builder.checkStructure(
+ "{$50:{$1:{$0:null},$3:{$2:null},$20:{$14:null},"
+ "$30:{$25:null},$40:{$35:null},_:{$45:null}},"
+ "b:{$60:{$55:null},$70:{$65:null},$80:{$75:null},"
+ "$90:{$85:null},$100:{$95:null}},"
+ "_:{c:null}}");
+ }
+};
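
The Balance* cases exercise rebalancing rather than merging: when one sibling drops below the low-water mark but the pair is too large to merge, keys rotate through the parent from the heavier side until the load evens out. A count-based simplification of that movement (an assumption for illustration; the real pass balances on accumulated byte sizes, not key counts):

    #include <cassert>

    // After the delete in BalanceThreeRightToLeft the left child holds 2 keys
    // and the right holds 8; three keys rotate through the parent, leaving 5
    // apiece, which is the shape checkStructure() verifies above.
    int keysToMove(int donor, int receiver) {
        return (donor - receiver) / 2;
    }

    int main() {
        assert(keysToMove(8, 2) == 3);  // BalanceThreeRightToLeft
        assert(keysToMove(6, 4) == 1);  // BalanceOneLeftToRight
        return 0;
    }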
- // The tree has 4 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(5, this->_helper.recordStore.numRecords(NULL));
+template <class OnDiskFormat>
+class BalanceSingleParentKey : public BtreeLogicTestBase<OnDiskFormat> {
+public:
+ void run() {
+ OperationContextNoop txn;
+ ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
- const BSONObj k = BSON("" << bigNumString(0x12, 800));
- ASSERT(this->unindex(k));
+ builder.makeTree(
+ "{$10:{$1:null,$2:null,$3:null,$4:null,$5:null,$6:null},"
+ "_:{$20:null,$30:null,$40:null,$50:null,a:null}}");
- ASSERT_EQUALS(12, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
+ ASSERT_EQUALS(12, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
- // The tree has 4 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(5, this->_helper.recordStore.numRecords(NULL));
+ // The tree has 3 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(4, this->_helper.recordStore.numRecords(NULL));
- builder.checkStructure("{$5:{$1:null,$2:null,$3:null,$4:null},"
- "$20:{$6:null,$10:null,$11:null,$13:null,$14:null},"
- "_:{$30:null}}");
- }
- };
+ const BSONObj k = BSON("" << bigNumString(0x40, 800));
+ ASSERT(this->unindex(k));
- template<class OnDiskFormat>
- class PreferBalanceRight : public BtreeLogicTestBase<OnDiskFormat> {
- public:
- void run() {
- OperationContextNoop txn;
- ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
+ ASSERT_EQUALS(11, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
+
+ // The tree has 3 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(4, this->_helper.recordStore.numRecords(NULL));
+
+ builder.checkStructure(
+ "{$6:{$1:null,$2:null,$3:null,$4:null,$5:null},"
+ "_:{$10:null,$20:null,$30:null,$50:null,a:null}}");
+ }
+};
+
+template <class OnDiskFormat>
+class PackEmptyBucket : public BtreeLogicTestBase<OnDiskFormat> {
+public:
+ void run() {
+ OperationContextNoop txn;
+ ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
- builder.makeTree("{$10:{$1:null},"
- "$20:{$11:null,$12:null,$13:null,$14:null},"
- "_:{$31:null,$32:null,$33:null,$34:null,$35:null,$36:null}}");
+ builder.makeTree("{a:null}");
- ASSERT_EQUALS(13, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
+ const BSONObj k = BSON(""
+ << "a");
+ ASSERT(this->unindex(k));
- // The tree has 4 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(5, this->_helper.recordStore.numRecords(NULL));
+ this->forcePackBucket(this->_helper.headManager.getHead(&txn));
- const BSONObj k = BSON("" << bigNumString(0x12, 800));
- ASSERT(this->unindex(k));
+ typename BtreeLogicTestBase<OnDiskFormat>::BucketType* headBucket = this->head();
- ASSERT_EQUALS(12, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
+ ASSERT_EQUALS(0, headBucket->n);
+ ASSERT_FALSE(headBucket->flags & Packed);
- // The tree has 4 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(5, this->_helper.recordStore.numRecords(NULL));
+ int unused = 0;
+ this->truncateBucket(headBucket, 0, unused);
+
+ ASSERT_EQUALS(0, headBucket->n);
+ ASSERT_EQUALS(0, headBucket->topSize);
+ ASSERT_EQUALS((int)OnDiskFormat::BucketBodySize, headBucket->emptySize);
+ ASSERT_TRUE(headBucket->flags & Packed);
+ }
+};
+
+template <class OnDiskFormat>
+class PackedDataSizeEmptyBucket : public BtreeLogicTestBase<OnDiskFormat> {
+public:
+ void run() {
+ OperationContextNoop txn;
+ ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
- builder.checkStructure("{$10:{$1:null},"
- "$31:{$11:null,$13:null,$14:null,$20:null},"
- "_:{$32:null,$33:null,$34:null,$35:null,$36:null}}");
- }
- };
+ builder.makeTree("{a:null}");
- template<class OnDiskFormat>
- class RecursiveMergeThenBalance : public BtreeLogicTestBase<OnDiskFormat> {
- public:
- void run() {
- OperationContextNoop txn;
- ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
+ const BSONObj k = BSON(""
+ << "a");
+ ASSERT(this->unindex(k));
- builder.makeTree("{$10:{$5:{$1:null,$2:null},$8:{$6:null,$7:null}},"
- "_:{$20:null,$30:null,$40:null,$50:null,"
- "$60:null,$70:null,$80:null,$90:null}}");
+ this->forcePackBucket(this->_helper.headManager.getHead(&txn));
- ASSERT_EQUALS(15, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
+ typename BtreeLogicTestBase<OnDiskFormat>::BucketType* headBucket = this->head();
- // The tree has 5 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(6, this->_helper.recordStore.numRecords(NULL));
+ ASSERT_EQUALS(0, headBucket->n);
+ ASSERT_FALSE(headBucket->flags & Packed);
+ ASSERT_EQUALS(0, this->bucketPackedDataSize(headBucket, 0));
+ ASSERT_FALSE(headBucket->flags & Packed);
+ }
+};
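
PackEmptyBucket and PackedDataSizeEmptyBucket pin down the observable packing protocol: forcePackBucket() clears the Packed bit so the next pack really runs, bucketPackedDataSize() measures without setting it, and truncateBucket() leaves the bucket packed. A toy bucket with the same flag behavior (the flag value and member layout are illustrative, not the on-disk format):

    #include <cassert>

    enum Flags { Packed = 1 };  // illustrative bit value

    struct Bucket {
        int n = 0;       // number of keys
        int flags = 0;
        void forcePack() { flags &= ~Packed; }   // invalidate the packed state
        void truncate() { n = 0; flags |= Packed; }  // truncation re-packs
    };

    int main() {
        Bucket b;
        b.truncate();
        assert(b.flags & Packed);      // ASSERT_TRUE(headBucket->flags & Packed)
        b.forcePack();
        assert(!(b.flags & Packed));   // ASSERT_FALSE(headBucket->flags & Packed)
        return 0;
    }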
+
+template <class OnDiskFormat>
+class BalanceSingleParentKeyPackParent : public BtreeLogicTestBase<OnDiskFormat> {
+public:
+ void run() {
+ OperationContextNoop txn;
+ ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
+
+ builder.makeTree(
+ "{$10:{$1:null,$2:null,$3:null,$4:null,$5:null,$6:null},"
+ "_:{$20:null,$30:null,$40:null,$50:null,a:null}}");
+
+ ASSERT_EQUALS(12, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
+
+ // The tree has 3 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(4, this->_helper.recordStore.numRecords(NULL));
+
+ // force parent pack
+ this->forcePackBucket(this->_helper.headManager.getHead(&txn));
+
+ const BSONObj k = BSON("" << bigNumString(0x40, 800));
+ ASSERT(this->unindex(k));
+
+ ASSERT_EQUALS(11, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
+
+ // The tree has 3 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(4, this->_helper.recordStore.numRecords(NULL));
+
+ builder.checkStructure(
+ "{$6:{$1:null,$2:null,$3:null,$4:null,$5:null},"
+ "_:{$10:null,$20:null,$30:null,$50:null,a:null}}");
+ }
+};
+
+template <class OnDiskFormat>
+class BalanceSplitParent : public BtreeLogicTestBase<OnDiskFormat> {
+public:
+ void run() {
+ OperationContextNoop txn;
+ ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
+
+ builder.makeTree(
+ "{$10$10:{$1:null,$2:null,$3:null,$4:null},"
+ "$100:{$20:null,$30:null,$40:null,$50:null,$60:null,$70:null,$80:null},"
+ "$200:null,$300:null,$400:null,$500:null,$600:null,"
+ "$700:null,$800:null,$900:null,_:{c:null}}");
+
+ ASSERT_EQUALS(22, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
+
+ // The tree has 4 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(5, this->_helper.recordStore.numRecords(NULL));
+
+ const BSONObj k = BSON("" << bigNumString(0x3, 800));
+ ASSERT(this->unindex(k));
+
+ ASSERT_EQUALS(21, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
+
+ // The tree has 6 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(7, this->_helper.recordStore.numRecords(NULL));
+
+ builder.checkStructure(
+ "{$500:{ $30:{$1:null,$2:null,$4:null,$10$10:null,$20:null},"
+ "$100:{$40:null,$50:null,$60:null,$70:null,$80:null},"
+ "$200:null,$300:null,$400:null},"
+ "_:{$600:null,$700:null,$800:null,$900:null,_:{c:null}}}");
+ }
+};
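
BalanceSplitParent covers the one counterintuitive count in this family: a single unindex raises numRecords from 5 to 7, because the rebalance rotates a wide key into an already-full parent and the parent itself splits, adding a sibling and a new root level. The record arithmetic, with the fixture's dummy DiskLoc included:

    #include <cassert>

    int main() {
        const int dummyDiskLoc = 1;
        int buckets = 4;                      // before: root + three children
        assert(buckets + dummyDiskLoc == 5);  // ASSERT_EQUALS(5, numRecords)
        buckets += 2;                         // split adds a sibling plus a new root
        assert(buckets + dummyDiskLoc == 7);  // ASSERT_EQUALS(7, numRecords)
        return 0;
    }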
+
+template <class OnDiskFormat>
+class RebalancedSeparatorBase : public BtreeLogicTestBase<OnDiskFormat> {
+public:
+ void run() {
+ OperationContextNoop txn;
+ ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
+
+ builder.makeTree(treeSpec());
+ modTree();
+
+ ASSERT_EQUALS(
+ expectedSeparator(),
+ this->bucketRebalancedSeparatorPos(this->_helper.headManager.getHead(&txn), 0));
+ }
+
+ virtual string treeSpec() const = 0;
+ virtual int expectedSeparator() const = 0;
+ virtual void modTree() {}
+};
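
Every RebalancedSeparatorBase subclass that follows expects expectedSeparator() == 4, and each spec holds ten keys across the two siblings: the rebalanced separator sits at the midpoint of the combined contents, with the $31f/$31e suffixes (which appear to pin individual key byte sizes) only nudging keys that straddle it. A sketch reduced to key counts:

    #include <cassert>

    // Count-based stand-in for bucketRebalancedSeparatorPos(): the real code
    // splits on accumulated byte sizes; with ten roughly equal keys the split
    // point is the median index.
    int rebalancedSeparatorPos(int totalKeys) {
        return (totalKeys - 1) / 2;
    }

    int main() {
        assert(rebalancedSeparatorPos(10) == 4);  // every expectedSeparator() below
        return 0;
    }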
+
+template <class OnDiskFormat>
+class EvenRebalanceLeft : public RebalancedSeparatorBase<OnDiskFormat> {
+ virtual string treeSpec() const {
+ return "{$7:{$1:null,$2$31f:null,$3:null,"
+ "$4$31f:null,$5:null,$6:null},"
+ "_:{$8:null,$9:null,$10$31e:null}}";
+ }
+ virtual int expectedSeparator() const {
+ return 4;
+ }
+};
+
+template <class OnDiskFormat>
+class EvenRebalanceLeftCusp : public RebalancedSeparatorBase<OnDiskFormat> {
+ virtual string treeSpec() const {
+ return "{$6:{$1:null,$2$31f:null,$3:null,$4$31f:null,$5:null},"
+ "_:{$7:null,$8:null,$9$31e:null,$10:null}}";
+ }
+ virtual int expectedSeparator() const {
+ return 4;
+ }
+};
+
+template <class OnDiskFormat>
+class EvenRebalanceRight : public RebalancedSeparatorBase<OnDiskFormat> {
+ virtual string treeSpec() const {
+ return "{$3:{$1:null,$2$31f:null},_:{$4$31f:null,$5:null,$6:null,$7:null,$8$31e:null,$9:"
+ "null,$10:null}}";
+ }
+ virtual int expectedSeparator() const {
+ return 4;
+ }
+};
+
+template <class OnDiskFormat>
+class EvenRebalanceRightCusp : public RebalancedSeparatorBase<OnDiskFormat> {
+ virtual string treeSpec() const {
+ return "{$4$31f:{$1:null,$2$31f:null,$3:null},_:{$5:null,$6:null,$7$31e:null,$8:null,$9:"
+ "null,$10:null}}";
+ }
+ virtual int expectedSeparator() const {
+ return 4;
+ }
+};
+
+template <class OnDiskFormat>
+class EvenRebalanceCenter : public RebalancedSeparatorBase<OnDiskFormat> {
+ virtual string treeSpec() const {
+ return "{$5:{$1:null,$2$31f:null,$3:null,$4$31f:null},_:{$6:null,$7$31e:null,$8:null,$9:"
+ "null,$10:null}}";
+ }
+ virtual int expectedSeparator() const {
+ return 4;
+ }
+};
+
+template <class OnDiskFormat>
+class OddRebalanceLeft : public RebalancedSeparatorBase<OnDiskFormat> {
+ virtual string treeSpec() const {
+ return "{$6$31f:{$1:null,$2:null,$3:null,$4:null,$5:null},_:{$7:null,$8:null,$9:null,$10:"
+ "null}}";
+ }
+ virtual int expectedSeparator() const {
+ return 4;
+ }
+};
+
+template <class OnDiskFormat>
+class OddRebalanceRight : public RebalancedSeparatorBase<OnDiskFormat> {
+ virtual string treeSpec() const {
+ return "{$4:{$1:null,$2:null,$3:null},_:{$5:null,$6:null,$7:null,$8$31f:null,$9:null,$10:"
+ "null}}";
+ }
+ virtual int expectedSeparator() const {
+ return 4;
+ }
+};
+
+template <class OnDiskFormat>
+class OddRebalanceCenter : public RebalancedSeparatorBase<OnDiskFormat> {
+ virtual string treeSpec() const {
+ return "{$5:{$1:null,$2:null,$3:null,$4:null},_:{$6:null,$7:null,$8:null,$9:null,$10$31f:"
+ "null}}";
+ }
+ virtual int expectedSeparator() const {
+ return 4;
+ }
+};
+
+template <class OnDiskFormat>
+class RebalanceEmptyRight : public RebalancedSeparatorBase<OnDiskFormat> {
+ virtual string treeSpec() const {
+ return "{$a:{$1:null,$2:null,$3:null,$4:null,$5:null,$6:null,$7:null,$8:null,$9:null},_:{$"
+ "b:null}}";
+ }
+ virtual void modTree() {
+ BSONObj k = BSON("" << bigNumString(0xb, 800));
+ ASSERT(this->unindex(k));
+ }
+ virtual int expectedSeparator() const {
+ return 4;
+ }
+};
+
+template <class OnDiskFormat>
+class RebalanceEmptyLeft : public RebalancedSeparatorBase<OnDiskFormat> {
+ virtual string treeSpec() const {
+ return "{$a:{$1:null},_:{$11:null,$12:null,$13:null,$14:null,$15:null,$16:null,$17:null,$"
+ "18:null,$19:null}}";
+ }
+ virtual void modTree() {
+ BSONObj k = BSON("" << bigNumString(0x1, 800));
+ ASSERT(this->unindex(k));
+ }
+ virtual int expectedSeparator() const {
+ return 4;
+ }
+};
+
+template <class OnDiskFormat>
+class NoMoveAtLowWaterMarkRight : public MergeSizeJustRightRight<OnDiskFormat> {
+ virtual int rightSize() const {
+ return MergeSizeJustRightRight<OnDiskFormat>::rightSize() + 1;
+ }
+
+ virtual void initCheck() {
+ OperationContextNoop txn;
+ _oldTop = this->getKey(this->_helper.headManager.getHead(&txn), 0).data.toBson();
+ }
+
+ virtual void validate() {
+ OperationContextNoop txn;
+ ASSERT_EQUALS(_oldTop,
+ this->getKey(this->_helper.headManager.getHead(&txn), 0).data.toBson());
+ }
+
+ virtual bool merge() const {
+ return false;
+ }
+
+protected:
+ BSONObj _oldTop;
+};
+
+template <class OnDiskFormat>
+class MoveBelowLowWaterMarkRight : public NoMoveAtLowWaterMarkRight<OnDiskFormat> {
+ virtual int rightSize() const {
+ return MergeSizeJustRightRight<OnDiskFormat>::rightSize();
+ }
+ virtual int leftSize() const {
+ return MergeSizeJustRightRight<OnDiskFormat>::leftSize() + 1;
+ }
+
+ virtual void validate() {
+ OperationContextNoop txn;
+ // Different top means we rebalanced
+ ASSERT_NOT_EQUALS(this->_oldTop,
+ this->getKey(this->_helper.headManager.getHead(&txn), 0).data.toBson());
+ }
+};
+
+template <class OnDiskFormat>
+class NoMoveAtLowWaterMarkLeft : public MergeSizeJustRightLeft<OnDiskFormat> {
+ virtual int leftSize() const {
+ return MergeSizeJustRightLeft<OnDiskFormat>::leftSize() + 1;
+ }
+ virtual void initCheck() {
+ OperationContextNoop txn;
+ this->_oldTop = this->getKey(this->_helper.headManager.getHead(&txn), 0).data.toBson();
+ }
+
+ virtual void validate() {
+ OperationContextNoop txn;
+ ASSERT_EQUALS(this->_oldTop,
+ this->getKey(this->_helper.headManager.getHead(&txn), 0).data.toBson());
+ }
+ virtual bool merge() const {
+ return false;
+ }
+
+protected:
+ BSONObj _oldTop;
+};
+
+template <class OnDiskFormat>
+class MoveBelowLowWaterMarkLeft : public NoMoveAtLowWaterMarkLeft<OnDiskFormat> {
+ virtual int leftSize() const {
+ return MergeSizeJustRightLeft<OnDiskFormat>::leftSize();
+ }
+ virtual int rightSize() const {
+ return MergeSizeJustRightLeft<OnDiskFormat>::rightSize() + 1;
+ }
+
+ virtual void validate() {
+ OperationContextNoop txn;
+ // Different top means we rebalanced
+ ASSERT_NOT_EQUALS(this->_oldTop,
+ this->getKey(this->_helper.headManager.getHead(&txn), 0).data.toBson());
+ }
+};
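
The NoMoveAtLowWaterMark*/MoveBelowLowWaterMark* quartet above never inspects bucket internals; it detects rebalancing by snapshotting the root's first key in initCheck() and comparing it in validate(), since any rotation through the parent swaps that key out. A minimal model of the probe, with std::string standing in for the BSONObj key:

    #include <cassert>
    #include <string>

    struct RootProbe {
        std::string oldTop;
        void initCheck(const std::string& top) { oldTop = top; }
        bool rebalanced(const std::string& topNow) const { return topNow != oldTop; }
    };

    int main() {
        RootProbe probe;
        probe.initCheck("m");           // separator key before any deletes
        assert(!probe.rebalanced("m")); // unchanged top: no move (ASSERT_EQUALS)
        assert(probe.rebalanced("l"));  // new top: a key rotated up (ASSERT_NOT_EQUALS)
        return 0;
    }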
- const BSONObj k = BSON("" << bigNumString(0x7, 800));
- ASSERT(this->unindex(k));
+template <class OnDiskFormat>
+class PreferBalanceLeft : public BtreeLogicTestBase<OnDiskFormat> {
+public:
+ void run() {
+ OperationContextNoop txn;
+ ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
- ASSERT_EQUALS(14, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
+ builder.makeTree(
+ "{$10:{$1:null,$2:null,$3:null,$4:null,$5:null,$6:null},"
+ "$20:{$11:null,$12:null,$13:null,$14:null},"
+ "_:{$30:null}}");
- // The tree has 4 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(5, this->_helper.recordStore.numRecords(NULL));
+ ASSERT_EQUALS(13, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
- builder.checkStructure(
- "{$40:{$8:{$1:null,$2:null,$5:null,$6:null},$10:null,$20:null,$30:null},"
- "_:{$50:null,$60:null,$70:null,$80:null,$90:null}}");
- }
- };
+ // The tree has 4 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(5, this->_helper.recordStore.numRecords(NULL));
- template<class OnDiskFormat>
- class DelEmptyNoNeighbors : public BtreeLogicTestBase<OnDiskFormat> {
- public:
- void run() {
- OperationContextNoop txn;
- ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
+ const BSONObj k = BSON("" << bigNumString(0x12, 800));
+ ASSERT(this->unindex(k));
- builder.makeTree("{b:{a:null}}");
+ ASSERT_EQUALS(12, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
- ASSERT_EQUALS(2, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
+ // The tree has 4 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(5, this->_helper.recordStore.numRecords(NULL));
- // The tree has 2 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(3, this->_helper.recordStore.numRecords(NULL));
+ builder.checkStructure(
+ "{$5:{$1:null,$2:null,$3:null,$4:null},"
+ "$20:{$6:null,$10:null,$11:null,$13:null,$14:null},"
+ "_:{$30:null}}");
+ }
+};
- const BSONObj k = BSON("" << "a");
- ASSERT(this->unindex(k));
+template <class OnDiskFormat>
+class PreferBalanceRight : public BtreeLogicTestBase<OnDiskFormat> {
+public:
+ void run() {
+ OperationContextNoop txn;
+ ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
- ASSERT_EQUALS(1, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
+ builder.makeTree(
+ "{$10:{$1:null},"
+ "$20:{$11:null,$12:null,$13:null,$14:null},"
+ "_:{$31:null,$32:null,$33:null,$34:null,$35:null,$36:null}}");
- // The tree has 1 bucket + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(2, this->_helper.recordStore.numRecords(NULL));
+ ASSERT_EQUALS(13, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
- builder.checkStructure("{b:null}");
- }
- };
+ // The tree has 4 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(5, this->_helper.recordStore.numRecords(NULL));
- template<class OnDiskFormat>
- class DelEmptyEmptyNeighbors : public BtreeLogicTestBase<OnDiskFormat> {
- public:
- void run() {
- OperationContextNoop txn;
- ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
+ const BSONObj k = BSON("" << bigNumString(0x12, 800));
+ ASSERT(this->unindex(k));
- builder.makeTree("{a:null,c:{b:null},d:null}");
+ ASSERT_EQUALS(12, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
- ASSERT_EQUALS(4, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
+ // The tree has 4 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(5, this->_helper.recordStore.numRecords(NULL));
- // The tree has 2 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(3, this->_helper.recordStore.numRecords(NULL));
+ builder.checkStructure(
+ "{$10:{$1:null},"
+ "$31:{$11:null,$13:null,$14:null,$20:null},"
+ "_:{$32:null,$33:null,$34:null,$35:null,$36:null}}");
+ }
+};
- const BSONObj k = BSON("" << "b");
- ASSERT(this->unindex(k));
+template <class OnDiskFormat>
+class RecursiveMergeThenBalance : public BtreeLogicTestBase<OnDiskFormat> {
+public:
+ void run() {
+ OperationContextNoop txn;
+ ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
- ASSERT_EQUALS(3, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
+ builder.makeTree(
+ "{$10:{$5:{$1:null,$2:null},$8:{$6:null,$7:null}},"
+ "_:{$20:null,$30:null,$40:null,$50:null,"
+ "$60:null,$70:null,$80:null,$90:null}}");
- // The tree has 1 bucket + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(2, this->_helper.recordStore.numRecords(NULL));
+ ASSERT_EQUALS(15, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
- builder.checkStructure("{a:null,c:null,d:null}");
- }
- };
+ // The tree has 5 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(6, this->_helper.recordStore.numRecords(NULL));
- template<class OnDiskFormat>
- class DelInternal : public BtreeLogicTestBase<OnDiskFormat> {
- public:
- void run() {
- OperationContextNoop txn;
- ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
+ const BSONObj k = BSON("" << bigNumString(0x7, 800));
+ ASSERT(this->unindex(k));
- builder.makeTree("{a:null,c:{b:null},d:null}");
+ ASSERT_EQUALS(14, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
- long long unused = 0;
- ASSERT_EQUALS(4, this->_helper.btree.fullValidate(&txn, &unused, true, false, 0));
+ // The tree has 4 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(5, this->_helper.recordStore.numRecords(NULL));
- // The tree has 2 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(3, this->_helper.recordStore.numRecords(NULL));
- ASSERT_EQUALS(0, unused);
+ builder.checkStructure(
+ "{$40:{$8:{$1:null,$2:null,$5:null,$6:null},$10:null,$20:null,$30:null},"
+ "_:{$50:null,$60:null,$70:null,$80:null,$90:null}}");
+ }
+};
- const BSONObj k = BSON("" << "c");
- ASSERT(this->unindex(k));
+template <class OnDiskFormat>
+class DelEmptyNoNeighbors : public BtreeLogicTestBase<OnDiskFormat> {
+public:
+ void run() {
+ OperationContextNoop txn;
+ ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
- ASSERT_EQUALS(3, this->_helper.btree.fullValidate(&txn, &unused, true, false, 0));
+ builder.makeTree("{b:{a:null}}");
- // The tree has 1 bucket + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(2, this->_helper.recordStore.numRecords(NULL));
- ASSERT_EQUALS(0, unused);
+ ASSERT_EQUALS(2, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
- builder.checkStructure("{a:null,b:null,d:null}");
- }
- };
+ // The tree has 2 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(3, this->_helper.recordStore.numRecords(NULL));
- template<class OnDiskFormat>
- class DelInternalReplaceWithUnused : public BtreeLogicTestBase<OnDiskFormat> {
- public:
- void run() {
- OperationContextNoop txn;
- ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
+ const BSONObj k = BSON(""
+ << "a");
+ ASSERT(this->unindex(k));
- builder.makeTree("{a:null,c:{b:null},d:null}");
+ ASSERT_EQUALS(1, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
- const DiskLoc prevChildBucket =
- this->getKey(this->_helper.headManager.getHead(&txn), 1).prevChildBucket;
- this->markKeyUnused(prevChildBucket, 0);
+ // The tree has 1 bucket + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(2, this->_helper.recordStore.numRecords(NULL));
- long long unused = 0;
- ASSERT_EQUALS(3, this->_helper.btree.fullValidate(&txn, &unused, true, false, 0));
+ builder.checkStructure("{b:null}");
+ }
+};
- // The tree has 2 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(3, this->_helper.recordStore.numRecords(NULL));
- ASSERT_EQUALS(1, unused);
+template <class OnDiskFormat>
+class DelEmptyEmptyNeighbors : public BtreeLogicTestBase<OnDiskFormat> {
+public:
+ void run() {
+ OperationContextNoop txn;
+ ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
- const BSONObj k = BSON("" << "c");
- ASSERT(this->unindex(k));
+ builder.makeTree("{a:null,c:{b:null},d:null}");
- unused = 0;
- ASSERT_EQUALS(2, this->_helper.btree.fullValidate(&txn, &unused, true, false, 0));
+ ASSERT_EQUALS(4, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
- // The tree has 1 bucket + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(2, this->_helper.recordStore.numRecords(NULL));
- ASSERT_EQUALS(1, unused);
+ // The tree has 2 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(3, this->_helper.recordStore.numRecords(NULL));
-            // checkStructure() doesn't discriminate between used and unused keys
- builder.checkStructure("{a:null,b:null,d:null}");
- }
- };
+ const BSONObj k = BSON(""
+ << "b");
+ ASSERT(this->unindex(k));
- template<class OnDiskFormat>
- class DelInternalReplaceRight : public BtreeLogicTestBase<OnDiskFormat> {
- public:
- void run() {
- OperationContextNoop txn;
- ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
+ ASSERT_EQUALS(3, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
- builder.makeTree("{a:null,_:{b:null}}");
+ // The tree has 1 bucket + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(2, this->_helper.recordStore.numRecords(NULL));
- long long unused = 0;
- ASSERT_EQUALS(2, this->_helper.btree.fullValidate(&txn, &unused, true, false, 0));
+ builder.checkStructure("{a:null,c:null,d:null}");
+ }
+};
- // The tree has 2 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(3, this->_helper.recordStore.numRecords(NULL));
- ASSERT_EQUALS(0, unused);
+template <class OnDiskFormat>
+class DelInternal : public BtreeLogicTestBase<OnDiskFormat> {
+public:
+ void run() {
+ OperationContextNoop txn;
+ ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
- const BSONObj k = BSON("" << "a");
- ASSERT(this->unindex(k));
+ builder.makeTree("{a:null,c:{b:null},d:null}");
- unused = 0;
- ASSERT_EQUALS(1, this->_helper.btree.fullValidate(&txn, &unused, true, false, 0));
+ long long unused = 0;
+ ASSERT_EQUALS(4, this->_helper.btree.fullValidate(&txn, &unused, true, false, 0));
- // The tree has 1 bucket + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(2, this->_helper.recordStore.numRecords(NULL));
- ASSERT_EQUALS(0, unused);
+ // The tree has 2 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(3, this->_helper.recordStore.numRecords(NULL));
+ ASSERT_EQUALS(0, unused);
- builder.checkStructure("{b:null}");
- }
- };
+ const BSONObj k = BSON(""
+ << "c");
+ ASSERT(this->unindex(k));
- template<class OnDiskFormat>
- class DelInternalPromoteKey : public BtreeLogicTestBase<OnDiskFormat> {
- public:
- void run() {
- OperationContextNoop txn;
- ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
+ ASSERT_EQUALS(3, this->_helper.btree.fullValidate(&txn, &unused, true, false, 0));
- builder.makeTree("{a:null,y:{d:{c:{b:null}},_:{e:null}},z:null}");
+ // The tree has 1 bucket + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(2, this->_helper.recordStore.numRecords(NULL));
+ ASSERT_EQUALS(0, unused);
- long long unused = 0;
- ASSERT_EQUALS(7, this->_helper.btree.fullValidate(&txn, &unused, true, false, 0));
+ builder.checkStructure("{a:null,b:null,d:null}");
+ }
+};
- // The tree has 5 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(6, this->_helper.recordStore.numRecords(NULL));
- ASSERT_EQUALS(0, unused);
+template <class OnDiskFormat>
+class DelInternalReplaceWithUnused : public BtreeLogicTestBase<OnDiskFormat> {
+public:
+ void run() {
+ OperationContextNoop txn;
+ ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
- const BSONObj k = BSON("" << "y");
- ASSERT(this->unindex(k));
+ builder.makeTree("{a:null,c:{b:null},d:null}");
- unused = 0;
- ASSERT_EQUALS(6, this->_helper.btree.fullValidate(&txn, &unused, true, false, 0));
+ const DiskLoc prevChildBucket =
+ this->getKey(this->_helper.headManager.getHead(&txn), 1).prevChildBucket;
+ this->markKeyUnused(prevChildBucket, 0);
- // The tree has 3 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(4, this->_helper.recordStore.numRecords(NULL));
- ASSERT_EQUALS(0, unused);
+ long long unused = 0;
+ ASSERT_EQUALS(3, this->_helper.btree.fullValidate(&txn, &unused, true, false, 0));
- builder.checkStructure("{a:null,e:{c:{b:null},d:null},z:null}");
- }
- };
+ // The tree has 2 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(3, this->_helper.recordStore.numRecords(NULL));
+ ASSERT_EQUALS(1, unused);
- template<class OnDiskFormat>
- class DelInternalPromoteRightKey : public BtreeLogicTestBase<OnDiskFormat> {
- public:
- void run() {
- OperationContextNoop txn;
- ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
+ const BSONObj k = BSON(""
+ << "c");
+ ASSERT(this->unindex(k));
- builder.makeTree("{a:null,_:{e:{c:null},_:{f:null}}}");
+ unused = 0;
+ ASSERT_EQUALS(2, this->_helper.btree.fullValidate(&txn, &unused, true, false, 0));
- long long unused = 0;
- ASSERT_EQUALS(4, this->_helper.btree.fullValidate(&txn, &unused, true, false, 0));
+ // The tree has 1 bucket + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(2, this->_helper.recordStore.numRecords(NULL));
+ ASSERT_EQUALS(1, unused);
- // The tree has 4 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(5, this->_helper.recordStore.numRecords(NULL));
- ASSERT_EQUALS(0, unused);
+        // checkStructure() doesn't discriminate between used and unused keys
+ builder.checkStructure("{a:null,b:null,d:null}");
+ }
+};
- const BSONObj k = BSON("" << "a");
- ASSERT(this->unindex(k));
+template <class OnDiskFormat>
+class DelInternalReplaceRight : public BtreeLogicTestBase<OnDiskFormat> {
+public:
+ void run() {
+ OperationContextNoop txn;
+ ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
- unused = 0;
- ASSERT_EQUALS(3, this->_helper.btree.fullValidate(&txn, &unused, true, false, 0));
+ builder.makeTree("{a:null,_:{b:null}}");
- // The tree has 2 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(3, this->_helper.recordStore.numRecords(NULL));
- ASSERT_EQUALS(0, unused);
+ long long unused = 0;
+ ASSERT_EQUALS(2, this->_helper.btree.fullValidate(&txn, &unused, true, false, 0));
- builder.checkStructure("{c:null,_:{e:null,f:null}}");
- }
- };
+ // The tree has 2 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(3, this->_helper.recordStore.numRecords(NULL));
+ ASSERT_EQUALS(0, unused);
- template<class OnDiskFormat>
- class DelInternalReplacementPrevNonNull : public BtreeLogicTestBase<OnDiskFormat> {
- public:
- void run() {
- OperationContextNoop txn;
- ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
+ const BSONObj k = BSON(""
+ << "a");
+ ASSERT(this->unindex(k));
- builder.makeTree("{a:null,d:{c:{b:null}},e:null}");
+ unused = 0;
+ ASSERT_EQUALS(1, this->_helper.btree.fullValidate(&txn, &unused, true, false, 0));
- long long unused = 0;
- ASSERT_EQUALS(5, this->_helper.btree.fullValidate(&txn, &unused, true, false, 0));
+ // The tree has 1 bucket + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(2, this->_helper.recordStore.numRecords(NULL));
+ ASSERT_EQUALS(0, unused);
- // The tree has 3 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(4, this->_helper.recordStore.numRecords(NULL));
- ASSERT_EQUALS(0, unused);
+ builder.checkStructure("{b:null}");
+ }
+};
- const BSONObj k = BSON("" << "d");
- ASSERT(this->unindex(k));
+template <class OnDiskFormat>
+class DelInternalPromoteKey : public BtreeLogicTestBase<OnDiskFormat> {
+public:
+ void run() {
+ OperationContextNoop txn;
+ ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
- ASSERT_EQUALS(4, this->_helper.btree.fullValidate(&txn, &unused, true, false, 0));
+ builder.makeTree("{a:null,y:{d:{c:{b:null}},_:{e:null}},z:null}");
- // The tree has 3 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(4, this->_helper.recordStore.numRecords(NULL));
- ASSERT_EQUALS(1, unused);
+ long long unused = 0;
+ ASSERT_EQUALS(7, this->_helper.btree.fullValidate(&txn, &unused, true, false, 0));
- builder.checkStructure("{a:null,d:{c:{b:null}},e:null}");
+ // The tree has 5 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(6, this->_helper.recordStore.numRecords(NULL));
+ ASSERT_EQUALS(0, unused);
- // Check 'unused' key
- ASSERT(this->getKey(this->_helper.headManager.getHead(&txn), 1).recordLoc.getOfs() & 1);
- }
- };
+ const BSONObj k = BSON(""
+ << "y");
+ ASSERT(this->unindex(k));
- template<class OnDiskFormat>
- class DelInternalReplacementNextNonNull : public BtreeLogicTestBase<OnDiskFormat> {
- public:
- void run() {
- OperationContextNoop txn;
- ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
+ unused = 0;
+ ASSERT_EQUALS(6, this->_helper.btree.fullValidate(&txn, &unused, true, false, 0));
- builder.makeTree("{a:null,_:{c:null,_:{d:null}}}");
+ // The tree has 3 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(4, this->_helper.recordStore.numRecords(NULL));
+ ASSERT_EQUALS(0, unused);
- long long unused = 0;
- ASSERT_EQUALS(3, this->_helper.btree.fullValidate(&txn, &unused, true, false, 0));
+ builder.checkStructure("{a:null,e:{c:{b:null},d:null},z:null}");
+ }
+};
- // The tree has 3 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(4, this->_helper.recordStore.numRecords(NULL));
- ASSERT_EQUALS(0, unused);
-
- const BSONObj k = BSON("" << "a");
- ASSERT(this->unindex(k));
-
- ASSERT_EQUALS(2, this->_helper.btree.fullValidate(&txn, &unused, true, false, 0));
-
- // The tree has 3 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(4, this->_helper.recordStore.numRecords(NULL));
- ASSERT_EQUALS(1, unused);
-
- builder.checkStructure("{a:null,_:{c:null,_:{d:null}}}");
+template <class OnDiskFormat>
+class DelInternalPromoteRightKey : public BtreeLogicTestBase<OnDiskFormat> {
+public:
+ void run() {
+ OperationContextNoop txn;
+ ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
- // Check 'unused' key
- ASSERT(this->getKey(this->_helper.headManager.getHead(&txn), 0).recordLoc.getOfs() & 1);
- }
- };
+ builder.makeTree("{a:null,_:{e:{c:null},_:{f:null}}}");
- template<class OnDiskFormat>
- class DelInternalSplitPromoteLeft : public BtreeLogicTestBase<OnDiskFormat> {
- public:
- void run() {
- OperationContextNoop txn;
- ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
+ long long unused = 0;
+ ASSERT_EQUALS(4, this->_helper.btree.fullValidate(&txn, &unused, true, false, 0));
- builder.makeTree("{$10:null,$20:null,$30$10:{$25:{$23:null},_:{$27:null}},"
- "$40:null,$50:null,$60:null,$70:null,$80:null,$90:null,$100:null}");
+ // The tree has 4 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(5, this->_helper.recordStore.numRecords(NULL));
+ ASSERT_EQUALS(0, unused);
- long long unused = 0;
- ASSERT_EQUALS(13, this->_helper.btree.fullValidate(&txn, &unused, true, false, 0));
+ const BSONObj k = BSON(""
+ << "a");
+ ASSERT(this->unindex(k));
- // The tree has 4 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(5, this->_helper.recordStore.numRecords(NULL));
- ASSERT_EQUALS(0, unused);
-
- const BSONObj k = BSON("" << bigNumString(0x30, 0x10));
- ASSERT(this->unindex(k));
-
- ASSERT_EQUALS(12, this->_helper.btree.fullValidate(&txn, &unused, true, false, 0));
-
- // The tree has 4 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(5, this->_helper.recordStore.numRecords(NULL));
- ASSERT_EQUALS(0, unused);
+ unused = 0;
+ ASSERT_EQUALS(3, this->_helper.btree.fullValidate(&txn, &unused, true, false, 0));
- builder.checkStructure("{$60:{$10:null,$20:null,"
- "$27:{$23:null,$25:null},$40:null,$50:null},"
- "_:{$70:null,$80:null,$90:null,$100:null}}");
- }
- };
+ // The tree has 2 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(3, this->_helper.recordStore.numRecords(NULL));
+ ASSERT_EQUALS(0, unused);
- template<class OnDiskFormat>
- class DelInternalSplitPromoteRight : public BtreeLogicTestBase<OnDiskFormat> {
- public:
- void run() {
- OperationContextNoop txn;
- ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
+ builder.checkStructure("{c:null,_:{e:null,f:null}}");
+ }
+};
- builder.makeTree("{$10:null,$20:null,$30:null,$40:null,$50:null,$60:null,$70:null,"
- "$80:null,$90:null,$100$10:{$95:{$93:null},_:{$97:null}}}");
+template <class OnDiskFormat>
+class DelInternalReplacementPrevNonNull : public BtreeLogicTestBase<OnDiskFormat> {
+public:
+ void run() {
+ OperationContextNoop txn;
+ ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
- long long unused = 0;
- ASSERT_EQUALS(13, this->_helper.btree.fullValidate(&txn, &unused, true, false, 0));
+ builder.makeTree("{a:null,d:{c:{b:null}},e:null}");
- // The tree has 4 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(5, this->_helper.recordStore.numRecords(NULL));
- ASSERT_EQUALS(0, unused);
+ long long unused = 0;
+ ASSERT_EQUALS(5, this->_helper.btree.fullValidate(&txn, &unused, true, false, 0));
- const BSONObj k = BSON("" << bigNumString(0x100, 0x10));
- ASSERT(this->unindex(k));
+ // The tree has 3 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(4, this->_helper.recordStore.numRecords(NULL));
+ ASSERT_EQUALS(0, unused);
- ASSERT_EQUALS(12, this->_helper.btree.fullValidate(&txn, &unused, true, false, 0));
+ const BSONObj k = BSON(""
+ << "d");
+ ASSERT(this->unindex(k));
- // The tree has 4 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(5, this->_helper.recordStore.numRecords(NULL));
- ASSERT_EQUALS(0, unused);
+ ASSERT_EQUALS(4, this->_helper.btree.fullValidate(&txn, &unused, true, false, 0));
- builder.checkStructure(
- "{$80:{$10:null,$20:null,$30:null,$40:null,$50:null,$60:null,$70:null},"
- "_:{$90:null,$97:{$93:null,$95:null}}}");
- }
- };
+ // The tree has 3 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(4, this->_helper.recordStore.numRecords(NULL));
+ ASSERT_EQUALS(1, unused);
- template<class OnDiskFormat>
- class LocateEmptyForward : public BtreeLogicTestBase<OnDiskFormat> {
- public:
- void run() {
- OperationContextNoop txn;
- this->_helper.btree.initAsEmpty(&txn);
-
- BSONObj key1 = simpleKey('a');
- this->insert(key1, this->_helper.dummyDiskLoc);
- BSONObj key2 = simpleKey('b');
- this->insert(key2, this->_helper.dummyDiskLoc);
- BSONObj key3 = simpleKey('c');
- this->insert(key3, this->_helper.dummyDiskLoc);
-
- this->checkValidNumKeys(3);
- this->locate(BSONObj(), 0, false, this->_helper.headManager.getHead(&txn), 1);
- }
- };
+ builder.checkStructure("{a:null,d:{c:{b:null}},e:null}");
- template<class OnDiskFormat>
- class LocateEmptyReverse : public BtreeLogicTestBase<OnDiskFormat> {
- public:
- void run() {
- OperationContextNoop txn;
- this->_helper.btree.initAsEmpty(&txn);
-
- BSONObj key1 = simpleKey('a');
- this->insert(key1, this->_helper.dummyDiskLoc);
- BSONObj key2 = simpleKey('b');
- this->insert(key2, this->_helper.dummyDiskLoc);
- BSONObj key3 = simpleKey('c');
- this->insert(key3, this->_helper.dummyDiskLoc);
-
- this->checkValidNumKeys(3);
- this->locate(BSONObj(), -1, false, DiskLoc(), -1);
- }
- };
+ // Check 'unused' key
+ ASSERT(this->getKey(this->_helper.headManager.getHead(&txn), 1).recordLoc.getOfs() & 1);
+ }
+};
- template<class OnDiskFormat>
- class DuplicateKeys : public BtreeLogicTestBase<OnDiskFormat> {
- public:
- void run() {
- OperationContextNoop txn;
- this->_helper.btree.initAsEmpty(&txn);
-
- BSONObj key1 = simpleKey('z');
- ASSERT_OK(this->insert(key1, this->_helper.dummyDiskLoc, true));
- this->checkValidNumKeys(1);
- this->locate(key1, 0, true, this->_helper.headManager.getHead(&txn), 1);
-
- // Attempt to insert a dup key/value.
- ASSERT_EQUALS(ErrorCodes::DuplicateKeyValue,
- this->insert(key1, this->_helper.dummyDiskLoc, true));
- this->checkValidNumKeys(1);
- this->locate(key1, 0, true, this->_helper.headManager.getHead(&txn), 1);
-
- // Attempt to insert a dup key/value with dupsAllowed=false.
- ASSERT_EQUALS(ErrorCodes::DuplicateKeyValue,
- this->insert(key1, this->_helper.dummyDiskLoc, false));
- this->checkValidNumKeys(1);
- this->locate(key1, 0, true, this->_helper.headManager.getHead(&txn), 1);
-
- // Add another record to produce another diskloc.
- StatusWith<RecordId> s = this->_helper.recordStore.insertRecord(&txn, "a", 1, false);
-
- ASSERT_TRUE(s.isOK());
- ASSERT_EQUALS(3, this->_helper.recordStore.numRecords(NULL));
-
- const DiskLoc dummyDiskLoc2 = DiskLoc::fromRecordId(s.getValue());
-
- // Attempt to insert a dup key but this time with a different value.
- ASSERT_EQUALS(ErrorCodes::DuplicateKey, this->insert(key1, dummyDiskLoc2, false));
- this->checkValidNumKeys(1);
-
- // Insert a dup key with dupsAllowed=true, should succeed.
- ASSERT_OK(this->insert(key1, dummyDiskLoc2, true));
- this->checkValidNumKeys(2);
-
- // Clean up.
- this->_helper.recordStore.deleteRecord(&txn, s.getValue());
- ASSERT_EQUALS(2, this->_helper.recordStore.numRecords(NULL));
- }
- };
+template <class OnDiskFormat>
+class DelInternalReplacementNextNonNull : public BtreeLogicTestBase<OnDiskFormat> {
+public:
+ void run() {
+ OperationContextNoop txn;
+ ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
+ builder.makeTree("{a:null,_:{c:null,_:{d:null}}}");
-    /* This test requires the entire server to be linked in and is better implemented using
-       the JS framework. It is disabled here and will be ported to jsCore.
+ long long unused = 0;
+ ASSERT_EQUALS(3, this->_helper.btree.fullValidate(&txn, &unused, true, false, 0));
- template<class OnDiskFormat>
- class SignedZeroDuplication : public BtreeLogicTestBase<OnDiskFormat> {
- public:
- void run() {
- ASSERT_EQUALS(0.0, -0.0);
- DBDirectClient c;
-
- static const string ns("unittests.SignedZeroDuplication");
+ // The tree has 3 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(4, this->_helper.recordStore.numRecords(NULL));
+ ASSERT_EQUALS(0, unused);
- c.ensureIndex(ns, BSON("b" << 1), true);
- c.insert(ns, BSON("b" << 0.0));
- c.insert(ns, BSON("b" << 1.0));
- c.update(ns, BSON("b" << 1.0), BSON("b" << -0.0));
+ const BSONObj k = BSON(""
+ << "a");
+ ASSERT(this->unindex(k));
- ASSERT_EQUALS(1U, c.count(ns, BSON("b" << 0.0)));
- }
- };
- */
+ ASSERT_EQUALS(2, this->_helper.btree.fullValidate(&txn, &unused, true, false, 0));
+
+ // The tree has 3 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(4, this->_helper.recordStore.numRecords(NULL));
+ ASSERT_EQUALS(1, unused);
+
+ builder.checkStructure("{a:null,_:{c:null,_:{d:null}}}");
+
+ // Check 'unused' key
+ ASSERT(this->getKey(this->_helper.headManager.getHead(&txn), 0).recordLoc.getOfs() & 1);
+ }
+};
+
+template <class OnDiskFormat>
+class DelInternalSplitPromoteLeft : public BtreeLogicTestBase<OnDiskFormat> {
+public:
+ void run() {
+ OperationContextNoop txn;
+ ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
+
+ builder.makeTree(
+ "{$10:null,$20:null,$30$10:{$25:{$23:null},_:{$27:null}},"
+ "$40:null,$50:null,$60:null,$70:null,$80:null,$90:null,$100:null}");
+
+ long long unused = 0;
+ ASSERT_EQUALS(13, this->_helper.btree.fullValidate(&txn, &unused, true, false, 0));
+
+ // The tree has 4 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(5, this->_helper.recordStore.numRecords(NULL));
+ ASSERT_EQUALS(0, unused);
+
+ const BSONObj k = BSON("" << bigNumString(0x30, 0x10));
+ ASSERT(this->unindex(k));
+
+ ASSERT_EQUALS(12, this->_helper.btree.fullValidate(&txn, &unused, true, false, 0));
+
+ // The tree has 4 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(5, this->_helper.recordStore.numRecords(NULL));
+ ASSERT_EQUALS(0, unused);
+
+ builder.checkStructure(
+ "{$60:{$10:null,$20:null,"
+ "$27:{$23:null,$25:null},$40:null,$50:null},"
+ "_:{$70:null,$80:null,$90:null,$100:null}}");
+ }
+};
+
+template <class OnDiskFormat>
+class DelInternalSplitPromoteRight : public BtreeLogicTestBase<OnDiskFormat> {
+public:
+ void run() {
+ OperationContextNoop txn;
+ ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
+
+ builder.makeTree(
+ "{$10:null,$20:null,$30:null,$40:null,$50:null,$60:null,$70:null,"
+ "$80:null,$90:null,$100$10:{$95:{$93:null},_:{$97:null}}}");
+
+ long long unused = 0;
+ ASSERT_EQUALS(13, this->_helper.btree.fullValidate(&txn, &unused, true, false, 0));
+
+ // The tree has 4 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(5, this->_helper.recordStore.numRecords(NULL));
+ ASSERT_EQUALS(0, unused);
+
+ const BSONObj k = BSON("" << bigNumString(0x100, 0x10));
+ ASSERT(this->unindex(k));
+
+ ASSERT_EQUALS(12, this->_helper.btree.fullValidate(&txn, &unused, true, false, 0));
+
+ // The tree has 4 buckets + 1 for the this->_helper.dummyDiskLoc
+ ASSERT_EQUALS(5, this->_helper.recordStore.numRecords(NULL));
+ ASSERT_EQUALS(0, unused);
+
+ builder.checkStructure(
+ "{$80:{$10:null,$20:null,$30:null,$40:null,$50:null,$60:null,$70:null},"
+ "_:{$90:null,$97:{$93:null,$95:null}}}");
+ }
+};
+
+template <class OnDiskFormat>
+class LocateEmptyForward : public BtreeLogicTestBase<OnDiskFormat> {
+public:
+ void run() {
+ OperationContextNoop txn;
+ this->_helper.btree.initAsEmpty(&txn);
+
+ BSONObj key1 = simpleKey('a');
+ this->insert(key1, this->_helper.dummyDiskLoc);
+ BSONObj key2 = simpleKey('b');
+ this->insert(key2, this->_helper.dummyDiskLoc);
+ BSONObj key3 = simpleKey('c');
+ this->insert(key3, this->_helper.dummyDiskLoc);
+
+ this->checkValidNumKeys(3);
+ this->locate(BSONObj(), 0, false, this->_helper.headManager.getHead(&txn), 1);
+ }
+};
+
+template <class OnDiskFormat>
+class LocateEmptyReverse : public BtreeLogicTestBase<OnDiskFormat> {
+public:
+ void run() {
+ OperationContextNoop txn;
+ this->_helper.btree.initAsEmpty(&txn);
+
+ BSONObj key1 = simpleKey('a');
+ this->insert(key1, this->_helper.dummyDiskLoc);
+ BSONObj key2 = simpleKey('b');
+ this->insert(key2, this->_helper.dummyDiskLoc);
+ BSONObj key3 = simpleKey('c');
+ this->insert(key3, this->_helper.dummyDiskLoc);
+
+ this->checkValidNumKeys(3);
+ this->locate(BSONObj(), -1, false, DiskLoc(), -1);
+ }
+};
+
+template <class OnDiskFormat>
+class DuplicateKeys : public BtreeLogicTestBase<OnDiskFormat> {
+public:
+ void run() {
+ OperationContextNoop txn;
+ this->_helper.btree.initAsEmpty(&txn);
+
+ BSONObj key1 = simpleKey('z');
+ ASSERT_OK(this->insert(key1, this->_helper.dummyDiskLoc, true));
+ this->checkValidNumKeys(1);
+ this->locate(key1, 0, true, this->_helper.headManager.getHead(&txn), 1);
+
+ // Attempt to insert a dup key/value.
+ ASSERT_EQUALS(ErrorCodes::DuplicateKeyValue,
+ this->insert(key1, this->_helper.dummyDiskLoc, true));
+ this->checkValidNumKeys(1);
+ this->locate(key1, 0, true, this->_helper.headManager.getHead(&txn), 1);
+
+ // Attempt to insert a dup key/value with dupsAllowed=false.
+ ASSERT_EQUALS(ErrorCodes::DuplicateKeyValue,
+ this->insert(key1, this->_helper.dummyDiskLoc, false));
+ this->checkValidNumKeys(1);
+ this->locate(key1, 0, true, this->_helper.headManager.getHead(&txn), 1);
+
+ // Add another record to produce another diskloc.
+ StatusWith<RecordId> s = this->_helper.recordStore.insertRecord(&txn, "a", 1, false);
+
+ ASSERT_TRUE(s.isOK());
+ ASSERT_EQUALS(3, this->_helper.recordStore.numRecords(NULL));
+
+ const DiskLoc dummyDiskLoc2 = DiskLoc::fromRecordId(s.getValue());
+
+ // Attempt to insert a dup key but this time with a different value.
+ ASSERT_EQUALS(ErrorCodes::DuplicateKey, this->insert(key1, dummyDiskLoc2, false));
+ this->checkValidNumKeys(1);
+
+ // Insert a dup key with dupsAllowed=true, should succeed.
+ ASSERT_OK(this->insert(key1, dummyDiskLoc2, true));
+ this->checkValidNumKeys(2);
+
+ // Clean up.
+ this->_helper.recordStore.deleteRecord(&txn, s.getValue());
+ ASSERT_EQUALS(2, this->_helper.recordStore.numRecords(NULL));
+ }
+};
+
+
+/* This test requires the entire server to be linked in and is better implemented using
+   the JS framework. It is disabled here and will be ported to jsCore.
+
+template<class OnDiskFormat>
+class SignedZeroDuplication : public BtreeLogicTestBase<OnDiskFormat> {
+public:
+ void run() {
+ ASSERT_EQUALS(0.0, -0.0);
+ DBDirectClient c;
+
+ static const string ns("unittests.SignedZeroDuplication");
+
+ c.ensureIndex(ns, BSON("b" << 1), true);
+ c.insert(ns, BSON("b" << 0.0));
+ c.insert(ns, BSON("b" << 1.0));
+ c.update(ns, BSON("b" << 1.0), BSON("b" << -0.0));
+
+ ASSERT_EQUALS(1U, c.count(ns, BSON("b" << 0.0)));
+ }
+};
+*/
/*
// QUERY_MIGRATION: port later
@@ -2217,111 +2391,107 @@ namespace mongo {
};
*/
- //
- // TEST SUITE DEFINITION
- //
-
- template<class OnDiskFormat>
- class BtreeLogicTestSuite : public unittest::Suite {
- public:
- BtreeLogicTestSuite(const std::string& name) : Suite(name) {
-
- }
-
- void setupTests() {
- add< SimpleCreate<OnDiskFormat> >();
- add< SimpleInsertDelete<OnDiskFormat> >();
- add< SplitRightHeavyBucket<OnDiskFormat> >();
- add< SplitLeftHeavyBucket<OnDiskFormat> >();
- add< MissingLocate<OnDiskFormat> >();
- add< MissingLocateMultiBucket<OnDiskFormat> >();
- add< SERVER983<OnDiskFormat> >();
- add< DontReuseUnused<OnDiskFormat> >();
- add< MergeBucketsLeft<OnDiskFormat> >();
- add< MergeBucketsRight<OnDiskFormat> >();
- add< MergeBucketsDontReplaceHead<OnDiskFormat> >();
- add< MergeBucketsDelInternal<OnDiskFormat> >();
- add< MergeBucketsRightNull<OnDiskFormat> >();
- add< DontMergeSingleBucket<OnDiskFormat> >();
- add< ParentMergeNonRightToLeft<OnDiskFormat> >();
- add< ParentMergeNonRightToRight<OnDiskFormat> >();
- add< CantMergeRightNoMerge<OnDiskFormat> >();
- add< CantMergeLeftNoMerge<OnDiskFormat> >();
- add< MergeOption<OnDiskFormat> >();
- add< ForceMergeLeft<OnDiskFormat> >();
- add< ForceMergeRight<OnDiskFormat> >();
- add< RecursiveMerge<OnDiskFormat> >();
- add< RecursiveMergeRightBucket<OnDiskFormat> >();
- add< RecursiveMergeDoubleRightBucket<OnDiskFormat> >();
-
- add< MergeSizeJustRightRight<OnDiskFormat> >();
- add< MergeSizeJustRightLeft<OnDiskFormat> >();
- add< MergeSizeRight<OnDiskFormat> >();
- add< MergeSizeLeft<OnDiskFormat> >();
- add< NoMergeBelowMarkRight<OnDiskFormat> >();
- add< NoMergeBelowMarkLeft<OnDiskFormat> >();
- add< MergeSizeRightTooBig<OnDiskFormat> >();
- add< MergeSizeLeftTooBig<OnDiskFormat> >();
- add< MergeRightEmpty<OnDiskFormat> >();
- add< MergeMinRightEmpty<OnDiskFormat> >();
- add< MergeLeftEmpty<OnDiskFormat> >();
- add< MergeMinLeftEmpty<OnDiskFormat> >();
- add< BalanceRightEmpty<OnDiskFormat> >();
- add< BalanceLeftEmpty<OnDiskFormat> >();
-
- add< BalanceOneLeftToRight<OnDiskFormat> >();
- add< BalanceOneRightToLeft<OnDiskFormat> >();
- add< BalanceThreeLeftToRight<OnDiskFormat> >();
- add< BalanceThreeRightToLeft<OnDiskFormat> >();
- add< BalanceSingleParentKey<OnDiskFormat> >();
-
- add< PackEmptyBucket<OnDiskFormat> >();
- add< PackedDataSizeEmptyBucket<OnDiskFormat> >();
-
- add< BalanceSingleParentKeyPackParent<OnDiskFormat> >();
- add< BalanceSplitParent<OnDiskFormat> >();
- add< EvenRebalanceLeft<OnDiskFormat> >();
- add< EvenRebalanceLeftCusp<OnDiskFormat> >();
- add< EvenRebalanceRight<OnDiskFormat> >();
- add< EvenRebalanceRightCusp<OnDiskFormat> >();
- add< EvenRebalanceCenter<OnDiskFormat> >();
- add< OddRebalanceLeft<OnDiskFormat> >();
- add< OddRebalanceRight<OnDiskFormat> >();
- add< OddRebalanceCenter<OnDiskFormat> >();
- add< RebalanceEmptyRight<OnDiskFormat> >();
- add< RebalanceEmptyLeft<OnDiskFormat> >();
-
- add< NoMoveAtLowWaterMarkRight<OnDiskFormat> >();
- add< MoveBelowLowWaterMarkRight<OnDiskFormat> >();
- add< NoMoveAtLowWaterMarkLeft<OnDiskFormat> >();
- add< MoveBelowLowWaterMarkLeft<OnDiskFormat> >();
-
- add< PreferBalanceLeft<OnDiskFormat> >();
- add< PreferBalanceRight<OnDiskFormat> >();
- add< RecursiveMergeThenBalance<OnDiskFormat> >();
- add< DelEmptyNoNeighbors<OnDiskFormat> >();
- add< DelEmptyEmptyNeighbors<OnDiskFormat> >();
- add< DelInternal<OnDiskFormat> >();
- add< DelInternalReplaceWithUnused<OnDiskFormat> >();
- add< DelInternalReplaceRight<OnDiskFormat> >();
- add< DelInternalPromoteKey<OnDiskFormat> >();
- add< DelInternalPromoteRightKey<OnDiskFormat> >();
- add< DelInternalReplacementPrevNonNull<OnDiskFormat> >();
- add< DelInternalReplacementNextNonNull<OnDiskFormat> >();
- add< DelInternalSplitPromoteLeft<OnDiskFormat> >();
- add< DelInternalSplitPromoteRight<OnDiskFormat> >();
-
- add< LocateEmptyForward<OnDiskFormat> >();
- add< LocateEmptyReverse<OnDiskFormat> >();
-
- add< DuplicateKeys<OnDiskFormat> >();
- }
- };
-
- // Test suite for both V0 and V1
- static unittest::SuiteInstance< BtreeLogicTestSuite<BtreeLayoutV0> > SUITE_V0(
- "BTreeLogicTests_V0");
+//
+// TEST SUITE DEFINITION
+//
- static unittest::SuiteInstance< BtreeLogicTestSuite<BtreeLayoutV1> > SUITE_V1(
- "BTreeLogicTests_V1");
+template <class OnDiskFormat>
+class BtreeLogicTestSuite : public unittest::Suite {
+public:
+ BtreeLogicTestSuite(const std::string& name) : Suite(name) {}
+
+ void setupTests() {
+ add<SimpleCreate<OnDiskFormat>>();
+ add<SimpleInsertDelete<OnDiskFormat>>();
+ add<SplitRightHeavyBucket<OnDiskFormat>>();
+ add<SplitLeftHeavyBucket<OnDiskFormat>>();
+ add<MissingLocate<OnDiskFormat>>();
+ add<MissingLocateMultiBucket<OnDiskFormat>>();
+ add<SERVER983<OnDiskFormat>>();
+ add<DontReuseUnused<OnDiskFormat>>();
+ add<MergeBucketsLeft<OnDiskFormat>>();
+ add<MergeBucketsRight<OnDiskFormat>>();
+ add<MergeBucketsDontReplaceHead<OnDiskFormat>>();
+ add<MergeBucketsDelInternal<OnDiskFormat>>();
+ add<MergeBucketsRightNull<OnDiskFormat>>();
+ add<DontMergeSingleBucket<OnDiskFormat>>();
+ add<ParentMergeNonRightToLeft<OnDiskFormat>>();
+ add<ParentMergeNonRightToRight<OnDiskFormat>>();
+ add<CantMergeRightNoMerge<OnDiskFormat>>();
+ add<CantMergeLeftNoMerge<OnDiskFormat>>();
+ add<MergeOption<OnDiskFormat>>();
+ add<ForceMergeLeft<OnDiskFormat>>();
+ add<ForceMergeRight<OnDiskFormat>>();
+ add<RecursiveMerge<OnDiskFormat>>();
+ add<RecursiveMergeRightBucket<OnDiskFormat>>();
+ add<RecursiveMergeDoubleRightBucket<OnDiskFormat>>();
+
+ add<MergeSizeJustRightRight<OnDiskFormat>>();
+ add<MergeSizeJustRightLeft<OnDiskFormat>>();
+ add<MergeSizeRight<OnDiskFormat>>();
+ add<MergeSizeLeft<OnDiskFormat>>();
+ add<NoMergeBelowMarkRight<OnDiskFormat>>();
+ add<NoMergeBelowMarkLeft<OnDiskFormat>>();
+ add<MergeSizeRightTooBig<OnDiskFormat>>();
+ add<MergeSizeLeftTooBig<OnDiskFormat>>();
+ add<MergeRightEmpty<OnDiskFormat>>();
+ add<MergeMinRightEmpty<OnDiskFormat>>();
+ add<MergeLeftEmpty<OnDiskFormat>>();
+ add<MergeMinLeftEmpty<OnDiskFormat>>();
+ add<BalanceRightEmpty<OnDiskFormat>>();
+ add<BalanceLeftEmpty<OnDiskFormat>>();
+
+ add<BalanceOneLeftToRight<OnDiskFormat>>();
+ add<BalanceOneRightToLeft<OnDiskFormat>>();
+ add<BalanceThreeLeftToRight<OnDiskFormat>>();
+ add<BalanceThreeRightToLeft<OnDiskFormat>>();
+ add<BalanceSingleParentKey<OnDiskFormat>>();
+
+ add<PackEmptyBucket<OnDiskFormat>>();
+ add<PackedDataSizeEmptyBucket<OnDiskFormat>>();
+
+ add<BalanceSingleParentKeyPackParent<OnDiskFormat>>();
+ add<BalanceSplitParent<OnDiskFormat>>();
+ add<EvenRebalanceLeft<OnDiskFormat>>();
+ add<EvenRebalanceLeftCusp<OnDiskFormat>>();
+ add<EvenRebalanceRight<OnDiskFormat>>();
+ add<EvenRebalanceRightCusp<OnDiskFormat>>();
+ add<EvenRebalanceCenter<OnDiskFormat>>();
+ add<OddRebalanceLeft<OnDiskFormat>>();
+ add<OddRebalanceRight<OnDiskFormat>>();
+ add<OddRebalanceCenter<OnDiskFormat>>();
+ add<RebalanceEmptyRight<OnDiskFormat>>();
+ add<RebalanceEmptyLeft<OnDiskFormat>>();
+
+ add<NoMoveAtLowWaterMarkRight<OnDiskFormat>>();
+ add<MoveBelowLowWaterMarkRight<OnDiskFormat>>();
+ add<NoMoveAtLowWaterMarkLeft<OnDiskFormat>>();
+ add<MoveBelowLowWaterMarkLeft<OnDiskFormat>>();
+
+ add<PreferBalanceLeft<OnDiskFormat>>();
+ add<PreferBalanceRight<OnDiskFormat>>();
+ add<RecursiveMergeThenBalance<OnDiskFormat>>();
+ add<DelEmptyNoNeighbors<OnDiskFormat>>();
+ add<DelEmptyEmptyNeighbors<OnDiskFormat>>();
+ add<DelInternal<OnDiskFormat>>();
+ add<DelInternalReplaceWithUnused<OnDiskFormat>>();
+ add<DelInternalReplaceRight<OnDiskFormat>>();
+ add<DelInternalPromoteKey<OnDiskFormat>>();
+ add<DelInternalPromoteRightKey<OnDiskFormat>>();
+ add<DelInternalReplacementPrevNonNull<OnDiskFormat>>();
+ add<DelInternalReplacementNextNonNull<OnDiskFormat>>();
+ add<DelInternalSplitPromoteLeft<OnDiskFormat>>();
+ add<DelInternalSplitPromoteRight<OnDiskFormat>>();
+
+ add<LocateEmptyForward<OnDiskFormat>>();
+ add<LocateEmptyReverse<OnDiskFormat>>();
+
+ add<DuplicateKeys<OnDiskFormat>>();
+ }
+};
+
+// Test suite for both V0 and V1
+static unittest::SuiteInstance<BtreeLogicTestSuite<BtreeLayoutV0>> SUITE_V0("BTreeLogicTests_V0");
+
+static unittest::SuiteInstance<BtreeLogicTestSuite<BtreeLayoutV1>> SUITE_V1("BTreeLogicTests_V1");
}
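
The suite above instantiates every test template once per on-disk layout and registers it through add<>(). A minimal sketch of the same registration pattern — SketchTest and the "SketchTests_V1" suite name are hypothetical stand-ins for the real cases listed above:

    // Sketch only: how one more templated case would plug into the suite above.
    template <class OnDiskFormat>
    class SketchTest : public BtreeLogicTestBase<OnDiskFormat> {
    public:
        void run() {
            OperationContextNoop txn;
            ArtificialTreeBuilder<OnDiskFormat> builder(&txn, &this->_helper);
            builder.makeTree("{a:null}");
            // One key in the tree; fullValidate() returns the key count.
            ASSERT_EQUALS(1, this->_helper.btree.fullValidate(&txn, NULL, true, false, 0));
        }
    };

    // Registered exactly like the real cases: add<>() inside setupTests(),
    // then one static SuiteInstance per layout.
    template <class OnDiskFormat>
    class SketchTestSuite : public unittest::Suite {
    public:
        SketchTestSuite(const std::string& name) : Suite(name) {}
        void setupTests() {
            add<SketchTest<OnDiskFormat>>();
        }
    };

    static unittest::SuiteInstance<SketchTestSuite<BtreeLayoutV1>> SKETCH_SUITE("SketchTests_V1");
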
diff --git a/src/mongo/db/storage/mmap_v1/btree/btree_ondisk.cpp b/src/mongo/db/storage/mmap_v1/btree/btree_ondisk.cpp
index 15997d5681c..91b7141e7ed 100644
--- a/src/mongo/db/storage/mmap_v1/btree/btree_ondisk.cpp
+++ b/src/mongo/db/storage/mmap_v1/btree/btree_ondisk.cpp
@@ -37,23 +37,23 @@
namespace mongo {
- void DiskLoc56Bit::operator=(const DiskLoc& loc) {
- ofs = loc.getOfs();
- int la = loc.a();
- if (la == DiskLoc::max().a()) {
- invariant(ofs == DiskLoc::max().getOfs());
- la = OurMaxA;
- }
- invariant( la <= OurMaxA ); // must fit in 3 bytes
- if( la < 0 ) {
- if ( la != -1 ) {
- log() << "btree diskloc isn't negative 1: " << la << std::endl;
- invariant ( la == -1 );
- }
- la = 0;
- ofs = OurNullOfs;
+void DiskLoc56Bit::operator=(const DiskLoc& loc) {
+ ofs = loc.getOfs();
+ int la = loc.a();
+ if (la == DiskLoc::max().a()) {
+ invariant(ofs == DiskLoc::max().getOfs());
+ la = OurMaxA;
+ }
+ invariant(la <= OurMaxA); // must fit in 3 bytes
+ if (la < 0) {
+ if (la != -1) {
+ log() << "btree diskloc isn't negative 1: " << la << std::endl;
+ invariant(la == -1);
}
- memcpy(_a, &la, 3); // endian
+ la = 0;
+ ofs = OurNullOfs;
}
+ memcpy(_a, &la, 3); // endian
+}
} // namespace mongo
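
The memcpy(_a, &la, 3) in operator= above keeps only the low three bytes of the file number, which is why la must fit in OurMaxA (0xffffff) and why the code carries "// endian" markers. A standalone sketch of the same 24-bit round trip, assuming a little-endian host — the pack24/unpack24 helpers are illustrative and not part of the tree:

    #include <cassert>
    #include <cstring>

    // Pack the low 24 bits of 'fileNum' into three bytes, mirroring
    // memcpy(_a, &la, 3) in DiskLoc56Bit::operator= above.
    inline void pack24(unsigned char out[3], int fileNum) {
        assert(fileNum >= 0 && fileNum <= 0xffffff);  // must fit in 3 bytes
        std::memcpy(out, &fileNum, 3);                // little-endian assumed
    }

    inline int unpack24(const unsigned char in[3]) {
        int fileNum = 0;  // high byte stays zero
        std::memcpy(&fileNum, in, 3);
        return fileNum;
    }

    int main() {
        unsigned char a[3];
        pack24(a, 0xabcdef);
        assert(unpack24(a) == 0xabcdef);  // round-trips on little-endian hosts
        return 0;
    }
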
diff --git a/src/mongo/db/storage/mmap_v1/btree/btree_ondisk.h b/src/mongo/db/storage/mmap_v1/btree/btree_ondisk.h
index a5ddec6bccd..3238ec64179 100644
--- a/src/mongo/db/storage/mmap_v1/btree/btree_ondisk.h
+++ b/src/mongo/db/storage/mmap_v1/btree/btree_ondisk.h
@@ -34,337 +34,342 @@
namespace mongo {
- const int OldBucketSize = 8192;
+const int OldBucketSize = 8192;
+//
+// On-disk index format
+//
+
+#pragma pack(1)
+/**
+ * This is the fixed width data component for storage of a key within a bucket. It contains an
+ * offset pointer to the variable width bson data component. This may be 'unused', please see
+ * below.
+ *
+ * Why is this templated on Loc? Because V0 and V1 have different size DiskLoc(s) but otherwise
+ * the same layout.
+ */
+template <class LocType>
+struct FixedWidthKey {
//
- // On-disk index format
+ // Data
//
-#pragma pack(1)
/**
- * This is the fixed width data component for storage of a key within a bucket. It contains an
- * offset pointer to the variable width bson data component. This may be 'unused', please see
- * below.
- *
- * Why is this templated on Loc? Because V0 and V1 have different size DiskLoc(s) but otherwise
- * the same layout.
+     * The 'left' child bucket of this key. If this is the i-th key, it points to the child
+     * bucket at index i.
*/
- template <class LocType>
- struct FixedWidthKey {
- //
- // Data
- //
-
- /**
-         * The 'left' child bucket of this key. If this is the i-th key, it points to the child
-         * bucket at index i.
- */
- LocType prevChildBucket;
-
- /**
- * The location of the record associated with this key.
- */
- LocType recordLoc;
-
- /**
- * Offset within current bucket of the variable width bson key for this _KeyNode.
- */
- unsigned short _kdo;
-
- //
- // Accessors / mutators
- //
-
- short keyDataOfs() const {
- return static_cast<short>(_kdo);
- }
+ LocType prevChildBucket;
- void setKeyDataOfs(short s) {
- _kdo = s;
- invariant(s>=0);
- }
+ /**
+ * The location of the record associated with this key.
+ */
+ LocType recordLoc;
- void setKeyDataOfsSavingUse(short s) {
- // XXX kill this func
- setKeyDataOfs(s);
- }
+ /**
+ * Offset within current bucket of the variable width bson key for this _KeyNode.
+ */
+ unsigned short _kdo;
- /**
- * Unused keys are not returned by read operations. Keys may be marked
- * as unused in cases where it is difficult to delete them while
- * maintaining the constraints required of a btree.
- *
-         * Setting ofs to odd is the sentinel for unused, as real recordLocs
- * are always even numbers. Note we need to keep its value basically
- * the same as we use the recordLoc as part of the key in the index
- * (to handle duplicate keys efficiently).
- *
- * Flagging keys as unused is a feature that is being phased out in favor
- * of deleting the keys outright. The current btree implementation is
- * not expected to mark a key as unused in a non legacy btree.
- */
- void setUnused() {
- recordLoc.GETOFS() |= 1;
- }
+ //
+ // Accessors / mutators
+ //
- void setUsed() { recordLoc.GETOFS() &= ~1; }
+ short keyDataOfs() const {
+ return static_cast<short>(_kdo);
+ }
- int isUnused() const {
- return recordLoc.getOfs() & 1;
- }
+ void setKeyDataOfs(short s) {
+ _kdo = s;
+ invariant(s >= 0);
+ }
- int isUsed() const {
- return !isUnused();
- }
- };
+ void setKeyDataOfsSavingUse(short s) {
+ // XXX kill this func
+ setKeyDataOfs(s);
+ }
/**
- * This structure represents header data for a btree bucket. An object of
- * this type is typically allocated inside of a buffer of size BucketSize,
- * resulting in a full bucket with an appropriate header.
+ * Unused keys are not returned by read operations. Keys may be marked
+ * as unused in cases where it is difficult to delete them while
+ * maintaining the constraints required of a btree.
*
- * The body of a btree bucket contains an array of _KeyNode objects starting
- * from its lowest indexed bytes and growing to higher indexed bytes. The
- * body also contains variable width bson keys, which are allocated from the
- * highest indexed bytes toward lower indexed bytes.
+     * Setting ofs to odd is the sentinel for unused, as real recordLocs
+ * are always even numbers. Note we need to keep its value basically
+ * the same as we use the recordLoc as part of the key in the index
+ * (to handle duplicate keys efficiently).
*
- * |hhhh|kkkkkkk--------bbbbbbbbbbbuuubbbuubbb|
- * h = header data
- * k = KeyNode data
- * - = empty space
- * b = bson key data
- * u = unused (old) bson key data, that may be garbage collected
+ * Flagging keys as unused is a feature that is being phased out in favor
+ * of deleting the keys outright. The current btree implementation is
+ * not expected to mark a key as unused in a non legacy btree.
*/
- struct BtreeBucketV0 {
- /**
- * Parent bucket of this bucket, which isNull() for the root bucket.
- */
- DiskLoc parent;
+ void setUnused() {
+ recordLoc.GETOFS() |= 1;
+ }
- /**
- * Given that there are n keys, this is the n index child.
- */
- DiskLoc nextChild;
+ void setUsed() {
+ recordLoc.GETOFS() &= ~1;
+ }
- /**
- * Can be reused, value is 8192 in current pdfile version Apr2010
- */
- unsigned short _wasSize;
+ int isUnused() const {
+ return recordLoc.getOfs() & 1;
+ }
- /**
- * zero
- */
- unsigned short _reserved1;
+ int isUsed() const {
+ return !isUnused();
+ }
+};
- int flags;
+/**
+ * This structure represents header data for a btree bucket. An object of
+ * this type is typically allocated inside of a buffer of size BucketSize,
+ * resulting in a full bucket with an appropriate header.
+ *
+ * The body of a btree bucket contains an array of _KeyNode objects starting
+ * from its lowest indexed bytes and growing to higher indexed bytes. The
+ * body also contains variable width bson keys, which are allocated from the
+ * highest indexed bytes toward lower indexed bytes.
+ *
+ * |hhhh|kkkkkkk--------bbbbbbbbbbbuuubbbuubbb|
+ * h = header data
+ * k = KeyNode data
+ * - = empty space
+ * b = bson key data
+ * u = unused (old) bson key data, that may be garbage collected
+ */
+struct BtreeBucketV0 {
+ /**
+ * Parent bucket of this bucket, which isNull() for the root bucket.
+ */
+ DiskLoc parent;
- /** basicInsert() assumes the next three members are consecutive and in this order: */
+ /**
+ * Given that there are n keys, this is the n index child.
+ */
+ DiskLoc nextChild;
- /** Size of the empty region. */
- int emptySize;
+ /**
+ * Can be reused, value is 8192 in current pdfile version Apr2010
+ */
+ unsigned short _wasSize;
- /** Size used for bson storage, including storage of old keys. */
- int topSize;
+ /**
+ * zero
+ */
+ unsigned short _reserved1;
- /* Number of keys in the bucket. */
- int n;
+ int flags;
- int reserved;
+ /** basicInsert() assumes the next three members are consecutive and in this order: */
- /* Beginning of the bucket's body */
- char data[4];
+ /** Size of the empty region. */
+ int emptySize;
- // Precalculated size constants
- enum { HeaderSize = 40 };
- };
+ /** Size used for bson storage, including storage of old keys. */
+ int topSize;
- // BtreeBucketV0 is part of the on-disk format, so it should never be changed
- BOOST_STATIC_ASSERT(
- sizeof(BtreeBucketV0) - sizeof(static_cast<BtreeBucketV0*>(NULL)->data)
- == BtreeBucketV0::HeaderSize);
+ /* Number of keys in the bucket. */
+ int n;
- /**
-     * A variant of DiskLoc used by the V1 bucket type.
- */
- struct DiskLoc56Bit {
- //
- // Data
- //
+ int reserved;
- int ofs;
+ /* Beginning of the bucket's body */
+ char data[4];
- unsigned char _a[3];
+ // Precalculated size constants
+ enum { HeaderSize = 40 };
+};
- //
- // Accessors XXX rename these, this is terrible
- //
+// BtreeBucketV0 is part of the on-disk format, so it should never be changed
+BOOST_STATIC_ASSERT(sizeof(BtreeBucketV0) - sizeof(static_cast<BtreeBucketV0*>(NULL)->data) ==
+ BtreeBucketV0::HeaderSize);
- int& GETOFS() { return ofs; }
+/**
+ * A variant of DiskLoc used by the V1 bucket type.
+ */
+struct DiskLoc56Bit {
+ //
+ // Data
+ //
- int getOfs() const { return ofs; }
+ int ofs;
- //
- // Comparison
- //
+ unsigned char _a[3];
- bool isNull() const { return ofs < 0; }
+ //
+ // Accessors XXX rename these, this is terrible
+ //
- unsigned long long toLongLong() const {
- // endian
- unsigned long long result = ofs;
- char* cursor = reinterpret_cast<char *>(&result);
- *reinterpret_cast<uint16_t*>(cursor + 4) = *reinterpret_cast<const uint16_t*>(&_a[0]);
- *reinterpret_cast<uint8_t*>(cursor + 6) = *reinterpret_cast<const uint8_t*>(&_a[2]);
- *reinterpret_cast<uint8_t*>(cursor + 7) = uint8_t(0);
- return result;
- }
+ int& GETOFS() {
+ return ofs;
+ }
- bool operator<(const DiskLoc56Bit& rhs) const {
-            // the ordering of dup keys in btrees isn't too critical, but we'd like to put items
- // that are close together on disk close together in the tree, so we do want the file #
- // to be the most significant bytes
- return toLongLong() < rhs.toLongLong();
- }
+ int getOfs() const {
+ return ofs;
+ }
- int compare(const DiskLoc56Bit& rhs) const {
- unsigned long long a = toLongLong();
- unsigned long long b = rhs.toLongLong();
- if ( a < b ) {
- return -1;
- }
- else {
- return a == b ? 0 : 1;
- }
- }
+ //
+ // Comparison
+ //
- bool operator==(const DiskLoc56Bit& rhs) const {
- return toLongLong() == rhs.toLongLong();
+ bool isNull() const {
+ return ofs < 0;
+ }
+
+ unsigned long long toLongLong() const {
+ // endian
+ unsigned long long result = ofs;
+ char* cursor = reinterpret_cast<char*>(&result);
+ *reinterpret_cast<uint16_t*>(cursor + 4) = *reinterpret_cast<const uint16_t*>(&_a[0]);
+ *reinterpret_cast<uint8_t*>(cursor + 6) = *reinterpret_cast<const uint8_t*>(&_a[2]);
+ *reinterpret_cast<uint8_t*>(cursor + 7) = uint8_t(0);
+ return result;
+ }
+
+ bool operator<(const DiskLoc56Bit& rhs) const {
+        // the ordering of dup keys in btrees isn't too critical, but we'd like to put items
+ // that are close together on disk close together in the tree, so we do want the file #
+ // to be the most significant bytes
+ return toLongLong() < rhs.toLongLong();
+ }
+
+ int compare(const DiskLoc56Bit& rhs) const {
+ unsigned long long a = toLongLong();
+ unsigned long long b = rhs.toLongLong();
+ if (a < b) {
+ return -1;
+ } else {
+ return a == b ? 0 : 1;
}
+ }
- bool operator!=(const DiskLoc56Bit& rhs) const {
- return toLongLong() != rhs.toLongLong();
- }
+ bool operator==(const DiskLoc56Bit& rhs) const {
+ return toLongLong() == rhs.toLongLong();
+ }
- bool operator==(const DiskLoc& rhs) const {
- return DiskLoc(*this) == rhs;
- }
+ bool operator!=(const DiskLoc56Bit& rhs) const {
+ return toLongLong() != rhs.toLongLong();
+ }
- bool operator!=(const DiskLoc& rhs) const {
- return !(*this==rhs);
- }
+ bool operator==(const DiskLoc& rhs) const {
+ return DiskLoc(*this) == rhs;
+ }
- //
- // Mutation
- //
+ bool operator!=(const DiskLoc& rhs) const {
+ return !(*this == rhs);
+ }
- enum {
-            OurNullOfs = -2, // the low bit of offsets is used by _KeyNode, so we don't use -1 here
- OurMaxA = 0xffffff, // highest 3-byte value
- };
+ //
+ // Mutation
+ //
- void Null() {
- ofs = OurNullOfs;
- _a[0] = _a[1] = _a[2] = 0;
- }
+ enum {
+        OurNullOfs = -2,  // the low bit of offsets is used by _KeyNode, so we don't use -1 here
+ OurMaxA = 0xffffff, // highest 3-byte value
+ };
- void operator=(const DiskLoc& loc);
+ void Null() {
+ ofs = OurNullOfs;
+ _a[0] = _a[1] = _a[2] = 0;
+ }
- //
- // Type Conversion
- //
+ void operator=(const DiskLoc& loc);
- RecordId toRecordId() const {
- return DiskLoc(*this).toRecordId();
- }
+ //
+ // Type Conversion
+ //
- operator DiskLoc() const {
- // endian
- if( isNull() ) return DiskLoc();
- unsigned a = *((unsigned *) (_a-1));
- return DiskLoc(a >> 8, ofs);
- }
+ RecordId toRecordId() const {
+ return DiskLoc(*this).toRecordId();
+ }
- std::string toString() const { return DiskLoc(*this).toString(); }
- };
+ operator DiskLoc() const {
+ // endian
+ if (isNull())
+ return DiskLoc();
+ unsigned a = *((unsigned*)(_a - 1));
+ return DiskLoc(a >> 8, ofs);
+ }
- struct BtreeBucketV1 {
- /** Parent bucket of this bucket, which isNull() for the root bucket. */
- DiskLoc56Bit parent;
+ std::string toString() const {
+ return DiskLoc(*this).toString();
+ }
+};
-        /** Given that there are n keys, this is the child at index n. */
- DiskLoc56Bit nextChild;
+struct BtreeBucketV1 {
+ /** Parent bucket of this bucket, which isNull() for the root bucket. */
+ DiskLoc56Bit parent;
- unsigned short flags;
+    /** Given that there are n keys, this is the child at index n. */
+ DiskLoc56Bit nextChild;
- /** Size of the empty region. */
- unsigned short emptySize;
+ unsigned short flags;
- /** Size used for bson storage, including storage of old keys. */
- unsigned short topSize;
+ /** Size of the empty region. */
+ unsigned short emptySize;
- /* Number of keys in the bucket. */
- unsigned short n;
+ /** Size used for bson storage, including storage of old keys. */
+ unsigned short topSize;
- /* Beginning of the bucket's body */
- char data[4];
+ /* Number of keys in the bucket. */
+ unsigned short n;
- // Precalculated size constants
- enum { HeaderSize = 22 };
- };
+ /* Beginning of the bucket's body */
+ char data[4];
- // BtreeBucketV1 is part of the on-disk format, so it should never be changed
- BOOST_STATIC_ASSERT(
- sizeof(BtreeBucketV1) - sizeof(static_cast<BtreeBucketV1*>(NULL)->data)
- == BtreeBucketV1::HeaderSize);
+ // Precalculated size constants
+ enum { HeaderSize = 22 };
+};
- enum Flags {
- Packed = 1
- };
+// BtreeBucketV1 is part of the on-disk format, so it should never be changed
+BOOST_STATIC_ASSERT(sizeof(BtreeBucketV1) - sizeof(static_cast<BtreeBucketV1*>(NULL)->data) ==
+ BtreeBucketV1::HeaderSize);
- struct BtreeLayoutV0 {
- typedef FixedWidthKey<DiskLoc> FixedWidthKeyType;
- typedef DiskLoc LocType;
- typedef KeyBson KeyType;
- typedef KeyBson KeyOwnedType;
- typedef BtreeBucketV0 BucketType;
+enum Flags { Packed = 1 };
- enum { BucketSize = 8192,
- BucketBodySize = BucketSize - BucketType::HeaderSize
- };
+struct BtreeLayoutV0 {
+ typedef FixedWidthKey<DiskLoc> FixedWidthKeyType;
+ typedef DiskLoc LocType;
+ typedef KeyBson KeyType;
+ typedef KeyBson KeyOwnedType;
+ typedef BtreeBucketV0 BucketType;
- // largest key size we allow. note we very much need to support bigger keys (somehow) in
- // the future.
+ enum { BucketSize = 8192, BucketBodySize = BucketSize - BucketType::HeaderSize };
- static const int KeyMax = OldBucketSize / 10;
+ // largest key size we allow. note we very much need to support bigger keys (somehow) in
+ // the future.
- // A sentinel value sometimes used to identify a deallocated bucket.
- static const int INVALID_N_SENTINEL = -1;
+ static const int KeyMax = OldBucketSize / 10;
- static void initBucket(BucketType* bucket) {
- bucket->_reserved1 = 0;
- bucket->_wasSize = BucketSize;
- bucket->reserved = 0;
- }
- };
+ // A sentinel value sometimes used to identify a deallocated bucket.
+ static const int INVALID_N_SENTINEL = -1;
+
+ static void initBucket(BucketType* bucket) {
+ bucket->_reserved1 = 0;
+ bucket->_wasSize = BucketSize;
+ bucket->reserved = 0;
+ }
+};
- struct BtreeLayoutV1 {
- typedef FixedWidthKey<DiskLoc56Bit> FixedWidthKeyType;
- typedef KeyV1 KeyType;
- typedef KeyV1Owned KeyOwnedType;
- typedef DiskLoc56Bit LocType;
- typedef BtreeBucketV1 BucketType;
+struct BtreeLayoutV1 {
+ typedef FixedWidthKey<DiskLoc56Bit> FixedWidthKeyType;
+ typedef KeyV1 KeyType;
+ typedef KeyV1Owned KeyOwnedType;
+ typedef DiskLoc56Bit LocType;
+ typedef BtreeBucketV1 BucketType;
- enum { BucketSize = 8192 - 16, // The -16 is to leave room for the MmapV1RecordHeader header
- BucketBodySize = BucketSize - BucketType::HeaderSize
- };
+ enum {
+ BucketSize = 8192 - 16, // The -16 is to leave room for the MmapV1RecordHeader header
+ BucketBodySize = BucketSize - BucketType::HeaderSize
+ };
- static const int KeyMax = 1024;
+ static const int KeyMax = 1024;
- // A sentinel value sometimes used to identify a deallocated bucket.
- static const unsigned short INVALID_N_SENTINEL = 0xffff;
+ // A sentinel value sometimes used to identify a deallocated bucket.
+ static const unsigned short INVALID_N_SENTINEL = 0xffff;
- static void initBucket(BucketType* bucket) { }
- };
+ static void initBucket(BucketType* bucket) {}
+};
#pragma pack()
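
setUnused()/isUnused() above work because real record offsets are always even, leaving the low bit of ofs free as an "unused" flag — the same bit the tests check via getOfs() & 1. A minimal sketch of that bit-flagging, assuming only the even-offset invariant (SketchLoc is a hypothetical stand-in for the real FixedWidthKey location):

    #include <cassert>

    // The low bit of an always-even record offset doubles as the "unused"
    // sentinel, as in FixedWidthKey::setUnused()/isUnused() above.
    struct SketchLoc {
        int ofs;
        void setUnused() { ofs |= 1; }
        void setUsed() { ofs &= ~1; }
        bool isUnused() const { return ofs & 1; }
    };

    int main() {
        SketchLoc loc = {4096};   // a plausible, even record offset
        assert(!loc.isUnused());
        loc.setUnused();
        assert(loc.isUnused());   // flag set without losing the offset
        loc.setUsed();
        assert(loc.ofs == 4096);  // round-trips back to the original value
        return 0;
    }
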
diff --git a/src/mongo/db/storage/mmap_v1/btree/btree_test_help.cpp b/src/mongo/db/storage/mmap_v1/btree/btree_test_help.cpp
index fe0cdf7e82e..760095898be 100644
--- a/src/mongo/db/storage/mmap_v1/btree/btree_test_help.cpp
+++ b/src/mongo/db/storage/mmap_v1/btree/btree_test_help.cpp
@@ -37,210 +37,203 @@
namespace mongo {
- using std::string;
-
- string bigNumString(long long n, int len) {
- char sub[17];
- sprintf(sub, "%.16llx", n);
- string val(len, ' ');
- for (int i = 0; i < len; ++i) {
- val[i] = sub[i % 16];
- }
- return val;
- }
-
- BSONObj simpleKey(char c, int n) {
- BSONObjBuilder builder;
- string val(n, c);
- builder.append("a", val);
- return builder.obj();
+using std::string;
+
+string bigNumString(long long n, int len) {
+ char sub[17];
+ sprintf(sub, "%.16llx", n);
+ string val(len, ' ');
+ for (int i = 0; i < len; ++i) {
+ val[i] = sub[i % 16];
}
+ return val;
+}
- //
- // BtreeLogicTestHelper
- //
-
- template <class OnDiskFormat>
- BtreeLogicTestHelper<OnDiskFormat>::BtreeLogicTestHelper(const BSONObj& order)
- : recordStore("TestRecordStore"),
- btree(&headManager,
- &recordStore,
- &cursorRegistry,
- Ordering::make(order),
- "TestIndex") {
- static const string randomData("RandomStuff");
-
- // Generate a valid record location for a "fake" record, which we will repeatedly use
-        // throughout the tests.
- OperationContextNoop txn;
- StatusWith<RecordId> s =
- recordStore.insertRecord(&txn, randomData.c_str(), randomData.length(), false);
-
- ASSERT_TRUE(s.isOK());
- ASSERT_EQUALS(1, recordStore.numRecords(NULL));
-
- dummyDiskLoc = DiskLoc::fromRecordId(s.getValue());
- }
+BSONObj simpleKey(char c, int n) {
+ BSONObjBuilder builder;
+ string val(n, c);
+ builder.append("a", val);
+ return builder.obj();
+}
+//
+// BtreeLogicTestHelper
+//
- //
- // ArtificialTreeBuilder
- //
+template <class OnDiskFormat>
+BtreeLogicTestHelper<OnDiskFormat>::BtreeLogicTestHelper(const BSONObj& order)
+ : recordStore("TestRecordStore"),
+ btree(&headManager, &recordStore, &cursorRegistry, Ordering::make(order), "TestIndex") {
+ static const string randomData("RandomStuff");
- template <class OnDiskFormat>
- void ArtificialTreeBuilder<OnDiskFormat>::makeTree(const string &spec) {
- _helper->headManager.setHead(_txn, makeTree(fromjson(spec)).toRecordId());
- }
+ // Generate a valid record location for a "fake" record, which we will repeatedly use
+    // throughout the tests.
+ OperationContextNoop txn;
+ StatusWith<RecordId> s =
+ recordStore.insertRecord(&txn, randomData.c_str(), randomData.length(), false);
- template <class OnDiskFormat>
- DiskLoc ArtificialTreeBuilder<OnDiskFormat>::makeTree(const BSONObj &spec) {
- DiskLoc bucketLoc = _helper->btree._addBucket(_txn);
- BucketType* bucket = _helper->btree.getBucket(_txn, bucketLoc);
-
- BSONObjIterator i(spec);
- while (i.more()) {
- BSONElement e = i.next();
- DiskLoc child;
- if (e.type() == Object) {
- child = makeTree(e.embeddedObject());
- }
-
- if (e.fieldName() == string("_")) {
- bucket->nextChild = child;
- }
- else {
- KeyDataOwnedType key(BSON("" << expectedKey(e.fieldName())));
- invariant(_helper->btree.pushBack(bucket, _helper->dummyDiskLoc, key, child));
- }
- }
+ ASSERT_TRUE(s.isOK());
+ ASSERT_EQUALS(1, recordStore.numRecords(NULL));
- _helper->btree.fixParentPtrs(_txn, bucket, bucketLoc);
- return bucketLoc;
- }
+ dummyDiskLoc = DiskLoc::fromRecordId(s.getValue());
+}
- template <class OnDiskFormat>
- void ArtificialTreeBuilder<OnDiskFormat>::checkStructure(const string &spec) const {
- checkStructure(fromjson(spec), DiskLoc::fromRecordId(_helper->headManager.getHead(_txn)));
- }
- template <class OnDiskFormat>
- void ArtificialTreeBuilder<OnDiskFormat>::push(
- const DiskLoc bucketLoc, const BSONObj& key, const DiskLoc child) {
- KeyDataOwnedType k(key);
- BucketType* bucket = _helper->btree.getBucket(_txn, bucketLoc);
+//
+// ArtificialTreeBuilder
+//
- invariant(_helper->btree.pushBack(bucket, _helper->dummyDiskLoc, k, child));
- _helper->btree.fixParentPtrs(_txn, bucket, bucketLoc);
- }
+template <class OnDiskFormat>
+void ArtificialTreeBuilder<OnDiskFormat>::makeTree(const string& spec) {
+ _helper->headManager.setHead(_txn, makeTree(fromjson(spec)).toRecordId());
+}
- template <class OnDiskFormat>
- void ArtificialTreeBuilder<OnDiskFormat>::checkStructure(
- const BSONObj &spec, const DiskLoc node) const {
- BucketType* bucket = _helper->btree.getBucket(_txn, node);
-
- BSONObjIterator j(spec);
- for (int i = 0; i < bucket->n; ++i) {
- ASSERT(j.more());
- BSONElement e = j.next();
- KeyHeaderType kn = BtreeLogic<OnDiskFormat>::getKeyHeader(bucket, i);
- string expected = expectedKey(e.fieldName());
- ASSERT(isPresent(BSON("" << expected), 1));
- ASSERT(isPresent(BSON("" << expected), -1));
-
- // ASSERT_EQUALS(expected, kn.key.toBson().firstElement().valuestr());
- if (kn.prevChildBucket.isNull()) {
- ASSERT(e.type() == jstNULL);
- }
- else {
- ASSERT(e.type() == Object);
- checkStructure(e.embeddedObject(), kn.prevChildBucket);
- }
+template <class OnDiskFormat>
+DiskLoc ArtificialTreeBuilder<OnDiskFormat>::makeTree(const BSONObj& spec) {
+ DiskLoc bucketLoc = _helper->btree._addBucket(_txn);
+ BucketType* bucket = _helper->btree.getBucket(_txn, bucketLoc);
+
+ BSONObjIterator i(spec);
+ while (i.more()) {
+ BSONElement e = i.next();
+ DiskLoc child;
+ if (e.type() == Object) {
+ child = makeTree(e.embeddedObject());
}
- if (bucket->nextChild.isNull()) {
- // maybe should allow '_' field with null value?
- ASSERT(!j.more());
- }
- else {
- BSONElement e = j.next();
- ASSERT_EQUALS(string("_"), e.fieldName());
- ASSERT(e.type() == Object);
- checkStructure(e.embeddedObject(), bucket->nextChild);
- }
- ASSERT(!j.more());
- }
- template <class OnDiskFormat>
- bool ArtificialTreeBuilder<OnDiskFormat>::isPresent(const BSONObj &key, int direction) const {
- int pos;
- DiskLoc loc;
- OperationContextNoop txn;
- return _helper->btree.locate(&txn, key, _helper->dummyDiskLoc, direction, &pos, &loc);
+ if (e.fieldName() == string("_")) {
+ bucket->nextChild = child;
+ } else {
+ KeyDataOwnedType key(BSON("" << expectedKey(e.fieldName())));
+ invariant(_helper->btree.pushBack(bucket, _helper->dummyDiskLoc, key, child));
+ }
}
- // Static
- template <class OnDiskFormat>
- string ArtificialTreeBuilder<OnDiskFormat>::expectedKey(const char *spec) {
- if (spec[0] != '$') {
- return spec;
- }
- char *endPtr;
+ _helper->btree.fixParentPtrs(_txn, bucket, bucketLoc);
+ return bucketLoc;
+}
- // parsing a long long is a pain, so just allow shorter keys for now
- unsigned long long num = strtol(spec + 1, &endPtr, 16);
- int len = 800;
- if (*endPtr == '$') {
- len = strtol(endPtr + 1, 0, 16);
- }
+template <class OnDiskFormat>
+void ArtificialTreeBuilder<OnDiskFormat>::checkStructure(const string& spec) const {
+ checkStructure(fromjson(spec), DiskLoc::fromRecordId(_helper->headManager.getHead(_txn)));
+}
- return bigNumString(num, len);
- }
+template <class OnDiskFormat>
+void ArtificialTreeBuilder<OnDiskFormat>::push(const DiskLoc bucketLoc,
+ const BSONObj& key,
+ const DiskLoc child) {
+ KeyDataOwnedType k(key);
+ BucketType* bucket = _helper->btree.getBucket(_txn, bucketLoc);
- template <class OnDiskFormat>
- int ArtificialTreeBuilder<OnDiskFormat>::fillBucketToExactSize(
- const DiskLoc bucketLoc, int targetSize, char startKey) {
- ASSERT_FALSE(bucketLoc.isNull());
+ invariant(_helper->btree.pushBack(bucket, _helper->dummyDiskLoc, k, child));
+ _helper->btree.fixParentPtrs(_txn, bucket, bucketLoc);
+}
- BucketType* bucket = _helper->btree.getBucket(_txn, bucketLoc);
- ASSERT_EQUALS(0, bucket->n);
+template <class OnDiskFormat>
+void ArtificialTreeBuilder<OnDiskFormat>::checkStructure(const BSONObj& spec,
+ const DiskLoc node) const {
+ BucketType* bucket = _helper->btree.getBucket(_txn, node);
+
+ BSONObjIterator j(spec);
+ for (int i = 0; i < bucket->n; ++i) {
+ ASSERT(j.more());
+ BSONElement e = j.next();
+ KeyHeaderType kn = BtreeLogic<OnDiskFormat>::getKeyHeader(bucket, i);
+ string expected = expectedKey(e.fieldName());
+ ASSERT(isPresent(BSON("" << expected), 1));
+ ASSERT(isPresent(BSON("" << expected), -1));
+
+ // ASSERT_EQUALS(expected, kn.key.toBson().firstElement().valuestr());
+ if (kn.prevChildBucket.isNull()) {
+ ASSERT(e.type() == jstNULL);
+ } else {
+ ASSERT(e.type() == Object);
+ checkStructure(e.embeddedObject(), kn.prevChildBucket);
+ }
+ }
+ if (bucket->nextChild.isNull()) {
+ // maybe should allow '_' field with null value?
+ ASSERT(!j.more());
+ } else {
+ BSONElement e = j.next();
+ ASSERT_EQUALS(string("_"), e.fieldName());
+ ASSERT(e.type() == Object);
+ checkStructure(e.embeddedObject(), bucket->nextChild);
+ }
+ ASSERT(!j.more());
+}
- static const int bigSize = KeyDataOwnedType(simpleKey('a', 801)).dataSize();
+template <class OnDiskFormat>
+bool ArtificialTreeBuilder<OnDiskFormat>::isPresent(const BSONObj& key, int direction) const {
+ int pos;
+ DiskLoc loc;
+ OperationContextNoop txn;
+ return _helper->btree.locate(&txn, key, _helper->dummyDiskLoc, direction, &pos, &loc);
+}
- int size = 0;
- int keyCount = 0;
- while (size < targetSize) {
- int space = targetSize - size;
- int nextSize = space - sizeof(FixedWidthKeyType);
- verify(nextSize > 0);
+// Static
+template <class OnDiskFormat>
+string ArtificialTreeBuilder<OnDiskFormat>::expectedKey(const char* spec) {
+ if (spec[0] != '$') {
+ return spec;
+ }
+ char* endPtr;
- BSONObj newKey;
- if (nextSize >= bigSize) {
- newKey = simpleKey(startKey++, 801);
- }
- else {
- newKey = simpleKey(startKey++, nextSize - (bigSize - 801));
- }
+ // parsing a long long is a pain, so just allow shorter keys for now
+ unsigned long long num = strtol(spec + 1, &endPtr, 16);
+ int len = 800;
+ if (*endPtr == '$') {
+ len = strtol(endPtr + 1, 0, 16);
+ }
- push(bucketLoc, newKey, DiskLoc());
+ return bigNumString(num, len);
+}
- size += KeyDataOwnedType(newKey).dataSize() +
- sizeof(FixedWidthKeyType);
- keyCount += 1;
+template <class OnDiskFormat>
+int ArtificialTreeBuilder<OnDiskFormat>::fillBucketToExactSize(const DiskLoc bucketLoc,
+ int targetSize,
+ char startKey) {
+ ASSERT_FALSE(bucketLoc.isNull());
+
+ BucketType* bucket = _helper->btree.getBucket(_txn, bucketLoc);
+ ASSERT_EQUALS(0, bucket->n);
+
+ static const int bigSize = KeyDataOwnedType(simpleKey('a', 801)).dataSize();
+
+ int size = 0;
+ int keyCount = 0;
+ while (size < targetSize) {
+ int space = targetSize - size;
+ int nextSize = space - sizeof(FixedWidthKeyType);
+ verify(nextSize > 0);
+
+ BSONObj newKey;
+ if (nextSize >= bigSize) {
+ newKey = simpleKey(startKey++, 801);
+ } else {
+ newKey = simpleKey(startKey++, nextSize - (bigSize - 801));
}
- ASSERT_EQUALS(_helper->btree._packedDataSize(bucket, 0), targetSize);
+ push(bucketLoc, newKey, DiskLoc());
- return keyCount;
+ size += KeyDataOwnedType(newKey).dataSize() + sizeof(FixedWidthKeyType);
+ keyCount += 1;
}
- //
- // This causes actual code to be generated for the usages of the templates in this file.
- //
+ ASSERT_EQUALS(_helper->btree._packedDataSize(bucket, 0), targetSize);
+
+ return keyCount;
+}
+
+//
+// This causes actual code to be generated for the usages of the templates in this file.
+//
- // V0 format.
- template struct BtreeLogicTestHelper<BtreeLayoutV0>;
- template class ArtificialTreeBuilder<BtreeLayoutV0>;
+// V0 format.
+template struct BtreeLogicTestHelper<BtreeLayoutV0>;
+template class ArtificialTreeBuilder<BtreeLayoutV0>;
- // V1 format.
- template struct BtreeLogicTestHelper<BtreeLayoutV1>;
- template class ArtificialTreeBuilder<BtreeLayoutV1>;
+// V1 format.
+template struct BtreeLogicTestHelper<BtreeLayoutV1>;
+template class ArtificialTreeBuilder<BtreeLayoutV1>;
}
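
To make the helpers above concrete: bigNumString() cycles through the 16-digit, zero-padded
hex form of its input until the requested length, and simpleKey() builds a one-field document
on 'a'. A standalone sketch mirroring bigNumString() so it runs without the mongo headers
(the cast to unsigned long long is added here for strict printf correctness):

    #include <cstdio>
    #include <iostream>
    #include <string>

    // Mirrors bigNumString() above: repeat the 16-digit hex form of n to length len.
    std::string bigNumString(long long n, int len) {
        char sub[17];
        snprintf(sub, sizeof(sub), "%.16llx", (unsigned long long)n);
        std::string val(len, ' ');
        for (int i = 0; i < len; ++i) {
            val[i] = sub[i % 16];
        }
        return val;
    }

    int main() {
        std::cout << bigNumString(0xabcd, 20) << std::endl;
        // Prints "000000000000abcd0000": 16 hex digits, then the cycle restarts.
        // simpleKey('a', 3) would analogously build the BSON document { a: "aaa" }.
        return 0;
    }
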
diff --git a/src/mongo/db/storage/mmap_v1/btree/btree_test_help.h b/src/mongo/db/storage/mmap_v1/btree/btree_test_help.h
index b282e72d827..5aeec516528 100644
--- a/src/mongo/db/storage/mmap_v1/btree/btree_test_help.h
+++ b/src/mongo/db/storage/mmap_v1/btree/btree_test_help.h
@@ -37,118 +37,114 @@
namespace mongo {
+/**
+ * Generates a string of the specified length containing repeated concatenation of the
+ * hexadecimal representation of the input value.
+ */
+std::string bigNumString(long long n, int len);
+
+/**
+ * Generates a key on field 'a' containing the specified number of repetitions of the character.
+ */
+BSONObj simpleKey(char c, int n = 1);
+
+/**
+ * Simple head manager, which performs no validity checking or persistence.
+ */
+class TestHeadManager : public HeadManager {
+public:
+ virtual const RecordId getHead(OperationContext* txn) const {
+ return _head;
+ }
+
+ virtual void setHead(OperationContext* txn, const RecordId newHead) {
+ _head = newHead;
+ }
+
+private:
+ RecordId _head;
+};
+
+
+/**
+ * This structure encapsulates a Btree and all the infrastructure needed by it (head manager,
+ * record store and a valid disk location to use by the tests).
+ */
+template <class OnDiskFormat>
+struct BtreeLogicTestHelper {
+ BtreeLogicTestHelper(const BSONObj& order);
+
+ // Everything needed for a fully-functional Btree logic
+ TestHeadManager headManager;
+ HeapRecordStoreBtree recordStore;
+ SavedCursorRegistry cursorRegistry;
+ BtreeLogic<OnDiskFormat> btree;
+ DiskLoc dummyDiskLoc;
+};
+
+
+/**
+ * Tool to construct custom tree shapes for tests.
+ */
+template <class OnDiskFormat>
+class ArtificialTreeBuilder {
+public:
+ typedef typename BtreeLogic<OnDiskFormat>::BucketType BucketType;
+ typedef typename BtreeLogic<OnDiskFormat>::KeyDataOwnedType KeyDataOwnedType;
+ typedef typename BtreeLogic<OnDiskFormat>::KeyHeaderType KeyHeaderType;
+
+ typedef typename OnDiskFormat::FixedWidthKeyType FixedWidthKeyType;
+
/**
- * Generates a string of the specified length containing repeated concatenation of the
- * hexadecimal representation of the input value.
+ * The tree builder wraps around the passed-in helper and will invoke methods on it. It
+ * does not do any cleanup, so constructing multiple trees over the same helper will
+ * cause leaked records.
*/
- std::string bigNumString(long long n, int len);
+ ArtificialTreeBuilder(OperationContext* txn, BtreeLogicTestHelper<OnDiskFormat>* helper)
+ : _txn(txn), _helper(helper) {}
/**
- * Generates key on a field 'a', with the specified number of repetitions of the character.
+ * Causes the specified tree shape to be built on the associated helper and the tree's
+ * root installed as the head. Uses a custom JSON-based language with the following
+ * syntax:
+ *
+ * Btree := BTreeBucket
+ * BtreeBucket := { Child_1_Key: <BtreeBucket | null>,
+ * Child_2_Key: <BtreeBucket | null>,
+ * ...,
+ * _: <BtreeBucket | null> }
+ *
+ * The _ key name specifies the content of the nextChild pointer. The value null means
+ * use a fixed disk loc.
*/
- BSONObj simpleKey(char c, int n = 1);
+ void makeTree(const std::string& spec);
/**
- * Simple head manager, which performs no validity checking or persistence.
+ * Validates that the structure of the Btree in the helper matches the specification.
*/
- class TestHeadManager : public HeadManager {
- public:
- virtual const RecordId getHead( OperationContext* txn ) const {
- return _head;
- }
-
- virtual void setHead(OperationContext* txn, const RecordId newHead) {
- _head = newHead;
- }
-
- private:
- RecordId _head;
- };
+ void checkStructure(const std::string& spec) const;
+ /**
+     * Adds the given key to the bucket and fixes up the child pointers.
+ */
+ void push(const DiskLoc bucketLoc, const BSONObj& key, const DiskLoc child);
/**
- * This structure encapsulates a Btree and all the infrastructure needed by it (head manager,
- * record store and a valid disk location to use by the tests).
+ * @return The number of keys inserted.
*/
- template <class OnDiskFormat>
- struct BtreeLogicTestHelper {
- BtreeLogicTestHelper(const BSONObj& order);
+ int fillBucketToExactSize(const DiskLoc bucketLoc, int targetSize, char startKey);
- // Everything needed for a fully-functional Btree logic
- TestHeadManager headManager;
- HeapRecordStoreBtree recordStore;
- SavedCursorRegistry cursorRegistry;
- BtreeLogic<OnDiskFormat> btree;
- DiskLoc dummyDiskLoc;
- };
+private:
+ DiskLoc makeTree(const BSONObj& spec);
+ void checkStructure(const BSONObj& spec, const DiskLoc node) const;
- /**
- * Tool to construct custom tree shapes for tests.
- */
- template <class OnDiskFormat>
- class ArtificialTreeBuilder {
- public:
-
- typedef typename BtreeLogic<OnDiskFormat>::BucketType BucketType;
- typedef typename BtreeLogic<OnDiskFormat>::KeyDataOwnedType KeyDataOwnedType;
- typedef typename BtreeLogic<OnDiskFormat>::KeyHeaderType KeyHeaderType;
-
- typedef typename OnDiskFormat::FixedWidthKeyType FixedWidthKeyType;
-
- /**
- * The tree builder wraps around the passed-in helper and will invoke methods on it. It
- * does not do any cleanup, so constructing multiple trees over the same helper will
- * cause leaked records.
- */
- ArtificialTreeBuilder(OperationContext* txn,
- BtreeLogicTestHelper<OnDiskFormat>* helper)
- : _txn(txn), _helper(helper) {
-
- }
-
- /**
- * Causes the specified tree shape to be built on the associated helper and the tree's
- * root installed as the head. Uses a custom JSON-based language with the following
- * syntax:
- *
- * Btree := BTreeBucket
- * BtreeBucket := { Child_1_Key: <BtreeBucket | null>,
- * Child_2_Key: <BtreeBucket | null>,
- * ...,
- * _: <BtreeBucket | null> }
- *
- * The _ key name specifies the content of the nextChild pointer. The value null means
- * use a fixed disk loc.
- */
- void makeTree(const std::string& spec);
-
- /**
- * Validates that the structure of the Btree in the helper matches the specification.
- */
- void checkStructure(const std::string& spec) const;
-
- /**
- * Adds the following key to the bucket and fixes up the child pointers.
- */
- void push(const DiskLoc bucketLoc, const BSONObj& key, const DiskLoc child);
-
- /**
- * @return The number of keys inserted.
- */
- int fillBucketToExactSize(const DiskLoc bucketLoc, int targetSize, char startKey);
-
- private:
- DiskLoc makeTree(const BSONObj& spec);
-
- void checkStructure(const BSONObj& spec, const DiskLoc node) const;
-
- bool isPresent(const BSONObj& key, int direction) const;
-
- static std::string expectedKey(const char* spec);
-
- OperationContext* _txn;
- BtreeLogicTestHelper<OnDiskFormat>* _helper;
- };
-
-} // namespace mongo
+ bool isPresent(const BSONObj& key, int direction) const;
+
+ static std::string expectedKey(const char* spec);
+
+ OperationContext* _txn;
+ BtreeLogicTestHelper<OnDiskFormat>* _helper;
+};
+
+} // namespace mongo
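
The JSON spec language documented above reads best next to a usage sketch. The test below is
hypothetical (the real suites in btree_logic_test.cpp follow the same pattern; includes are
abbreviated), but every type and method it uses appears in the declarations above:

    #include "mongo/db/storage/mmap_v1/btree/btree_test_help.h"
    #include "mongo/unittest/unittest.h"

    TEST(ArtificialTreeBuilderExample, BuildAndCheck) {
        OperationContextNoop txn;
        BtreeLogicTestHelper<BtreeLayoutV1> helper(BSON("a" << 1));
        ArtificialTreeBuilder<BtreeLayoutV1> builder(&txn, &helper);

        // Root keys "b" and "d"; their left children hold "a" and "c";
        // "_" installs a bucket holding "e" as the root's nextChild.
        builder.makeTree("{b: {a: null}, d: {c: null}, _: {e: null}}");

        // Walks the buckets and asserts the same shape back; the null leaves
        // resolve to the helper's fixed dummyDiskLoc.
        builder.checkStructure("{b: {a: null}, d: {c: null}, _: {e: null}}");
    }
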
diff --git a/src/mongo/db/storage/mmap_v1/btree/key.cpp b/src/mongo/db/storage/mmap_v1/btree/key.cpp
index 5cc1afbdc69..cbb89d8fab9 100644
--- a/src/mongo/db/storage/mmap_v1/btree/key.cpp
+++ b/src/mongo/db/storage/mmap_v1/btree/key.cpp
@@ -39,26 +39,26 @@
namespace mongo {
- using std::endl;
- using std::numeric_limits;
- using std::min;
+using std::endl;
+using std::numeric_limits;
+using std::min;
- extern const Ordering nullOrdering = Ordering::make(BSONObj());
+extern const Ordering nullOrdering = Ordering::make(BSONObj());
- // KeyBson is for V0 (version #0) indexes
+// KeyBson is for V0 (version #0) indexes
- int oldCompare(const BSONObj& l,const BSONObj& r, const Ordering &o);
+int oldCompare(const BSONObj& l, const BSONObj& r, const Ordering& o);
- // "old" = pre signed dates & such; i.e. btree V0
- /* must be same canon type when called */
- int oldCompareElementValues(const BSONElement& l, const BSONElement& r) {
- dassert( l.canonicalType() == r.canonicalType() );
- int f;
- double x;
+// "old" = pre signed dates & such; i.e. btree V0
+/* must be same canon type when called */
+int oldCompareElementValues(const BSONElement& l, const BSONElement& r) {
+ dassert(l.canonicalType() == r.canonicalType());
+ int f;
+ double x;
- switch ( l.type() ) {
+ switch (l.type()) {
case EOO:
- case Undefined: // EOO and Undefined are same canonicalType
+ case Undefined: // EOO and Undefined are same canonicalType
case jstNULL:
case MaxKey:
case MinKey:
@@ -75,35 +75,36 @@ namespace mongo {
return lULL == rULL ? 0 : 1;
}
case NumberLong:
- if( r.type() == NumberLong ) {
+ if (r.type() == NumberLong) {
long long L = l._numberLong();
long long R = r._numberLong();
- if( L < R ) return -1;
- if( L == R ) return 0;
+ if (L < R)
+ return -1;
+ if (L == R)
+ return 0;
return 1;
}
- // else fall through
+ // else fall through
case NumberInt:
case NumberDouble: {
double left = l.number();
double right = r.number();
- bool lNan = !( left <= numeric_limits< double >::max() &&
- left >= -numeric_limits< double >::max() );
- bool rNan = !( right <= numeric_limits< double >::max() &&
- right >= -numeric_limits< double >::max() );
- if ( lNan ) {
- if ( rNan ) {
+ bool lNan =
+ !(left <= numeric_limits<double>::max() && left >= -numeric_limits<double>::max());
+ bool rNan = !(right <= numeric_limits<double>::max() &&
+ right >= -numeric_limits<double>::max());
+ if (lNan) {
+ if (rNan) {
return 0;
- }
- else {
+ } else {
return -1;
}
- }
- else if ( rNan ) {
+ } else if (rNan) {
return 1;
}
x = left - right;
- if ( x < 0 ) return -1;
+ if (x < 0)
+ return -1;
return x == 0 ? 0 : 1;
}
case jstOID:
@@ -119,562 +120,569 @@ namespace mongo {
case DBRef: {
int lsz = l.valuesize();
int rsz = r.valuesize();
- if ( lsz - rsz != 0 ) return lsz - rsz;
+ if (lsz - rsz != 0)
+ return lsz - rsz;
return memcmp(l.value(), r.value(), lsz);
}
case BinData: {
- int lsz = l.objsize(); // our bin data size in bytes, not including the subtype byte
+ int lsz = l.objsize(); // our bin data size in bytes, not including the subtype byte
int rsz = r.objsize();
- if ( lsz - rsz != 0 ) return lsz - rsz;
- return memcmp(l.value()+4, r.value()+4, lsz+1);
+ if (lsz - rsz != 0)
+ return lsz - rsz;
+ return memcmp(l.value() + 4, r.value() + 4, lsz + 1);
}
case RegEx: {
int c = strcmp(l.regex(), r.regex());
- if ( c )
+ if (c)
return c;
return strcmp(l.regexFlags(), r.regexFlags());
}
- case CodeWScope : {
+ case CodeWScope: {
f = l.canonicalType() - r.canonicalType();
- if ( f )
+ if (f)
return f;
- f = strcmp( l.codeWScopeCode() , r.codeWScopeCode() );
- if ( f )
+ f = strcmp(l.codeWScopeCode(), r.codeWScopeCode());
+ if (f)
return f;
- f = strcmp( l.codeWScopeScopeDataUnsafe() , r.codeWScopeScopeDataUnsafe() );
- if ( f )
+ f = strcmp(l.codeWScopeScopeDataUnsafe(), r.codeWScopeScopeDataUnsafe());
+ if (f)
return f;
return 0;
}
default:
- log() << "oldCompareElementValues: bad type " << (int) l.type() << endl;
+ log() << "oldCompareElementValues: bad type " << (int)l.type() << endl;
verify(false);
- }
- return -1;
- }
-
- int oldElemCompare(const BSONElement&l , const BSONElement& r) {
- int lt = (int) l.canonicalType();
- int rt = (int) r.canonicalType();
- int x = lt - rt;
- if( x )
- return x;
- return oldCompareElementValues(l, r);
}
-
- // pre signed dates & such
- int oldCompare(const BSONObj& l,const BSONObj& r, const Ordering &o) {
- BSONObjIterator i(l);
- BSONObjIterator j(r);
- unsigned mask = 1;
- while ( 1 ) {
- // so far, equal...
-
- BSONElement l = i.next();
- BSONElement r = j.next();
- if ( l.eoo() )
- return r.eoo() ? 0 : -1;
- if ( r.eoo() )
- return 1;
-
- int x;
- {
- x = oldElemCompare(l, r);
- if( o.descending(mask) )
- x = -x;
- }
- if ( x != 0 )
- return x;
- mask <<= 1;
+ return -1;
+}
+
+int oldElemCompare(const BSONElement& l, const BSONElement& r) {
+ int lt = (int)l.canonicalType();
+ int rt = (int)r.canonicalType();
+ int x = lt - rt;
+ if (x)
+ return x;
+ return oldCompareElementValues(l, r);
+}
+
+// pre signed dates & such
+int oldCompare(const BSONObj& l, const BSONObj& r, const Ordering& o) {
+ BSONObjIterator i(l);
+ BSONObjIterator j(r);
+ unsigned mask = 1;
+ while (1) {
+ // so far, equal...
+
+ BSONElement l = i.next();
+ BSONElement r = j.next();
+ if (l.eoo())
+ return r.eoo() ? 0 : -1;
+ if (r.eoo())
+ return 1;
+
+ int x;
+ {
+ x = oldElemCompare(l, r);
+ if (o.descending(mask))
+ x = -x;
}
- return -1;
- }
-
- /* old style compares:
- - dates are unsigned
- - strings no nulls
- */
- int KeyBson::woCompare(const KeyBson& r, const Ordering &o) const {
- return oldCompare(_o, r._o, o);
- }
-
- // woEqual could be made faster than woCompare but this is for backward compatibility so not worth a big effort
- bool KeyBson::woEqual(const KeyBson& r) const {
- return oldCompare(_o, r._o, nullOrdering) == 0;
- }
-
- // [ ][HASMORE][x][y][canontype_4bits]
- enum CanonicalsEtc {
- cminkey=1,
- cnull=2,
- cdouble=4,
- cstring=6,
- cbindata=7,
- coid=8,
- cfalse=10,
- ctrue=11,
- cdate=12,
- cmaxkey=14,
- cCANONTYPEMASK = 0xf,
- cY = 0x10,
- cint = cY | cdouble,
- cX = 0x20,
- clong = cX | cdouble,
- cHASMORE = 0x40,
- cNOTUSED = 0x80 // but see IsBSON sentinel - this bit not usable without great care
- };
-
- // bindata bson type
- const unsigned BinDataLenMask = 0xf0; // lengths are powers of 2 of this value
- const unsigned BinDataTypeMask = 0x0f; // 0-7 as you would expect, 8-15 are 128+value. see BinDataType.
- const int BinDataLenMax = 32;
- const int BinDataLengthToCode[] = {
- 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
- 0x80, -1/*9*/, 0x90/*10*/, -1/*11*/, 0xa0/*12*/, -1/*13*/, 0xb0/*14*/, -1/*15*/,
- 0xc0/*16*/, -1, -1, -1, 0xd0/*20*/, -1, -1, -1,
- 0xe0/*24*/, -1, -1, -1, -1, -1, -1, -1,
- 0xf0/*32*/
- };
- const int BinDataCodeToLength[] = {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 32
- };
-
- int binDataCodeToLength(int codeByte) {
- return BinDataCodeToLength[codeByte >> 4];
- }
-
- /** object cannot be represented in compact format. so store in traditional bson format
- with a leading sentinel byte IsBSON to indicate it's in that format.
-
- Given that the KeyV1Owned constructor already grabbed a bufbuilder, we reuse it here
- so that we don't have to do an extra malloc.
- */
- void KeyV1Owned::traditional(const BSONObj& obj) {
- b.reset();
- b.appendUChar(IsBSON);
- b.appendBuf(obj.objdata(), obj.objsize());
- _keyData = (const unsigned char *) b.buf();
- }
-
- KeyV1Owned::KeyV1Owned(const KeyV1& rhs) {
- b.appendBuf( rhs.data(), rhs.dataSize() );
- _keyData = (const unsigned char *) b.buf();
- dassert( b.len() == dataSize() ); // check datasize method is correct
- dassert( (*_keyData & cNOTUSED) == 0 );
+ if (x != 0)
+ return x;
+ mask <<= 1;
}
-
- // fromBSON to Key format
- KeyV1Owned::KeyV1Owned(const BSONObj& obj) {
- BSONObj::iterator i(obj);
- unsigned char bits = 0;
- while( 1 ) {
- BSONElement e = i.next();
- if( i.more() )
- bits |= cHASMORE;
- switch( e.type() ) {
+ return -1;
+}
+
+/* old style compares:
+ - dates are unsigned
+ - strings no nulls
+*/
+int KeyBson::woCompare(const KeyBson& r, const Ordering& o) const {
+ return oldCompare(_o, r._o, o);
+}
+
+// woEqual could be made faster than woCompare, but this is for backward compatibility, so it is not worth a big effort
+bool KeyBson::woEqual(const KeyBson& r) const {
+ return oldCompare(_o, r._o, nullOrdering) == 0;
+}
+
+// [ ][HASMORE][x][y][canontype_4bits]
+enum CanonicalsEtc {
+ cminkey = 1,
+ cnull = 2,
+ cdouble = 4,
+ cstring = 6,
+ cbindata = 7,
+ coid = 8,
+ cfalse = 10,
+ ctrue = 11,
+ cdate = 12,
+ cmaxkey = 14,
+ cCANONTYPEMASK = 0xf,
+ cY = 0x10,
+ cint = cY | cdouble,
+ cX = 0x20,
+ clong = cX | cdouble,
+ cHASMORE = 0x40,
+ cNOTUSED = 0x80 // but see IsBSON sentinel - this bit not usable without great care
+};
+
+// bindata bson type
+const unsigned BinDataLenMask = 0xf0; // lengths are powers of 2 of this value
+const unsigned BinDataTypeMask =
+ 0x0f; // 0-7 as you would expect, 8-15 are 128+value. see BinDataType.
+const int BinDataLenMax = 32;
+const int BinDataLengthToCode[] = {
+ 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60,
+ 0x70, 0x80, -1 /*9*/, 0x90 /*10*/, -1 /*11*/, 0xa0 /*12*/, -1 /*13*/,
+ 0xb0 /*14*/, -1 /*15*/, 0xc0 /*16*/, -1, -1, -1, 0xd0 /*20*/,
+ -1, -1, -1, 0xe0 /*24*/, -1, -1, -1,
+ -1, -1, -1, -1, 0xf0 /*32*/
+};
+const int BinDataCodeToLength[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 32};
+
+int binDataCodeToLength(int codeByte) {
+ return BinDataCodeToLength[codeByte >> 4];
+}
+
+/** If the object cannot be represented in compact format, store it in traditional BSON format
+    with a leading sentinel byte IsBSON to indicate it's in that format.
+
+ Given that the KeyV1Owned constructor already grabbed a bufbuilder, we reuse it here
+ so that we don't have to do an extra malloc.
+*/
+void KeyV1Owned::traditional(const BSONObj& obj) {
+ b.reset();
+ b.appendUChar(IsBSON);
+ b.appendBuf(obj.objdata(), obj.objsize());
+ _keyData = (const unsigned char*)b.buf();
+}
+
+KeyV1Owned::KeyV1Owned(const KeyV1& rhs) {
+ b.appendBuf(rhs.data(), rhs.dataSize());
+ _keyData = (const unsigned char*)b.buf();
+ dassert(b.len() == dataSize()); // check datasize method is correct
+ dassert((*_keyData & cNOTUSED) == 0);
+}
+
+// fromBSON to Key format
+KeyV1Owned::KeyV1Owned(const BSONObj& obj) {
+ BSONObj::iterator i(obj);
+ unsigned char bits = 0;
+ while (1) {
+ BSONElement e = i.next();
+ if (i.more())
+ bits |= cHASMORE;
+ switch (e.type()) {
case MinKey:
- b.appendUChar(cminkey|bits);
+ b.appendUChar(cminkey | bits);
break;
case jstNULL:
- b.appendUChar(cnull|bits);
+ b.appendUChar(cnull | bits);
break;
case MaxKey:
- b.appendUChar(cmaxkey|bits);
+ b.appendUChar(cmaxkey | bits);
break;
case Bool:
- b.appendUChar( (e.boolean()?ctrue:cfalse) | bits );
+ b.appendUChar((e.boolean() ? ctrue : cfalse) | bits);
break;
case jstOID:
- b.appendUChar(coid|bits);
+ b.appendUChar(coid | bits);
b.appendBuf(e.__oid().view().view(), OID::kOIDSize);
break;
- case BinData:
- {
- int t = e.binDataType();
- // 0-7 and 0x80 to 0x87 are supported by Key
- if( (t & 0x78) == 0 && t != ByteArrayDeprecated ) {
- int len;
- const char * d = e.binData(len);
- if( len <= BinDataLenMax ) {
- int code = BinDataLengthToCode[len];
- if( code >= 0 ) {
- if( t >= 128 )
- t = (t-128) | 0x08;
- dassert( (code&t) == 0 );
- b.appendUChar( cbindata|bits );
- b.appendUChar( code | t );
- b.appendBuf(d, len);
- break;
- }
+ case BinData: {
+ int t = e.binDataType();
+ // 0-7 and 0x80 to 0x87 are supported by Key
+ if ((t & 0x78) == 0 && t != ByteArrayDeprecated) {
+ int len;
+ const char* d = e.binData(len);
+ if (len <= BinDataLenMax) {
+ int code = BinDataLengthToCode[len];
+ if (code >= 0) {
+ if (t >= 128)
+ t = (t - 128) | 0x08;
+ dassert((code & t) == 0);
+ b.appendUChar(cbindata | bits);
+ b.appendUChar(code | t);
+ b.appendBuf(d, len);
+ break;
}
}
- traditional(obj);
- return;
}
+ traditional(obj);
+ return;
+ }
case Date:
- b.appendUChar(cdate|bits);
+ b.appendUChar(cdate | bits);
b.appendStruct(e.date());
break;
- case String:
- {
- b.appendUChar(cstring|bits);
- // note we do not store the terminating null, to save space.
- unsigned x = (unsigned) e.valuestrsize() - 1;
- if( x > 255 ) {
- traditional(obj);
- return;
- }
- b.appendUChar(x);
- b.appendBuf(e.valuestr(), x);
- break;
+ case String: {
+ b.appendUChar(cstring | bits);
+ // note we do not store the terminating null, to save space.
+ unsigned x = (unsigned)e.valuestrsize() - 1;
+ if (x > 255) {
+ traditional(obj);
+ return;
}
+ b.appendUChar(x);
+ b.appendBuf(e.valuestr(), x);
+ break;
+ }
case NumberInt:
- b.appendUChar(cint|bits);
- b.appendNum((double) e._numberInt());
+ b.appendUChar(cint | bits);
+ b.appendNum((double)e._numberInt());
break;
- case NumberLong:
- {
- long long n = e._numberLong();
- long long m = 2LL << 52;
- DEV {
- long long d = m-1;
- verify( ((long long) ((double) -d)) == -d );
- }
- if( n >= m || n <= -m ) {
- // can't represent exactly as a double
- traditional(obj);
- return;
- }
- b.appendUChar(clong|bits);
- b.appendNum((double) n);
- break;
+ case NumberLong: {
+ long long n = e._numberLong();
+ long long m = 2LL << 52;
+ DEV {
+ long long d = m - 1;
+ verify(((long long)((double)-d)) == -d);
}
- case NumberDouble:
- {
- double d = e._numberDouble();
- if( std::isnan(d) ) {
- traditional(obj);
- return;
- }
- b.appendUChar(cdouble|bits);
- b.appendNum(d);
- break;
+ if (n >= m || n <= -m) {
+ // can't represent exactly as a double
+ traditional(obj);
+ return;
+ }
+ b.appendUChar(clong | bits);
+ b.appendNum((double)n);
+ break;
+ }
+ case NumberDouble: {
+ double d = e._numberDouble();
+ if (std::isnan(d)) {
+ traditional(obj);
+ return;
}
+ b.appendUChar(cdouble | bits);
+ b.appendNum(d);
+ break;
+ }
default:
// if other types involved, store as traditional BSON
traditional(obj);
return;
- }
- if( !i.more() )
- break;
- bits = 0;
}
- _keyData = (const unsigned char *) b.buf();
- dassert( b.len() == dataSize() ); // check datasize method is correct
- dassert( (*_keyData & cNOTUSED) == 0 );
+ if (!i.more())
+ break;
+ bits = 0;
}
-
- BSONObj KeyV1::toBson() const {
- verify( _keyData != 0 );
- if( !isCompactFormat() )
- return bson();
-
- BSONObjBuilder b(512);
- const unsigned char *p = _keyData;
- while( 1 ) {
- unsigned bits = *p++;
-
- switch( bits & 0x3f ) {
- case cminkey: b.appendMinKey(""); break;
- case cnull: b.appendNull(""); break;
- case cfalse: b.appendBool("", false); break;
- case ctrue: b.appendBool("", true); break;
- case cmaxkey:
- b.appendMaxKey("");
- break;
- case cstring:
- {
- unsigned sz = *p++;
- // we build the element ourself as we have to null terminate it
- BufBuilder &bb = b.bb();
- bb.appendNum((char) String);
- bb.appendUChar(0); // fieldname ""
- bb.appendNum(sz+1);
- bb.appendBuf(p, sz);
- bb.appendUChar(0); // null char at end of string
- p += sz;
- break;
- }
- case coid:
- {
- OID oid = OID::from(p);
- b.appendOID("", &oid);
- p += OID::kOIDSize;
- break;
- }
- case cbindata:
- {
- int len = binDataCodeToLength(*p);
- int subtype = (*p) & BinDataTypeMask;
- if( subtype & 0x8 ) {
- subtype = (subtype & 0x7) | 0x80;
- }
- b.appendBinData("", len, (BinDataType) subtype, ++p);
- p += len;
- break;
- }
- case cdate:
- b.appendDate("", (Date_t&) *p);
- p += 8;
- break;
- case cdouble:
- b.append("", (double&) *p);
- p += sizeof(double);
- break;
- case cint:
- b.append("", static_cast< int >((reinterpret_cast< const PackedDouble& >(*p)).d));
- p += sizeof(double);
- break;
- case clong:
- b.append("", static_cast< long long>((reinterpret_cast< const PackedDouble& >(*p)).d));
- p += sizeof(double);
- break;
- default:
- verify(false);
- }
-
- if( (bits & cHASMORE) == 0 )
+ _keyData = (const unsigned char*)b.buf();
+ dassert(b.len() == dataSize()); // check datasize method is correct
+ dassert((*_keyData & cNOTUSED) == 0);
+}
+
+BSONObj KeyV1::toBson() const {
+ verify(_keyData != 0);
+ if (!isCompactFormat())
+ return bson();
+
+ BSONObjBuilder b(512);
+ const unsigned char* p = _keyData;
+ while (1) {
+ unsigned bits = *p++;
+
+ switch (bits & 0x3f) {
+ case cminkey:
+ b.appendMinKey("");
break;
- }
- return b.obj();
- }
-
- static int compare(const unsigned char *&l, const unsigned char *&r) {
- int lt = (*l & cCANONTYPEMASK);
- int rt = (*r & cCANONTYPEMASK);
- int x = lt - rt;
- if( x )
- return x;
-
- l++; r++;
-
- // same type
- switch( lt ) {
- case cdouble:
- {
- double L = (reinterpret_cast< const PackedDouble* >(l))->d;
- double R = (reinterpret_cast< const PackedDouble* >(r))->d;
- if( L < R )
- return -1;
- if( L != R )
- return 1;
- l += 8; r += 8;
+ case cnull:
+ b.appendNull("");
+ break;
+ case cfalse:
+ b.appendBool("", false);
+ break;
+ case ctrue:
+ b.appendBool("", true);
+ break;
+ case cmaxkey:
+ b.appendMaxKey("");
+ break;
+ case cstring: {
+ unsigned sz = *p++;
+                // we build the element ourselves as we have to null-terminate it
+ BufBuilder& bb = b.bb();
+ bb.appendNum((char)String);
+ bb.appendUChar(0); // fieldname ""
+ bb.appendNum(sz + 1);
+ bb.appendBuf(p, sz);
+ bb.appendUChar(0); // null char at end of string
+ p += sz;
break;
}
- case cstring:
- {
- int lsz = *l;
- int rsz = *r;
- int common = min(lsz, rsz);
- l++; r++; // skip the size byte
- // use memcmp as we (will) allow zeros in UTF8 strings
- int res = memcmp(l, r, common);
- if( res )
- return res;
- // longer string is the greater one
- int diff = lsz-rsz;
- if( diff )
- return diff;
- l += lsz; r += lsz;
+ case coid: {
+ OID oid = OID::from(p);
+ b.appendOID("", &oid);
+ p += OID::kOIDSize;
break;
}
- case cbindata:
- {
- int L = *l;
- int R = *r;
- int llen = binDataCodeToLength(L);
- int diff = L-R; // checks length and subtype simultaneously
- if( diff ) {
- // unfortunately nibbles are backwards to do subtype and len in one check (could bit swap...)
- int rlen = binDataCodeToLength(R);
- if( llen != rlen )
- return llen - rlen;
- return diff;
+ case cbindata: {
+ int len = binDataCodeToLength(*p);
+ int subtype = (*p) & BinDataTypeMask;
+ if (subtype & 0x8) {
+ subtype = (subtype & 0x7) | 0x80;
}
- // same length, same type
- l++; r++;
- int res = memcmp(l, r, llen);
- if( res )
- return res;
- l += llen; r += llen;
+ b.appendBinData("", len, (BinDataType)subtype, ++p);
+ p += len;
break;
}
- case cdate:
- {
- long long L = *((long long *) l);
- long long R = *((long long *) r);
- if( L < R )
- return -1;
- if( L > R )
- return 1;
- l += 8; r += 8;
+ case cdate:
+ b.appendDate("", (Date_t&)*p);
+ p += 8;
break;
- }
- case coid:
- {
- int res = memcmp(l, r, OID::kOIDSize);
- if( res )
- return res;
- l += OID::kOIDSize; r += OID::kOIDSize;
+ case cdouble:
+ b.append("", (double&)*p);
+ p += sizeof(double);
+ break;
+ case cint:
+ b.append("", static_cast<int>((reinterpret_cast<const PackedDouble&>(*p)).d));
+ p += sizeof(double);
break;
+ case clong:
+ b.append("", static_cast<long long>((reinterpret_cast<const PackedDouble&>(*p)).d));
+ p += sizeof(double);
+ break;
+ default:
+ verify(false);
+ }
+
+ if ((bits & cHASMORE) == 0)
+ break;
+ }
+ return b.obj();
+}
+
+static int compare(const unsigned char*& l, const unsigned char*& r) {
+ int lt = (*l & cCANONTYPEMASK);
+ int rt = (*r & cCANONTYPEMASK);
+ int x = lt - rt;
+ if (x)
+ return x;
+
+ l++;
+ r++;
+
+ // same type
+ switch (lt) {
+ case cdouble: {
+ double L = (reinterpret_cast<const PackedDouble*>(l))->d;
+ double R = (reinterpret_cast<const PackedDouble*>(r))->d;
+ if (L < R)
+ return -1;
+ if (L != R)
+ return 1;
+ l += 8;
+ r += 8;
+ break;
+ }
+ case cstring: {
+ int lsz = *l;
+ int rsz = *r;
+ int common = min(lsz, rsz);
+ l++;
+ r++; // skip the size byte
+ // use memcmp as we (will) allow zeros in UTF8 strings
+ int res = memcmp(l, r, common);
+ if (res)
+ return res;
+ // longer string is the greater one
+ int diff = lsz - rsz;
+ if (diff)
+ return diff;
+ l += lsz;
+ r += lsz;
+ break;
+ }
+ case cbindata: {
+ int L = *l;
+ int R = *r;
+ int llen = binDataCodeToLength(L);
+ int diff = L - R; // checks length and subtype simultaneously
+ if (diff) {
+                // unfortunately the nibbles are ordered backwards for checking subtype and length in one comparison (could bit-swap...)
+ int rlen = binDataCodeToLength(R);
+ if (llen != rlen)
+ return llen - rlen;
+ return diff;
}
+ // same length, same type
+ l++;
+ r++;
+ int res = memcmp(l, r, llen);
+ if (res)
+ return res;
+ l += llen;
+ r += llen;
+ break;
+ }
+ case cdate: {
+ long long L = *((long long*)l);
+ long long R = *((long long*)r);
+ if (L < R)
+ return -1;
+ if (L > R)
+ return 1;
+ l += 8;
+ r += 8;
+ break;
+ }
+ case coid: {
+ int res = memcmp(l, r, OID::kOIDSize);
+ if (res)
+ return res;
+ l += OID::kOIDSize;
+ r += OID::kOIDSize;
+ break;
+ }
default:
// all the others are a match -- e.g. null == null
;
- }
-
- return 0;
- }
-
- // at least one of this and right are traditional BSON format
- int NOINLINE_DECL KeyV1::compareHybrid(const KeyV1& right, const Ordering& order) const {
- BSONObj L = toBson();
- BSONObj R = right.toBson();
- return L.woCompare(R, order, /*considerfieldname*/false);
}
- int KeyV1::woCompare(const KeyV1& right, const Ordering &order) const {
- const unsigned char *l = _keyData;
- const unsigned char *r = right._keyData;
-
- if( (*l|*r) == IsBSON ) // only can do this if cNOTUSED maintained
- return compareHybrid(right, order);
-
- unsigned mask = 1;
- while( 1 ) {
- char lval = *l;
- char rval = *r;
- {
- int x = compare(l, r); // updates l and r pointers
- if( x ) {
- if( order.descending(mask) )
- x = -x;
- return x;
- }
- }
-
- {
- int x = ((int)(lval & cHASMORE)) - ((int)(rval & cHASMORE));
- if( x )
- return x;
- if( (lval & cHASMORE) == 0 )
- break;
+ return 0;
+}
+
+// at least one of this and right are traditional BSON format
+int NOINLINE_DECL KeyV1::compareHybrid(const KeyV1& right, const Ordering& order) const {
+ BSONObj L = toBson();
+ BSONObj R = right.toBson();
+ return L.woCompare(R, order, /*considerfieldname*/ false);
+}
+
+int KeyV1::woCompare(const KeyV1& right, const Ordering& order) const {
+ const unsigned char* l = _keyData;
+ const unsigned char* r = right._keyData;
+
+ if ((*l | *r) == IsBSON) // only can do this if cNOTUSED maintained
+ return compareHybrid(right, order);
+
+ unsigned mask = 1;
+ while (1) {
+ char lval = *l;
+ char rval = *r;
+ {
+ int x = compare(l, r); // updates l and r pointers
+ if (x) {
+ if (order.descending(mask))
+ x = -x;
+ return x;
}
+ }
- mask <<= 1;
+ {
+ int x = ((int)(lval & cHASMORE)) - ((int)(rval & cHASMORE));
+ if (x)
+ return x;
+ if ((lval & cHASMORE) == 0)
+ break;
}
- return 0;
+ mask <<= 1;
}
- static unsigned sizes[] = {
- 0,
- 1, //cminkey=1,
- 1, //cnull=2,
- 0,
- 9, //cdouble=4,
- 0,
- 0, //cstring=6,
- 0,
- 13, //coid=8,
- 0,
- 1, //cfalse=10,
- 1, //ctrue=11,
- 9, //cdate=12,
- 0,
- 1, //cmaxkey=14,
- 0
- };
-
- inline unsigned sizeOfElement(const unsigned char *p) {
- unsigned type = *p & cCANONTYPEMASK;
- unsigned sz = sizes[type];
- if( sz == 0 ) {
- if( type == cstring ) {
- sz = ((unsigned) p[1]) + 2;
- }
- else {
- verify( type == cbindata );
- sz = binDataCodeToLength(p[1]) + 2;
- }
+ return 0;
+}
+
+static unsigned sizes[] = {0,
+ 1, // cminkey=1,
+ 1, // cnull=2,
+ 0,
+ 9, // cdouble=4,
+ 0,
+ 0, // cstring=6,
+ 0,
+ 13, // coid=8,
+ 0,
+ 1, // cfalse=10,
+ 1, // ctrue=11,
+ 9, // cdate=12,
+ 0,
+ 1, // cmaxkey=14,
+ 0};
+
+inline unsigned sizeOfElement(const unsigned char* p) {
+ unsigned type = *p & cCANONTYPEMASK;
+ unsigned sz = sizes[type];
+ if (sz == 0) {
+ if (type == cstring) {
+ sz = ((unsigned)p[1]) + 2;
+ } else {
+ verify(type == cbindata);
+ sz = binDataCodeToLength(p[1]) + 2;
}
- return sz;
}
+ return sz;
+}
- int KeyV1::dataSize() const {
- const unsigned char *p = _keyData;
- if( !isCompactFormat() ) {
- return bson().objsize() + 1;
- }
-
- bool more;
- do {
- unsigned z = sizeOfElement(p);
- more = (*p & cHASMORE) != 0;
- p += z;
- } while( more );
- return p - _keyData;
+int KeyV1::dataSize() const {
+ const unsigned char* p = _keyData;
+ if (!isCompactFormat()) {
+ return bson().objsize() + 1;
}
- bool KeyV1::woEqual(const KeyV1& right) const {
- const unsigned char *l = _keyData;
- const unsigned char *r = right._keyData;
-
- if( (*l|*r) == IsBSON ) {
- return toBson().equal(right.toBson());
- }
+ bool more;
+ do {
+ unsigned z = sizeOfElement(p);
+ more = (*p & cHASMORE) != 0;
+ p += z;
+ } while (more);
+ return p - _keyData;
+}
+
+bool KeyV1::woEqual(const KeyV1& right) const {
+ const unsigned char* l = _keyData;
+ const unsigned char* r = right._keyData;
+
+ if ((*l | *r) == IsBSON) {
+ return toBson().equal(right.toBson());
+ }
- while( 1 ) {
- char lval = *l;
- char rval = *r;
- if( (lval&(cCANONTYPEMASK|cHASMORE)) != (rval&(cCANONTYPEMASK|cHASMORE)) )
- return false;
- l++; r++;
- switch( lval&cCANONTYPEMASK ) {
+ while (1) {
+ char lval = *l;
+ char rval = *r;
+ if ((lval & (cCANONTYPEMASK | cHASMORE)) != (rval & (cCANONTYPEMASK | cHASMORE)))
+ return false;
+ l++;
+ r++;
+ switch (lval & cCANONTYPEMASK) {
case coid:
- if( *((unsigned*) l) != *((unsigned*) r) )
+ if (*((unsigned*)l) != *((unsigned*)r))
return false;
- l += 4; r += 4;
+ l += 4;
+ r += 4;
case cdate:
- if( *((unsigned long long *) l) != *((unsigned long long *) r) )
+ if (*((unsigned long long*)l) != *((unsigned long long*)r))
return false;
- l += 8; r += 8;
+ l += 8;
+ r += 8;
break;
case cdouble:
- if( (reinterpret_cast< const PackedDouble* > (l))->d != (reinterpret_cast< const PackedDouble* >(r))->d )
+ if ((reinterpret_cast<const PackedDouble*>(l))->d !=
+ (reinterpret_cast<const PackedDouble*>(r))->d)
return false;
- l += 8; r += 8;
+ l += 8;
+ r += 8;
break;
- case cstring:
- {
- if( *l != *r )
- return false; // not same length
- unsigned sz = ((unsigned) *l) + 1;
- if( memcmp(l, r, sz) )
- return false;
- l += sz; r += sz;
- break;
- }
- case cbindata:
- {
- if( *l != *r )
- return false; // len or subtype mismatch
- int len = binDataCodeToLength(*l) + 1;
- if( memcmp(l, r, len) )
- return false;
- l += len; r += len;
- break;
- }
+ case cstring: {
+ if (*l != *r)
+ return false; // not same length
+ unsigned sz = ((unsigned)*l) + 1;
+ if (memcmp(l, r, sz))
+ return false;
+ l += sz;
+ r += sz;
+ break;
+ }
+ case cbindata: {
+ if (*l != *r)
+ return false; // len or subtype mismatch
+ int len = binDataCodeToLength(*l) + 1;
+ if (memcmp(l, r, len))
+ return false;
+ l += len;
+ r += len;
+ break;
+ }
case cminkey:
case cnull:
case cfalse:
@@ -683,23 +691,23 @@ namespace mongo {
break;
default:
verify(false);
- }
- if( (lval&cHASMORE) == 0 )
- break;
}
- return true;
+ if ((lval & cHASMORE) == 0)
+ break;
}
-
- struct CmpUnitTest : public StartupTest {
- void run() {
- char a[2];
- char b[2];
- a[0] = -3;
- a[1] = 0;
- b[0] = 3;
- b[1] = 0;
- verify( strcmp(a,b)>0 && memcmp(a,b,2)>0 );
- }
- } cunittest;
+ return true;
+}
+
+struct CmpUnitTest : public StartupTest {
+ void run() {
+ char a[2];
+ char b[2];
+ a[0] = -3;
+ a[1] = 0;
+ b[0] = 3;
+ b[1] = 0;
+ verify(strcmp(a, b) > 0 && memcmp(a, b, 2) > 0);
+ }
+} cunittest;
} // namespace mongo
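
The compact BinData encoding above packs the length code into the high nibble of a single
byte and the subtype into the low nibble; only lengths with a non-negative entry in
BinDataLengthToCode are representable, and everything else falls back to traditional BSON.
A standalone round-trip check over the two tables (copied verbatim from the code above):

    #include <cassert>

    const int BinDataLengthToCode[] = {
        0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
        0x80, -1,   0x90, -1,   0xa0, -1,   0xb0, -1,
        0xc0, -1,   -1,   -1,   0xd0, -1,   -1,   -1,
        0xe0, -1,   -1,   -1,   -1,   -1,   -1,   -1,
        0xf0};
    const int BinDataCodeToLength[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 32};

    int main() {
        // Every representable length must survive the encode/decode round trip.
        for (int len = 0; len <= 32; ++len) {
            int code = BinDataLengthToCode[len];
            if (code >= 0)
                assert(BinDataCodeToLength[code >> 4] == len);
        }
        return 0;
    }
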
diff --git a/src/mongo/db/storage/mmap_v1/btree/key.h b/src/mongo/db/storage/mmap_v1/btree/key.h
index 7f886552067..4787d83281a 100644
--- a/src/mongo/db/storage/mmap_v1/btree/key.h
+++ b/src/mongo/db/storage/mmap_v1/btree/key.h
@@ -35,97 +35,132 @@
namespace mongo {
- /** Key class for precomputing a small format index key that is denser than a traditional BSONObj.
+/** Key class for precomputing a small format index key that is denser than a traditional BSONObj.
- KeyBson is a legacy wrapper implementation for old BSONObj style keys for v:0 indexes.
+ KeyBson is a legacy wrapper implementation for old BSONObj style keys for v:0 indexes.
- KeyV1 is the new implementation.
+ KeyV1 is the new implementation.
+*/
+class KeyBson /* "KeyV0" */ {
+public:
+ KeyBson() {}
+ explicit KeyBson(const char* keyData) : _o(keyData) {}
+ explicit KeyBson(const BSONObj& obj) : _o(obj) {}
+ int woCompare(const KeyBson& r, const Ordering& o) const;
+ BSONObj toBson() const {
+ return _o;
+ }
+ std::string toString() const {
+ return _o.toString();
+ }
+ int dataSize() const {
+ return _o.objsize();
+ }
+ const char* data() const {
+ return _o.objdata();
+ }
+ BSONElement _firstElement() const {
+ return _o.firstElement();
+ }
+ bool isCompactFormat() const {
+ return false;
+ }
+ bool woEqual(const KeyBson& r) const;
+ void assign(const KeyBson& rhs) {
+ *this = rhs;
+ }
+ bool isValid() const {
+ return true;
+ }
+
+private:
+ BSONObj _o;
+};
+
+class KeyV1Owned;
+
+// corresponding to BtreeData_V1
+class KeyV1 {
+ void operator=(
+ const KeyV1&); // disallowed just to make people be careful as we don't own the buffer
+ KeyV1(
+        const KeyV1Owned&);  // disallowed since this is not a great idea: the KeyV1Owned will likely go out of scope
+public:
+ KeyV1() {
+ _keyData = 0;
+ }
+ ~KeyV1() {
+ DEV _keyData = (const unsigned char*)1;
+ }
+
+ KeyV1(const KeyV1& rhs) : _keyData(rhs._keyData) {
+ dassert(_keyData > (const unsigned char*)1);
+ }
+
+ // explicit version of operator= to be safe
+ void assign(const KeyV1& rhs) {
+ _keyData = rhs._keyData;
+ }
+
+ /** @param keyData can be a buffer containing data in either BSON format, OR in KeyV1 format.
+ when BSON, we are just a wrapper
*/
- class KeyBson /* "KeyV0" */ {
- public:
- KeyBson() { }
- explicit KeyBson(const char *keyData) : _o(keyData) { }
- explicit KeyBson(const BSONObj& obj) : _o(obj) { }
- int woCompare(const KeyBson& r, const Ordering &o) const;
- BSONObj toBson() const { return _o; }
- std::string toString() const { return _o.toString(); }
- int dataSize() const { return _o.objsize(); }
- const char * data() const { return _o.objdata(); }
- BSONElement _firstElement() const { return _o.firstElement(); }
- bool isCompactFormat() const { return false; }
- bool woEqual(const KeyBson& r) const;
- void assign(const KeyBson& rhs) { *this = rhs; }
- bool isValid() const { return true; }
- private:
- BSONObj _o;
- };
-
- class KeyV1Owned;
-
- // corresponding to BtreeData_V1
- class KeyV1 {
- void operator=(const KeyV1&); // disallowed just to make people be careful as we don't own the buffer
- KeyV1(const KeyV1Owned&); // disallowed as this is not a great idea as KeyV1Owned likely will go out of scope
- public:
- KeyV1() { _keyData = 0; }
- ~KeyV1() { DEV _keyData = (const unsigned char *) 1; }
-
- KeyV1(const KeyV1& rhs) : _keyData(rhs._keyData) {
- dassert( _keyData > (const unsigned char *) 1 );
- }
-
- // explicit version of operator= to be safe
- void assign(const KeyV1& rhs) {
- _keyData = rhs._keyData;
- }
-
- /** @param keyData can be a buffer containing data in either BSON format, OR in KeyV1 format.
- when BSON, we are just a wrapper
- */
- explicit KeyV1(const char *keyData) : _keyData((unsigned char *) keyData) { }
-
- int woCompare(const KeyV1& r, const Ordering &o) const;
- bool woEqual(const KeyV1& r) const;
- BSONObj toBson() const;
- std::string toString() const { return toBson().toString(); }
-
- /** get the key data we want to store in the btree bucket */
- const char * data() const { return (const char *) _keyData; }
-
- /** @return size of data() */
- int dataSize() const;
-
- /** only used by geo, which always has bson keys */
- BSONElement _firstElement() const { return bson().firstElement(); }
- bool isCompactFormat() const { return *_keyData != IsBSON; }
-
- bool isValid() const { return _keyData > (const unsigned char*)1; }
- protected:
- enum { IsBSON = 0xff };
- const unsigned char *_keyData;
- BSONObj bson() const {
- dassert( !isCompactFormat() );
- return BSONObj((const char *) _keyData+1);
- }
- private:
- int compareHybrid(const KeyV1& right, const Ordering& order) const;
- };
-
- class KeyV1Owned : public KeyV1 {
- void operator=(const KeyV1Owned&);
- public:
- /** @obj a BSON object to be translated to KeyV1 format. If the object isn't
- representable in KeyV1 format (which happens, intentionally, at times)
- it will stay as bson herein.
- */
- KeyV1Owned(const BSONObj& obj);
-
- /** makes a copy (memcpy's the whole thing) */
- KeyV1Owned(const KeyV1& rhs);
-
- private:
- StackBufBuilder b;
- void traditional(const BSONObj& obj); // store as traditional bson not as compact format
- };
+ explicit KeyV1(const char* keyData) : _keyData((unsigned char*)keyData) {}
+
+ int woCompare(const KeyV1& r, const Ordering& o) const;
+ bool woEqual(const KeyV1& r) const;
+ BSONObj toBson() const;
+ std::string toString() const {
+ return toBson().toString();
+ }
+
+ /** get the key data we want to store in the btree bucket */
+ const char* data() const {
+ return (const char*)_keyData;
+ }
+
+ /** @return size of data() */
+ int dataSize() const;
+
+ /** only used by geo, which always has bson keys */
+ BSONElement _firstElement() const {
+ return bson().firstElement();
+ }
+ bool isCompactFormat() const {
+ return *_keyData != IsBSON;
+ }
+
+ bool isValid() const {
+ return _keyData > (const unsigned char*)1;
+ }
+
+protected:
+ enum { IsBSON = 0xff };
+ const unsigned char* _keyData;
+ BSONObj bson() const {
+ dassert(!isCompactFormat());
+ return BSONObj((const char*)_keyData + 1);
+ }
+
+private:
+ int compareHybrid(const KeyV1& right, const Ordering& order) const;
+};
+
+class KeyV1Owned : public KeyV1 {
+ void operator=(const KeyV1Owned&);
+public:
+    /** @param obj a BSON object to be translated to KeyV1 format. If the object isn't
+ representable in KeyV1 format (which happens, intentionally, at times)
+ it will stay as bson herein.
+ */
+ KeyV1Owned(const BSONObj& obj);
+
+ /** makes a copy (memcpy's the whole thing) */
+ KeyV1Owned(const KeyV1& rhs);
+
+private:
+ StackBufBuilder b;
+ void traditional(const BSONObj& obj); // store as traditional bson not as compact format
+};
};
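
As a usage sketch of the classes above (illustrative only; assumes the surrounding mongo
headers): construction picks the compact encoding when every element is representable and
silently falls back to sentinel-tagged traditional BSON otherwise, and woCompare() accepts
either representation:

    KeyV1Owned shortKey(BSON("" << "abc"));  // short string: compact format
    invariant(shortKey.isCompactFormat());

    KeyV1Owned longKey(BSON("" << std::string(300, 'x')));  // > 255 bytes: kept as BSON
    invariant(!longKey.isCompactFormat());  // first byte is the IsBSON sentinel

    // A mixed compact/traditional comparison goes through compareHybrid() internally.
    invariant(shortKey.woCompare(longKey, Ordering::make(BSONObj())) < 0);
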
diff --git a/src/mongo/db/storage/mmap_v1/catalog/hashtab.cpp b/src/mongo/db/storage/mmap_v1/catalog/hashtab.cpp
index 9c86a4fffba..df766917fac 100644
--- a/src/mongo/db/storage/mmap_v1/catalog/hashtab.cpp
+++ b/src/mongo/db/storage/mmap_v1/catalog/hashtab.cpp
@@ -36,52 +36,50 @@
namespace mongo {
- int NamespaceHashTable::_find(const Namespace& k, bool& found) const {
- found = false;
- int h = k.hash();
- int i = h % n;
- int start = i;
- int chain = 0;
- int firstNonUsed = -1;
- while ( 1 ) {
- if ( !_nodes(i).inUse() ) {
- if ( firstNonUsed < 0 )
- firstNonUsed = i;
- }
-
- if ( _nodes(i).hash == h && _nodes(i).key == k ) {
- if ( chain >= 200 )
- log() << "warning: hashtable " << _name << " long chain " << std::endl;
- found = true;
- return i;
- }
- chain++;
- i = (i+1) % n;
- if ( i == start ) {
- // shouldn't get here / defensive for infinite loops
- log() << "error: hashtable " << _name << " is full n:" << n << std::endl;
- return -1;
- }
- if( chain >= maxChain ) {
- if ( firstNonUsed >= 0 )
- return firstNonUsed;
- log() << "error: hashtable " << _name << " max chain reached:" << maxChain << std::endl;
- return -1;
- }
+int NamespaceHashTable::_find(const Namespace& k, bool& found) const {
+ found = false;
+ int h = k.hash();
+ int i = h % n;
+ int start = i;
+ int chain = 0;
+ int firstNonUsed = -1;
+ while (1) {
+ if (!_nodes(i).inUse()) {
+ if (firstNonUsed < 0)
+ firstNonUsed = i;
}
- }
- /* buf must be all zeroes on initialization. */
- NamespaceHashTable::NamespaceHashTable(void* buf, int buflen, const char* name)
- : _name(name),
- _buf(buf) {
-
- n = buflen / sizeof(Node);
- if ((n & 1) == 0) {
- n--;
+ if (_nodes(i).hash == h && _nodes(i).key == k) {
+ if (chain >= 200)
+ log() << "warning: hashtable " << _name << " long chain " << std::endl;
+ found = true;
+ return i;
+ }
+ chain++;
+ i = (i + 1) % n;
+ if (i == start) {
+ // shouldn't get here / defensive for infinite loops
+ log() << "error: hashtable " << _name << " is full n:" << n << std::endl;
+ return -1;
}
+ if (chain >= maxChain) {
+ if (firstNonUsed >= 0)
+ return firstNonUsed;
+ log() << "error: hashtable " << _name << " max chain reached:" << maxChain << std::endl;
+ return -1;
+ }
+ }
+}
- maxChain = (int)(n * 0.05);
+/* buf must be all zeroes on initialization. */
+NamespaceHashTable::NamespaceHashTable(void* buf, int buflen, const char* name)
+ : _name(name), _buf(buf) {
+ n = buflen / sizeof(Node);
+ if ((n & 1) == 0) {
+ n--;
}
+ maxChain = (int)(n * 0.05);
+}
+
} // namespace mongo
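
The probe loop above is plain open addressing: start at hash % n, walk forward one slot at a
time, remember the first unused slot, and report failure once the chain outgrows maxChain
(5% of the buckets). A stripped-down standalone sketch of the same policy (the full version
above also guards against wrapping all the way around the table):

    #include <vector>

    struct Slot {
        int hash = 0;  // hash == 0 means "unused", as in NamespaceHashTable's nodes
        int key = 0;
    };

    // Returns the matching or first-free slot index for key, or -1 when the
    // probe chain exceeds maxChain without seeing a free slot.
    int find(const std::vector<Slot>& slots, int key, int hash, int maxChain, bool& found) {
        found = false;
        const int n = (int)slots.size();
        int i = hash % n;
        int firstNonUsed = -1;
        for (int chain = 0; chain < maxChain; ++chain, i = (i + 1) % n) {
            if (slots[i].hash == 0 && firstNonUsed < 0)
                firstNonUsed = i;
            if (slots[i].hash == hash && slots[i].key == key) {
                found = true;
                return i;
            }
        }
        return firstNonUsed;  // may be -1: the caller treats that as "table full"
    }
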
diff --git a/src/mongo/db/storage/mmap_v1/catalog/hashtab.h b/src/mongo/db/storage/mmap_v1/catalog/hashtab.h
index b4ab9d858fa..286de349138 100644
--- a/src/mongo/db/storage/mmap_v1/catalog/hashtab.h
+++ b/src/mongo/db/storage/mmap_v1/catalog/hashtab.h
@@ -35,105 +35,103 @@
namespace mongo {
- /**
- * Simple, fixed size hash table used for namespace mapping (effectively the contents of the
- * MMAP V1 .ns file). Uses a contiguous block of memory, so you can put it in a memory mapped
- * file very easily.
- */
- class NamespaceHashTable {
- MONGO_DISALLOW_COPYING(NamespaceHashTable);
- public:
-
- typedef stdx::function< void(const Namespace& k, NamespaceDetails& v) > IteratorCallback;
-
-
- /* buf must be all zeroes on initialization. */
- NamespaceHashTable(void* buf, int buflen, const char *name);
-
- NamespaceDetails* get(const Namespace& k) const {
- bool found;
- int i = _find(k, found);
- if (found) {
- return &_nodes(i).value;
- }
+/**
+ * Simple, fixed size hash table used for namespace mapping (effectively the contents of the
+ * MMAP V1 .ns file). Uses a contiguous block of memory, so you can put it in a memory mapped
+ * file very easily.
+ */
+class NamespaceHashTable {
+ MONGO_DISALLOW_COPYING(NamespaceHashTable);
- return 0;
- }
+public:
+ typedef stdx::function<void(const Namespace& k, NamespaceDetails& v)> IteratorCallback;
- void kill(OperationContext* txn, const Namespace& k) {
- bool found;
- int i = _find(k, found);
- if ( i >= 0 && found ) {
- Node* n = &_nodes(i);
- n = txn->recoveryUnit()->writing(n);
- n->key.kill();
- n->setUnused();
- }
- }
- /** returns false if too full */
- bool put(OperationContext* txn, const Namespace& k, const NamespaceDetails& value) {
- bool found;
- int i = _find(k, found);
- if (i < 0)
- return false;
-
- Node* n = txn->recoveryUnit()->writing(&_nodes(i));
- if (!found) {
- n->key = k;
- n->hash = k.hash();
- }
- else {
- invariant(n->hash == k.hash());
- }
+ /* buf must be all zeroes on initialization. */
+ NamespaceHashTable(void* buf, int buflen, const char* name);
- n->value = value;
- return true;
+ NamespaceDetails* get(const Namespace& k) const {
+ bool found;
+ int i = _find(k, found);
+ if (found) {
+ return &_nodes(i).value;
}
- void iterAll(IteratorCallback callback) {
- for (int i = 0; i < n; i++) {
- if (_nodes(i).inUse()) {
- callback(_nodes(i).key, _nodes(i).value);
- }
- }
+ return 0;
+ }
+
+ void kill(OperationContext* txn, const Namespace& k) {
+ bool found;
+ int i = _find(k, found);
+ if (i >= 0 && found) {
+ Node* n = &_nodes(i);
+ n = txn->recoveryUnit()->writing(n);
+ n->key.kill();
+ n->setUnused();
+ }
+ }
+
+ /** returns false if too full */
+ bool put(OperationContext* txn, const Namespace& k, const NamespaceDetails& value) {
+ bool found;
+ int i = _find(k, found);
+ if (i < 0)
+ return false;
+
+ Node* n = txn->recoveryUnit()->writing(&_nodes(i));
+ if (!found) {
+ n->key = k;
+ n->hash = k.hash();
+ } else {
+ invariant(n->hash == k.hash());
}
+ n->value = value;
+ return true;
+ }
+
+ void iterAll(IteratorCallback callback) {
+ for (int i = 0; i < n; i++) {
+ if (_nodes(i).inUse()) {
+ callback(_nodes(i).key, _nodes(i).value);
+ }
+ }
+ }
- private:
+private:
#pragma pack(1)
- struct Node {
- int hash;
- Namespace key;
- NamespaceDetails value;
+ struct Node {
+ int hash;
+ Namespace key;
+ NamespaceDetails value;
- bool inUse() const {
- return hash != 0;
- }
+ bool inUse() const {
+ return hash != 0;
+ }
- void setUnused() {
- hash = 0;
- }
- };
+ void setUnused() {
+ hash = 0;
+ }
+ };
#pragma pack()
- BOOST_STATIC_ASSERT(sizeof(Node) == 628);
+ BOOST_STATIC_ASSERT(sizeof(Node) == 628);
- int _find(const Namespace& k, bool& found) const;
+ int _find(const Namespace& k, bool& found) const;
- Node& _nodes(int i) const {
- Node *nodes = (Node *)_buf;
- return nodes[i];
- }
+ Node& _nodes(int i) const {
+ Node* nodes = (Node*)_buf;
+ return nodes[i];
+ }
- const char* _name;
- void* const _buf;
+ const char* _name;
+ void* const _buf;
- int n; // number of hashtable buckets
- int maxChain;
- };
+ int n; // number of hashtable buckets
+ int maxChain;
+};
-} // namespace mongo
+} // namespace mongo
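
The #pragma pack(1) plus BOOST_STATIC_ASSERT(sizeof(Node) == 628) pairing above is what pins
the on-disk node layout: any padding the compiler introduced would silently corrupt existing
.ns files. A minimal sketch of the same guard using C++11 static_assert, with illustrative
stand-ins (these are not the real Namespace/NamespaceDetails definitions):

    #pragma pack(1)
    struct DiskNode {
        int hash;         // 4 bytes
        char key[128];    // stand-in for Namespace (fixed-width name buffer)
        char value[496];  // stand-in for NamespaceDetails
    };
    #pragma pack()

    // Fails the build if padding ever changes the on-disk layout.
    static_assert(sizeof(DiskNode) == 628, "on-disk node layout changed");
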
diff --git a/src/mongo/db/storage/mmap_v1/catalog/index_details.cpp b/src/mongo/db/storage/mmap_v1/catalog/index_details.cpp
index bc9cc3ee791..fa9093196f8 100644
--- a/src/mongo/db/storage/mmap_v1/catalog/index_details.cpp
+++ b/src/mongo/db/storage/mmap_v1/catalog/index_details.cpp
@@ -32,9 +32,8 @@
namespace mongo {
- void IndexDetails::_reset() {
- head.setInvalid();
- info.setInvalid();
- }
-
+void IndexDetails::_reset() {
+ head.setInvalid();
+ info.setInvalid();
+}
}
diff --git a/src/mongo/db/storage/mmap_v1/catalog/index_details.h b/src/mongo/db/storage/mmap_v1/catalog/index_details.h
index 8b343d2ee66..1ee5387c57c 100644
--- a/src/mongo/db/storage/mmap_v1/catalog/index_details.h
+++ b/src/mongo/db/storage/mmap_v1/catalog/index_details.h
@@ -34,38 +34,37 @@
namespace mongo {
- /* Details about a particular index. There is one of these effectively for each object in
- system.namespaces (although this also includes the head pointer, which is not in that
- collection).
+/* Details about a particular index. There is one of these effectively for each object in
+ system.namespaces (although this also includes the head pointer, which is not in that
+ collection).
- This is an internal part of the catalog. Nothing outside of the catalog should use this.
+ This is an internal part of the catalog. Nothing outside of the catalog should use this.
- ** MemoryMapped in NamespaceDetails ** (i.e., this is on disk data)
- */
+ ** MemoryMapped in NamespaceDetails ** (i.e., this is on disk data)
+ */
#pragma pack(1)
- struct IndexDetails {
- /**
- * btree head disk location
- */
- DiskLoc head;
-
- /* Location of index info object. Format:
+struct IndexDetails {
+ /**
+ * btree head disk location
+ */
+ DiskLoc head;
- { name:"nameofindex", ns:"parentnsname", key: {keypattobject}
- [, unique: <bool>, background: <bool>, v:<version>]
- }
+ /* Location of index info object. Format:
- This object is in the system.indexes collection. Note that since we
- have a pointer to the object here, the object in system.indexes MUST NEVER MOVE.
- */
- DiskLoc info;
+ { name:"nameofindex", ns:"parentnsname", key: {keypattobject}
+ [, unique: <bool>, background: <bool>, v:<version>]
+ }
- /**
- * makes head and info invalid
- */
- void _reset();
+ This object is in the system.indexes collection. Note that since we
+ have a pointer to the object here, the object in system.indexes MUST NEVER MOVE.
+ */
+ DiskLoc info;
- };
+ /**
+ * makes head and info invalid
+ */
+ void _reset();
+};
#pragma pack()
-} // namespace mongo
+} // namespace mongo
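
IndexDetails is memory-mapped on-disk data, hence the #pragma pack(1) and the compile-time size checks throughout this catalog code: the sizeof(Node) == 628 assertion earlier is exactly 4 + 128 + 496 (hash + Namespace + NamespaceDetails), and the "ofs 168" offset comments in namespace_details.h imply DiskLoc occupies 8 bytes. A small sketch of the pattern follows; modeling DiskLoc as two 32-bit ints is an assumption about its field layout, though the 8-byte total is implied by the offsets above.

    #include <cstdint>

    // Sketch of the on-disk layout discipline used throughout this catalog
    // code. Assumption: DiskLoc is modeled here as two 32-bit ints (file
    // number + offset), giving the 8-byte size the offset comments imply.
    #pragma pack(1)
    struct DiskLocSketch {
        int32_t a;    // data file number
        int32_t ofs;  // offset within the file
    };

    struct IndexDetailsSketch {
        DiskLocSketch head;  // btree head disk location
        DiskLocSketch info;  // location of the index info object
    };
    #pragma pack()

    // The modern equivalent of BOOST_STATIC_ASSERT: fail the build, not the
    // server, if a new field ever silently changes the on-disk size.
    static_assert(sizeof(DiskLocSketch) == 8, "DiskLoc must stay 8 bytes on disk");
    static_assert(sizeof(IndexDetailsSketch) == 16, "IndexDetails must stay 16 bytes on disk");

    int main() {}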
diff --git a/src/mongo/db/storage/mmap_v1/catalog/namespace-inl.h b/src/mongo/db/storage/mmap_v1/catalog/namespace-inl.h
index 318106dc5a7..6ed1bd661ca 100644
--- a/src/mongo/db/storage/mmap_v1/catalog/namespace-inl.h
+++ b/src/mongo/db/storage/mmap_v1/catalog/namespace-inl.h
@@ -36,43 +36,44 @@
namespace mongo {
- inline Namespace& Namespace::operator=(StringData ns) {
- // we fill the remaining space with all zeroes here. as the full Namespace struct is in
- // the datafiles (the .ns files specifically), that is helpful as then they are deterministic
- // in the bytes they have for a given sequence of operations. that makes testing and debugging
- // the data files easier.
- //
- // if profiling indicates this method is a significant bottleneck, we could have a version we
- // use for reads which does not fill with zeroes, and keep the zeroing behavior on writes.
- //
- memset( buf, 0, sizeof(buf) );
- uassert( 10080 , "ns name too long, max size is 127 bytes", ns.size() <= MaxNsLen);
- uassert( 17380 , "ns name can't contain embedded '\0' byte", ns.find('\0') == std::string::npos);
- ns.copyTo( buf, true );
- return *this;
- }
+inline Namespace& Namespace::operator=(StringData ns) {
+ // we fill the remaining space with all zeroes here. as the full Namespace struct is in
+ // the datafiles (the .ns files specifically), that is helpful as then they are deterministic
+ // in the bytes they have for a given sequence of operations. that makes testing and debugging
+ // the data files easier.
+ //
+ // if profiling indicates this method is a significant bottleneck, we could have a version we
+ // use for reads which does not fill with zeroes, and keep the zeroing behavior on writes.
+ //
+ memset(buf, 0, sizeof(buf));
+ uassert(10080, "ns name too long, max size is 127 bytes", ns.size() <= MaxNsLen);
+ uassert(17380, "ns name can't contain embedded '\0' byte", ns.find('\0') == std::string::npos);
+ ns.copyTo(buf, true);
+ return *this;
+}
- inline std::string Namespace::extraName(int i) const {
- char ex[] = "$extra";
- ex[5] += i;
- std::string s = std::string(buf) + ex;
- massert( 10348 , "$extra: ns name too long", s.size() <= MaxNsLen);
- return s;
- }
+inline std::string Namespace::extraName(int i) const {
+ char ex[] = "$extra";
+ ex[5] += i;
+ std::string s = std::string(buf) + ex;
+ massert(10348, "$extra: ns name too long", s.size() <= MaxNsLen);
+ return s;
+}
- inline bool Namespace::isExtra() const {
- const char *p = strstr(buf, "$extr");
- return p && p[5] && p[6] == 0; //==0 important in case an index uses name "$extra_1" for example
- }
+inline bool Namespace::isExtra() const {
+ const char* p = strstr(buf, "$extr");
+ return p && p[5] &&
+ p[6] == 0; //==0 important in case an index uses name "$extra_1" for example
+}
- inline int Namespace::hash() const {
- unsigned x = 0;
- const char *p = buf;
- while ( *p ) {
- x = x * 131 + *p;
- p++;
- }
- return (x & 0x7fffffff) | 0x8000000; // must be > 0
+inline int Namespace::hash() const {
+ unsigned x = 0;
+ const char* p = buf;
+ while (*p) {
+ x = x * 131 + *p;
+ p++;
}
+ return (x & 0x7fffffff) | 0x8000000; // must be > 0
+}
} // namespace mongo
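
Namespace::hash() above is a plain multiplicative string hash; the trailing (x & 0x7fffffff) | 0x8000000 forces a positive, nonzero result, which matters because NamespaceHashTable's Node treats hash == 0 as an unused slot. A standalone replica with a worked example (the expected numbers are just the arithmetic of the loop):

    #include <cassert>

    // Standalone replica of Namespace::hash() for experimentation. OR'ing in
    // 0x8000000 (bit 27) guarantees a nonzero, positive result: the hash
    // table in namespace_hashtable.h uses hash == 0 to mean "unused slot".
    int nsHash(const char* p) {
        unsigned x = 0;
        while (*p) {
            x = x * 131 + *p;
            p++;
        }
        return (x & 0x7fffffff) | 0x8000000;  // must be > 0
    }

    int main() {
        // Worked example: "ab" -> 'a' * 131 + 'b' = 12805, then bit 27 is
        // OR'd in: 12805 | 0x8000000 == 134230533.
        assert(nsHash("ab") == 134230533);
        // Even the empty string maps to a nonzero value (just the OR'd bit).
        assert(nsHash("") == 0x8000000);
        assert(nsHash("test.foo") > 0);
    }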
diff --git a/src/mongo/db/storage/mmap_v1/catalog/namespace.cpp b/src/mongo/db/storage/mmap_v1/catalog/namespace.cpp
index 374761fe386..c9dec65d520 100644
--- a/src/mongo/db/storage/mmap_v1/catalog/namespace.cpp
+++ b/src/mongo/db/storage/mmap_v1/catalog/namespace.cpp
@@ -37,13 +37,12 @@
#include "mongo/db/namespace_string.h"
namespace mongo {
- namespace {
- BOOST_STATIC_ASSERT( sizeof(Namespace) == 128 );
- BOOST_STATIC_ASSERT( Namespace::MaxNsLenWithNUL == MaxDatabaseNameLen );
- BOOST_STATIC_ASSERT((int)Namespace::MaxNsLenWithNUL == (int)NamespaceString::MaxNsLenWithNUL);
- BOOST_STATIC_ASSERT((int)Namespace::MaxNsLen == (int)NamespaceString::MaxNsLen);
- // Note the typo.
- BOOST_STATIC_ASSERT((int)Namespace::MaxNsColletionLen == (int)NamespaceString::MaxNsCollectionLen);
- }
+namespace {
+BOOST_STATIC_ASSERT(sizeof(Namespace) == 128);
+BOOST_STATIC_ASSERT(Namespace::MaxNsLenWithNUL == MaxDatabaseNameLen);
+BOOST_STATIC_ASSERT((int)Namespace::MaxNsLenWithNUL == (int)NamespaceString::MaxNsLenWithNUL);
+BOOST_STATIC_ASSERT((int)Namespace::MaxNsLen == (int)NamespaceString::MaxNsLen);
+// Note the typo.
+BOOST_STATIC_ASSERT((int)Namespace::MaxNsColletionLen == (int)NamespaceString::MaxNsCollectionLen);
+}
}
-
diff --git a/src/mongo/db/storage/mmap_v1/catalog/namespace.h b/src/mongo/db/storage/mmap_v1/catalog/namespace.h
index 556e7adf889..f93112de47f 100644
--- a/src/mongo/db/storage/mmap_v1/catalog/namespace.h
+++ b/src/mongo/db/storage/mmap_v1/catalog/namespace.h
@@ -38,55 +38,77 @@
namespace mongo {
#pragma pack(1)
- /**
-     * This is used for storing a namespace on disk in a fixed width form;
-     * it should only be used for that, not for passing namespaces around
-     * internally. For that, please use NamespaceString.
- */
- class Namespace {
- public:
- Namespace(StringData ns) { *this = ns; }
- Namespace& operator=(StringData ns);
-
- void kill() { buf[0] = 0x7f; }
-
- bool operator==(const char *r) const { return strcmp(buf, r) == 0; }
- bool operator==(const Namespace& r) const { return strcmp(buf, r.buf) == 0; }
- bool operator!=(const char *r) const { return strcmp(buf, r) != 0; }
- bool operator!=(const Namespace& r) const { return strcmp(buf, r.buf) != 0; }
-
- bool hasDollarSign() const { return strchr( buf , '$' ) != NULL; }
-
- int hash() const; // value returned is always > 0
-
- size_t size() const { return strlen( buf ); }
-
- std::string toString() const { return buf; }
- operator std::string() const { return buf; }
-
-        /* NamespaceDetails::Extra was added after the fact to allow chaining of data blocks to support more than 10 indexes
- (more than 10 IndexDetails). It's a bit hacky because of this late addition with backward
- file support. */
- std::string extraName(int i) const;
- bool isExtra() const; /* ends with $extr... -- when true an extra block not a normal NamespaceDetails block */
-
- enum MaxNsLenValue {
-            // Maximum possible length of the name of any namespace, including special ones like $extra.
-            // This includes room for the NUL byte so it can be used when sizing buffers.
- MaxNsLenWithNUL = 128,
-
- // MaxNsLenWithNUL excluding the NUL byte. Use this when comparing std::string lengths.
- MaxNsLen = MaxNsLenWithNUL - 1,
-
- // Maximum allowed length of fully qualified namespace name of any real collection.
- // Does not include NUL so it can be directly compared to std::string lengths.
- MaxNsColletionLen = MaxNsLen - 7/*strlen(".$extra")*/,
- };
- private:
- char buf[MaxNsLenWithNUL];
+/**
+ * This is used for storing a namespace on disk in a fixed width form;
+ * it should only be used for that, not for passing namespaces around
+ * internally. For that, please use NamespaceString.
+ */
+class Namespace {
+public:
+ Namespace(StringData ns) {
+ *this = ns;
+ }
+ Namespace& operator=(StringData ns);
+
+ void kill() {
+ buf[0] = 0x7f;
+ }
+
+ bool operator==(const char* r) const {
+ return strcmp(buf, r) == 0;
+ }
+ bool operator==(const Namespace& r) const {
+ return strcmp(buf, r.buf) == 0;
+ }
+ bool operator!=(const char* r) const {
+ return strcmp(buf, r) != 0;
+ }
+ bool operator!=(const Namespace& r) const {
+ return strcmp(buf, r.buf) != 0;
+ }
+
+ bool hasDollarSign() const {
+ return strchr(buf, '$') != NULL;
+ }
+
+ int hash() const; // value returned is always > 0
+
+ size_t size() const {
+ return strlen(buf);
+ }
+
+ std::string toString() const {
+ return buf;
+ }
+ operator std::string() const {
+ return buf;
+ }
+
+    /* NamespaceDetails::Extra was added after the fact to allow chaining of data blocks to support more than 10 indexes
+ (more than 10 IndexDetails). It's a bit hacky because of this late addition with backward
+ file support. */
+ std::string extraName(int i) const;
+    /* ends with $extr... -- when true an extra block not a normal NamespaceDetails block */
+    bool isExtra() const;
+
+ enum MaxNsLenValue {
+        // Maximum possible length of the name of any namespace, including special ones like $extra.
+        // This includes room for the NUL byte so it can be used when sizing buffers.
+ MaxNsLenWithNUL = 128,
+
+ // MaxNsLenWithNUL excluding the NUL byte. Use this when comparing std::string lengths.
+ MaxNsLen = MaxNsLenWithNUL - 1,
+
+ // Maximum allowed length of fully qualified namespace name of any real collection.
+ // Does not include NUL so it can be directly compared to std::string lengths.
+ MaxNsColletionLen = MaxNsLen - 7 /*strlen(".$extra")*/,
};
+
+private:
+ char buf[MaxNsLenWithNUL];
+};
#pragma pack()
-} // namespace mongo
+} // namespace mongo
#include "mongo/db/storage/mmap_v1/catalog/namespace-inl.h"
diff --git a/src/mongo/db/storage/mmap_v1/catalog/namespace_details.cpp b/src/mongo/db/storage/mmap_v1/catalog/namespace_details.cpp
index 38fa8a7ae00..538a4500906 100644
--- a/src/mongo/db/storage/mmap_v1/catalog/namespace_details.cpp
+++ b/src/mongo/db/storage/mmap_v1/catalog/namespace_details.cpp
@@ -51,195 +51,193 @@
namespace mongo {
- NamespaceDetails::NamespaceDetails( const DiskLoc &loc, bool capped ) {
- BOOST_STATIC_ASSERT( sizeof(NamespaceDetails::Extra) <= sizeof(NamespaceDetails) );
-
- /* be sure to initialize new fields here -- doesn't default to zeroes the way we use it */
- firstExtent = lastExtent = capExtent = loc;
- stats.datasize = stats.nrecords = 0;
- lastExtentSize = 0;
- nIndexes = 0;
- isCapped = capped;
- maxDocsInCapped = 0x7fffffff; // no limit (value is for pre-v2.3.2 compatibility)
- paddingFactorOldDoNotUse = 1.0;
- systemFlagsOldDoNotUse = 0;
- userFlags = 0;
- capFirstNewRecord = DiskLoc();
- // Signal that we are on first allocation iteration through extents.
- capFirstNewRecord.setInvalid();
- // For capped case, signal that we are doing initial extent allocation.
- if ( capped ) {
- // WAS: cappedLastDelRecLastExtent().setInvalid();
- deletedListSmall[1].setInvalid();
- }
- verify( sizeof(_dataFileVersion) == 2 );
- _dataFileVersion = 0;
- _indexFileVersion = 0;
- multiKeyIndexBits = 0;
- _reservedA = 0;
- _extraOffset = 0;
- indexBuildsInProgress = 0;
- memset(_reserved, 0, sizeof(_reserved));
+NamespaceDetails::NamespaceDetails(const DiskLoc& loc, bool capped) {
+ BOOST_STATIC_ASSERT(sizeof(NamespaceDetails::Extra) <= sizeof(NamespaceDetails));
+
+ /* be sure to initialize new fields here -- doesn't default to zeroes the way we use it */
+ firstExtent = lastExtent = capExtent = loc;
+ stats.datasize = stats.nrecords = 0;
+ lastExtentSize = 0;
+ nIndexes = 0;
+ isCapped = capped;
+ maxDocsInCapped = 0x7fffffff; // no limit (value is for pre-v2.3.2 compatibility)
+ paddingFactorOldDoNotUse = 1.0;
+ systemFlagsOldDoNotUse = 0;
+ userFlags = 0;
+ capFirstNewRecord = DiskLoc();
+ // Signal that we are on first allocation iteration through extents.
+ capFirstNewRecord.setInvalid();
+ // For capped case, signal that we are doing initial extent allocation.
+ if (capped) {
+ // WAS: cappedLastDelRecLastExtent().setInvalid();
+ deletedListSmall[1].setInvalid();
}
-
- NamespaceDetails::Extra* NamespaceDetails::allocExtra( OperationContext* txn,
- StringData ns,
- NamespaceIndex& ni,
- int nindexessofar) {
-
- // Namespace details must always be changed under an exclusive DB lock
- const NamespaceString nss(ns);
- invariant(txn->lockState()->isDbLockedForMode(nss.db(), MODE_X));
-
- int i = (nindexessofar - NIndexesBase) / NIndexesExtra;
- verify( i >= 0 && i <= 1 );
-
- Namespace fullns( ns );
- Namespace extrans( fullns.extraName(i) ); // throws UserException if ns name too long
-
- massert( 10350, "allocExtra: base ns missing?", this );
- massert( 10351, "allocExtra: extra already exists", ni.details(extrans) == 0 );
-
- Extra temp;
- temp.init();
-
- ni.add_ns( txn, extrans, reinterpret_cast<NamespaceDetails*>( &temp ) );
- Extra* e = reinterpret_cast<NamespaceDetails::Extra*>( ni.details( extrans ) );
-
- long ofs = e->ofsFrom(this);
- if( i == 0 ) {
- verify( _extraOffset == 0 );
- *txn->recoveryUnit()->writing(&_extraOffset) = ofs;
- verify( extra() == e );
- }
- else {
- Extra *hd = extra();
- verify( hd->next(this) == 0 );
- hd->setNext(txn, ofs);
- }
- return e;
+ verify(sizeof(_dataFileVersion) == 2);
+ _dataFileVersion = 0;
+ _indexFileVersion = 0;
+ multiKeyIndexBits = 0;
+ _reservedA = 0;
+ _extraOffset = 0;
+ indexBuildsInProgress = 0;
+ memset(_reserved, 0, sizeof(_reserved));
+}
+
+NamespaceDetails::Extra* NamespaceDetails::allocExtra(OperationContext* txn,
+ StringData ns,
+ NamespaceIndex& ni,
+ int nindexessofar) {
+ // Namespace details must always be changed under an exclusive DB lock
+ const NamespaceString nss(ns);
+ invariant(txn->lockState()->isDbLockedForMode(nss.db(), MODE_X));
+
+ int i = (nindexessofar - NIndexesBase) / NIndexesExtra;
+ verify(i >= 0 && i <= 1);
+
+ Namespace fullns(ns);
+ Namespace extrans(fullns.extraName(i)); // throws UserException if ns name too long
+
+ massert(10350, "allocExtra: base ns missing?", this);
+ massert(10351, "allocExtra: extra already exists", ni.details(extrans) == 0);
+
+ Extra temp;
+ temp.init();
+
+ ni.add_ns(txn, extrans, reinterpret_cast<NamespaceDetails*>(&temp));
+ Extra* e = reinterpret_cast<NamespaceDetails::Extra*>(ni.details(extrans));
+
+ long ofs = e->ofsFrom(this);
+ if (i == 0) {
+ verify(_extraOffset == 0);
+ *txn->recoveryUnit()->writing(&_extraOffset) = ofs;
+ verify(extra() == e);
+ } else {
+ Extra* hd = extra();
+ verify(hd->next(this) == 0);
+ hd->setNext(txn, ofs);
}
+ return e;
+}
- IndexDetails& NamespaceDetails::idx(int idxNo, bool missingExpected) {
- if( idxNo < NIndexesBase ) {
- IndexDetails& id = _indexes[idxNo];
- return id;
- }
- Extra *e = extra();
- if ( ! e ) {
- if ( missingExpected )
- throw MsgAssertionException( 13283 , "Missing Extra" );
- massert(14045, "missing Extra", e);
- }
- int i = idxNo - NIndexesBase;
- if( i >= NIndexesExtra ) {
- e = e->next(this);
- if ( ! e ) {
- if ( missingExpected )
- throw MsgAssertionException( 14823 , "missing extra" );
- massert(14824, "missing Extra", e);
- }
- i -= NIndexesExtra;
- }
- return e->details[i];
+IndexDetails& NamespaceDetails::idx(int idxNo, bool missingExpected) {
+ if (idxNo < NIndexesBase) {
+ IndexDetails& id = _indexes[idxNo];
+ return id;
}
-
-
- const IndexDetails& NamespaceDetails::idx(int idxNo, bool missingExpected) const {
- if( idxNo < NIndexesBase ) {
- const IndexDetails& id = _indexes[idxNo];
- return id;
- }
- const Extra *e = extra();
- if ( ! e ) {
- if ( missingExpected )
- throw MsgAssertionException( 17421 , "Missing Extra" );
- massert(17422, "missing Extra", e);
- }
- int i = idxNo - NIndexesBase;
- if( i >= NIndexesExtra ) {
- e = e->next(this);
- if ( ! e ) {
- if ( missingExpected )
- throw MsgAssertionException( 17423 , "missing extra" );
- massert(17424, "missing Extra", e);
- }
- i -= NIndexesExtra;
+ Extra* e = extra();
+ if (!e) {
+ if (missingExpected)
+ throw MsgAssertionException(13283, "Missing Extra");
+ massert(14045, "missing Extra", e);
+ }
+ int i = idxNo - NIndexesBase;
+ if (i >= NIndexesExtra) {
+ e = e->next(this);
+ if (!e) {
+ if (missingExpected)
+ throw MsgAssertionException(14823, "missing extra");
+ massert(14824, "missing Extra", e);
}
- return e->details[i];
+ i -= NIndexesExtra;
}
+ return e->details[i];
+}
- NamespaceDetails::IndexIterator::IndexIterator(const NamespaceDetails *_d,
- bool includeBackgroundInProgress) {
- d = _d;
- i = 0;
- n = d->nIndexes;
- if ( includeBackgroundInProgress )
- n += d->indexBuildsInProgress;
- }
- // must be called when renaming a NS to fix up extra
- void NamespaceDetails::copyingFrom( OperationContext* txn,
- StringData thisns,
- NamespaceIndex& ni,
- NamespaceDetails* src) {
- _extraOffset = 0; // we are a copy -- the old value is wrong. fixing it up below.
- Extra *se = src->extra();
- int n = NIndexesBase;
- if( se ) {
- Extra *e = allocExtra(txn, thisns, ni, n);
- while( 1 ) {
- n += NIndexesExtra;
- e->copy(this, *se);
- se = se->next(src);
- if( se == 0 ) break;
- Extra *nxt = allocExtra(txn, thisns, ni, n);
- e->setNext( txn, nxt->ofsFrom(this) );
- e = nxt;
- }
- verify( _extraOffset );
- }
+const IndexDetails& NamespaceDetails::idx(int idxNo, bool missingExpected) const {
+ if (idxNo < NIndexesBase) {
+ const IndexDetails& id = _indexes[idxNo];
+ return id;
}
-
- NamespaceDetails* NamespaceDetails::writingWithoutExtra( OperationContext* txn ) {
- return txn->recoveryUnit()->writing( this );
+ const Extra* e = extra();
+ if (!e) {
+ if (missingExpected)
+ throw MsgAssertionException(17421, "Missing Extra");
+ massert(17422, "missing Extra", e);
}
-
-
- // XXX - this method should go away
- NamespaceDetails *NamespaceDetails::writingWithExtra( OperationContext* txn ) {
- for( Extra *e = extra(); e; e = e->next( this ) ) {
- txn->recoveryUnit()->writing( e );
+ int i = idxNo - NIndexesBase;
+ if (i >= NIndexesExtra) {
+ e = e->next(this);
+ if (!e) {
+ if (missingExpected)
+ throw MsgAssertionException(17423, "missing extra");
+ massert(17424, "missing Extra", e);
}
- return writingWithoutExtra( txn );
+ i -= NIndexesExtra;
}
-
- void NamespaceDetails::setMaxCappedDocs( OperationContext* txn, long long max ) {
- massert( 16499,
- "max in a capped collection has to be < 2^31 or -1",
- CollectionOptions::validMaxCappedDocs( &max ) );
- maxDocsInCapped = max;
+ return e->details[i];
+}
+
+NamespaceDetails::IndexIterator::IndexIterator(const NamespaceDetails* _d,
+ bool includeBackgroundInProgress) {
+ d = _d;
+ i = 0;
+ n = d->nIndexes;
+ if (includeBackgroundInProgress)
+ n += d->indexBuildsInProgress;
+}
+
+// must be called when renaming a NS to fix up extra
+void NamespaceDetails::copyingFrom(OperationContext* txn,
+ StringData thisns,
+ NamespaceIndex& ni,
+ NamespaceDetails* src) {
+ _extraOffset = 0; // we are a copy -- the old value is wrong. fixing it up below.
+ Extra* se = src->extra();
+ int n = NIndexesBase;
+ if (se) {
+ Extra* e = allocExtra(txn, thisns, ni, n);
+ while (1) {
+ n += NIndexesExtra;
+ e->copy(this, *se);
+ se = se->next(src);
+ if (se == 0)
+ break;
+ Extra* nxt = allocExtra(txn, thisns, ni, n);
+ e->setNext(txn, nxt->ofsFrom(this));
+ e = nxt;
+ }
+ verify(_extraOffset);
}
+}
- /* ------------------------------------------------------------------------- */
+NamespaceDetails* NamespaceDetails::writingWithoutExtra(OperationContext* txn) {
+ return txn->recoveryUnit()->writing(this);
+}
- int NamespaceDetails::_catalogFindIndexByName(OperationContext* txn,
- const Collection* coll,
- StringData name,
- bool includeBackgroundInProgress) const {
- IndexIterator i = ii(includeBackgroundInProgress);
- while( i.more() ) {
- const BSONObj obj = coll->docFor(txn, i.next().info.toRecordId()).value();
- if ( name == obj.getStringField("name") )
- return i.pos()-1;
- }
- return -1;
+// XXX - this method should go away
+NamespaceDetails* NamespaceDetails::writingWithExtra(OperationContext* txn) {
+ for (Extra* e = extra(); e; e = e->next(this)) {
+ txn->recoveryUnit()->writing(e);
}
-
- void NamespaceDetails::Extra::setNext( OperationContext* txn,
- long ofs ) {
- *txn->recoveryUnit()->writing(&_next) = ofs;
+ return writingWithoutExtra(txn);
+}
+
+void NamespaceDetails::setMaxCappedDocs(OperationContext* txn, long long max) {
+ massert(16499,
+ "max in a capped collection has to be < 2^31 or -1",
+ CollectionOptions::validMaxCappedDocs(&max));
+ maxDocsInCapped = max;
+}
+
+/* ------------------------------------------------------------------------- */
+
+
+int NamespaceDetails::_catalogFindIndexByName(OperationContext* txn,
+ const Collection* coll,
+ StringData name,
+ bool includeBackgroundInProgress) const {
+ IndexIterator i = ii(includeBackgroundInProgress);
+ while (i.more()) {
+ const BSONObj obj = coll->docFor(txn, i.next().info.toRecordId()).value();
+ if (name == obj.getStringField("name"))
+ return i.pos() - 1;
}
+ return -1;
+}
+
+void NamespaceDetails::Extra::setNext(OperationContext* txn, long ofs) {
+ *txn->recoveryUnit()->writing(&_next) = ofs;
+}
-} // namespace mongo
+} // namespace mongo
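
Worth noting about allocExtra()/Extra::next() above: the Extra chain is linked by self-relative byte offsets (ofsFrom(), _next) rather than pointers, because these structures live inside a memory-mapped .ns file that may map at a different base address on each run. A minimal sketch of the technique, using a hypothetical node type rather than the real Extra:

    #include <cassert>
    #include <cstring>

    // Sketch: chaining records inside one flat buffer via self-relative byte
    // offsets, as NamespaceDetails::Extra does. Pointers would be invalid if
    // the file mapped at a different base address; byte distances survive.
    struct Node {
        long long next_ofs;  // 0 == end of chain; otherwise bytes from `this`
        int payload;

        Node* next() const {
            if (next_ofs == 0)
                return nullptr;
            return (Node*)(((char*)this) + next_ofs);  // mirrors Extra::next()
        }
        void setNext(Node* n) {
            next_ofs = ((char*)n) - ((char*)this);  // mirrors Extra::ofsFrom()
        }
    };

    int main() {
        // Pretend this buffer is a memory-mapped file region.
        alignas(Node) char file[3 * sizeof(Node)];
        std::memset(file, 0, sizeof(file));

        Node* a = (Node*)(file + 0 * sizeof(Node));
        Node* b = (Node*)(file + 2 * sizeof(Node));  // not adjacent: offset spans 2 slots
        a->payload = 1;
        b->payload = 2;
        a->setNext(b);

        assert(a->next() == b);
        assert(a->next()->payload == 2);
        assert(b->next() == nullptr);
        // The stored link is position-independent: it is a byte distance, not
        // an address, so copying `file` wholesale keeps the chain intact.
    }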
diff --git a/src/mongo/db/storage/mmap_v1/catalog/namespace_details.h b/src/mongo/db/storage/mmap_v1/catalog/namespace_details.h
index 9011d6d27f3..5002bf267c7 100644
--- a/src/mongo/db/storage/mmap_v1/catalog/namespace_details.h
+++ b/src/mongo/db/storage/mmap_v1/catalog/namespace_details.h
@@ -35,200 +35,216 @@
namespace mongo {
- class Collection;
- class NamespaceIndex;
- class OperationContext;
+class Collection;
+class NamespaceIndex;
+class OperationContext;
#pragma pack(1)
- /* NamespaceDetails : this is the "header" for a collection that has all its details.
- It's in the .ns file and this is a memory mapped region (thus the pack pragma above).
+/* NamespaceDetails : this is the "header" for a collection that has all its details.
+ It's in the .ns file and this is a memory mapped region (thus the pack pragma above).
+*/
+class NamespaceDetails {
+public:
+ enum { NIndexesMax = 64, NIndexesExtra = 30, NIndexesBase = 10 };
+
+ // deleted lists -- linked lists of deleted records -- are placed in 'buckets' of various
+ // sizes so you can look for a deleted record of about the right size. These buckets are
+ // split into small and large groups for compatibility with old versions.
+ static const int SmallBuckets = 18;
+ static const int LargeBuckets = 8;
+
+
+ /*-------- data fields, as present on disk : */
+
+ DiskLoc firstExtent;
+ DiskLoc lastExtent;
+
+ /* NOTE: capped collections v1 override the meaning of deletedList.
+ deletedList[0] points to a list of free records (DeletedRecord's) for all extents in
+ the capped namespace.
+ deletedList[1] points to the last record in the prev extent. When the "current extent"
+ changes, this value is updated. !deletedList[1].isValid() when this value is not
+ yet computed.
*/
- class NamespaceDetails {
- public:
- enum { NIndexesMax = 64, NIndexesExtra = 30, NIndexesBase = 10 };
+ DiskLoc deletedListSmall[SmallBuckets];
+ DiskLoc deletedListLegacyGrabBag; // old implementations put records of multiple sizes here.
- // deleted lists -- linked lists of deleted records -- are placed in 'buckets' of various
- // sizes so you can look for a deleted record of about the right size. These buckets are
- // split into small and large groups for compatibility with old versions.
- static const int SmallBuckets = 18;
- static const int LargeBuckets = 8;
+ // ofs 168 (8 byte aligned)
+ struct Stats {
+        // datasize and nrecords MUST be adjacent -- code assumes this!
+ long long datasize; // this includes padding, but not record headers
+ long long nrecords;
+ } stats;
- /*-------- data fields, as present on disk : */
+ int lastExtentSize;
- DiskLoc firstExtent;
- DiskLoc lastExtent;
+ int nIndexes;
- /* NOTE: capped collections v1 override the meaning of deletedList.
- deletedList[0] points to a list of free records (DeletedRecord's) for all extents in
- the capped namespace.
- deletedList[1] points to the last record in the prev extent. When the "current extent"
- changes, this value is updated. !deletedList[1].isValid() when this value is not
- yet computed.
- */
- DiskLoc deletedListSmall[SmallBuckets];
- DiskLoc deletedListLegacyGrabBag; // old implementations put records of multiple sizes here.
+ // ofs 192
+ IndexDetails _indexes[NIndexesBase];
- // ofs 168 (8 byte aligned)
- struct Stats {
-            // datasize and nrecords MUST be adjacent -- code assumes this!
- long long datasize; // this includes padding, but not record headers
- long long nrecords;
- } stats;
+public:
+ // ofs 352 (16 byte aligned)
+ int isCapped; // there is wasted space here if I'm right (ERH)
+ int maxDocsInCapped; // max # of objects for a capped table, -1 for inf.
- int lastExtentSize;
+ double paddingFactorOldDoNotUse;
+ // ofs 368 (16)
+ int systemFlagsOldDoNotUse; // things that the system sets/cares about
- int nIndexes;
+    DiskLoc capExtent;  // the "current" extent we're writing to for a capped collection
+ DiskLoc capFirstNewRecord;
- // ofs 192
- IndexDetails _indexes[NIndexesBase];
+    // NamespaceDetails version. So we can do backward compatibility in the future. See filever.h
+    unsigned short _dataFileVersion;
+ unsigned short _indexFileVersion;
- public:
- // ofs 352 (16 byte aligned)
- int isCapped; // there is wasted space here if I'm right (ERH)
+ unsigned long long multiKeyIndexBits;
- int maxDocsInCapped; // max # of objects for a capped table, -1 for inf.
+ // ofs 400 (16)
+ unsigned long long _reservedA;
+ long long _extraOffset; // where the $extra info is located (bytes relative to this)
- double paddingFactorOldDoNotUse;
- // ofs 368 (16)
- int systemFlagsOldDoNotUse; // things that the system sets/cares about
+public:
+ int indexBuildsInProgress; // Number of indexes currently being built
-        DiskLoc capExtent; // the "current" extent we're writing to for a capped collection
- DiskLoc capFirstNewRecord;
+ int userFlags;
- unsigned short _dataFileVersion; // NamespaceDetails version. So we can do backward compatibility in the future. See filever.h
- unsigned short _indexFileVersion;
+ DiskLoc deletedListLarge[LargeBuckets];
- unsigned long long multiKeyIndexBits;
+ // Think carefully before using this. We need at least 8 bytes reserved to leave room for a
+ // DiskLoc pointing to more data (eg in a dummy MmapV1RecordHeader or Extent). There is still _reservedA
+ // above, but these are the final two reserved 8-byte regions.
+ char _reserved[8];
+ /*-------- end data 496 bytes */
+public:
+ explicit NamespaceDetails(const DiskLoc& loc, bool _capped);
- // ofs 400 (16)
- unsigned long long _reservedA;
- long long _extraOffset; // where the $extra info is located (bytes relative to this)
+ class Extra {
+ long long _next;
public:
- int indexBuildsInProgress; // Number of indexes currently being built
-
- int userFlags;
+ IndexDetails details[NIndexesExtra];
- DiskLoc deletedListLarge[LargeBuckets];
+ private:
+ unsigned reserved2;
+ unsigned reserved3;
+ Extra(const Extra&) {
+ verify(false);
+ }
+ Extra& operator=(const Extra& r) {
+ verify(false);
+ return *this;
+ }
- // Think carefully before using this. We need at least 8 bytes reserved to leave room for a
- // DiskLoc pointing to more data (eg in a dummy MmapV1RecordHeader or Extent). There is still _reservedA
- // above, but these are the final two reserved 8-byte regions.
- char _reserved[8];
- /*-------- end data 496 bytes */
public:
- explicit NamespaceDetails( const DiskLoc &loc, bool _capped );
-
- class Extra {
- long long _next;
- public:
- IndexDetails details[NIndexesExtra];
- private:
- unsigned reserved2;
- unsigned reserved3;
- Extra(const Extra&) { verify(false); }
- Extra& operator=(const Extra& r) { verify(false); return *this; }
- public:
- Extra() { }
- long ofsFrom(NamespaceDetails *d) {
- return ((char *) this) - ((char *) d);
- }
- void init() { memset(this, 0, sizeof(Extra)); }
- Extra* next(const NamespaceDetails *d) const {
- if( _next == 0 ) return 0;
- return (Extra*) (((char *) d) + _next);
- }
- void setNext(OperationContext* txn, long ofs);
- void copy(NamespaceDetails *d, const Extra& e) {
- memcpy(this, &e, sizeof(Extra));
- _next = 0;
- }
- };
- Extra* extra() const {
- if( _extraOffset == 0 ) return 0;
- return (Extra *) (((char *) this) + _extraOffset);
+ Extra() {}
+ long ofsFrom(NamespaceDetails* d) {
+ return ((char*)this) - ((char*)d);
}
- /* add extra space for indexes when more than 10 */
- Extra* allocExtra( OperationContext* txn,
- StringData ns,
- NamespaceIndex& ni,
- int nindexessofar );
-
- void copyingFrom( OperationContext* txn,
- StringData thisns,
- NamespaceIndex& ni,
- NamespaceDetails *src); // must be called when renaming a NS to fix up extra
-
+ void init() {
+ memset(this, 0, sizeof(Extra));
+ }
+ Extra* next(const NamespaceDetails* d) const {
+ if (_next == 0)
+ return 0;
+ return (Extra*)(((char*)d) + _next);
+ }
+ void setNext(OperationContext* txn, long ofs);
+ void copy(NamespaceDetails* d, const Extra& e) {
+ memcpy(this, &e, sizeof(Extra));
+ _next = 0;
+ }
+ };
+ Extra* extra() const {
+ if (_extraOffset == 0)
+ return 0;
+ return (Extra*)(((char*)this) + _extraOffset);
+ }
+ /* add extra space for indexes when more than 10 */
+ Extra* allocExtra(OperationContext* txn, StringData ns, NamespaceIndex& ni, int nindexessofar);
+
+ void copyingFrom(OperationContext* txn,
+ StringData thisns,
+ NamespaceIndex& ni,
+ NamespaceDetails* src); // must be called when renaming a NS to fix up extra
+
+public:
+ void setMaxCappedDocs(OperationContext* txn, long long max);
+
+ enum UserFlags {
+ Flag_UsePowerOf2Sizes = 1 << 0,
+ Flag_NoPadding = 1 << 1,
+ };
+
+ IndexDetails& idx(int idxNo, bool missingExpected = false);
+ const IndexDetails& idx(int idxNo, bool missingExpected = false) const;
+
+ class IndexIterator {
public:
- void setMaxCappedDocs( OperationContext* txn, long long max );
-
- enum UserFlags {
- Flag_UsePowerOf2Sizes = 1 << 0,
- Flag_NoPadding = 1 << 1,
- };
-
- IndexDetails& idx(int idxNo, bool missingExpected = false );
- const IndexDetails& idx(int idxNo, bool missingExpected = false ) const;
-
- class IndexIterator {
- public:
- int pos() { return i; } // note this is the next one to come
- bool more() { return i < n; }
- const IndexDetails& next() { return d->idx(i++); }
- private:
- friend class NamespaceDetails;
- int i, n;
- const NamespaceDetails *d;
- IndexIterator(const NamespaceDetails *_d, bool includeBackgroundInProgress);
- };
-
- IndexIterator ii( bool includeBackgroundInProgress = false ) const {
- return IndexIterator(this, includeBackgroundInProgress);
+ int pos() {
+ return i;
+ } // note this is the next one to come
+ bool more() {
+ return i < n;
+ }
+ const IndexDetails& next() {
+ return d->idx(i++);
}
-
- /**
-         * This fetches the IndexDetails for the next empty index slot. The caller must populate
-         * the returned object. This handles allocating extra index space, if necessary.
- */
- IndexDetails& getNextIndexDetails(OperationContext* txn, Collection* collection);
-
- NamespaceDetails *writingWithoutExtra( OperationContext* txn );
-
- /** Make all linked Extra objects writeable as well */
- NamespaceDetails *writingWithExtra( OperationContext* txn );
-
- /**
- * Returns the offset of the specified index name within the array of indexes. Must be
-         * passed the owning collection to resolve the index record entries to objects.
-         *
-         * @return >= 0 if the index name was found, -1 otherwise.
- */
- int _catalogFindIndexByName(OperationContext* txn,
- const Collection* coll,
- StringData name,
- bool includeBackgroundInProgress) const;
private:
-
- /**
-         * swaps all metadata for two indexes
-         * a and b are the two index ids whose contents will be swapped
-         * must hold a lock on the entire collection to do this
- */
- void swapIndex( OperationContext* txn, int a, int b );
-
- friend class IndexCatalog;
- friend class IndexCatalogEntry;
-
- /** Update cappedLastDelRecLastExtent() after capExtent changed in cappedTruncateAfter() */
- void cappedTruncateLastDelUpdate();
- BOOST_STATIC_ASSERT( NIndexesMax <= NIndexesBase + NIndexesExtra*2 );
- BOOST_STATIC_ASSERT( NIndexesMax <= 64 ); // multiKey bits
- BOOST_STATIC_ASSERT( sizeof(NamespaceDetails::Extra) == 496 );
- }; // NamespaceDetails
- BOOST_STATIC_ASSERT( sizeof(NamespaceDetails) == 496 );
+ friend class NamespaceDetails;
+ int i, n;
+ const NamespaceDetails* d;
+ IndexIterator(const NamespaceDetails* _d, bool includeBackgroundInProgress);
+ };
+
+ IndexIterator ii(bool includeBackgroundInProgress = false) const {
+ return IndexIterator(this, includeBackgroundInProgress);
+ }
+
+ /**
+     * This fetches the IndexDetails for the next empty index slot. The caller must populate
+     * the returned object. This handles allocating extra index space, if necessary.
+ */
+ IndexDetails& getNextIndexDetails(OperationContext* txn, Collection* collection);
+
+ NamespaceDetails* writingWithoutExtra(OperationContext* txn);
+
+ /** Make all linked Extra objects writeable as well */
+ NamespaceDetails* writingWithExtra(OperationContext* txn);
+
+ /**
+ * Returns the offset of the specified index name within the array of indexes. Must be
+     * passed the owning collection to resolve the index record entries to objects.
+     *
+     * @return >= 0 if the index name was found, -1 otherwise.
+ */
+ int _catalogFindIndexByName(OperationContext* txn,
+ const Collection* coll,
+ StringData name,
+ bool includeBackgroundInProgress) const;
+
+private:
+ /**
+     * swaps all metadata for two indexes
+     * a and b are the two index ids whose contents will be swapped
+     * must hold a lock on the entire collection to do this
+ */
+ void swapIndex(OperationContext* txn, int a, int b);
+
+ friend class IndexCatalog;
+ friend class IndexCatalogEntry;
+
+ /** Update cappedLastDelRecLastExtent() after capExtent changed in cappedTruncateAfter() */
+ void cappedTruncateLastDelUpdate();
+ BOOST_STATIC_ASSERT(NIndexesMax <= NIndexesBase + NIndexesExtra * 2);
+ BOOST_STATIC_ASSERT(NIndexesMax <= 64); // multiKey bits
+ BOOST_STATIC_ASSERT(sizeof(NamespaceDetails::Extra) == 496);
+}; // NamespaceDetails
+BOOST_STATIC_ASSERT(sizeof(NamespaceDetails) == 496);
#pragma pack()
-} // namespace mongo
+} // namespace mongo
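
multiKeyIndexBits packs one is-multikey flag per index into a single unsigned long long, which is what the BOOST_STATIC_ASSERT(NIndexesMax <= 64) above guards. A sketch of the mask arithmetic the catalog-entry code applies to it (set, clear, test), assuming idxNo < 64:

    #include <cassert>

    // Sketch of the multiKeyIndexBits operations used by
    // NamespaceDetailsCollectionCatalogEntry::isIndexMultikey /
    // setIndexIsMultikey. One bit per index; NIndexesMax <= 64 keeps the
    // whole bitmap in one unsigned long long.
    bool isMultikey(unsigned long long bits, int idxNo) {
        return (bits & (1ULL << idxNo)) != 0;
    }

    unsigned long long setMultikey(unsigned long long bits, int idxNo, bool multikey) {
        unsigned long long mask = 1ULL << idxNo;
        if (multikey)
            return bits | mask;  // set bit idxNo
        return bits & ~mask;     // clear bit idxNo (invert mask, AND it in)
    }

    int main() {
        unsigned long long bits = 0;
        bits = setMultikey(bits, 3, true);
        assert(isMultikey(bits, 3));
        assert(!isMultikey(bits, 2));
        bits = setMultikey(bits, 3, false);
        assert(bits == 0);
        // Bit 63 still fits: the NIndexesMax <= 64 static assert guards this.
        bits = setMultikey(bits, 63, true);
        assert(isMultikey(bits, 63));
    }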
diff --git a/src/mongo/db/storage/mmap_v1/catalog/namespace_details_collection_entry.cpp b/src/mongo/db/storage/mmap_v1/catalog/namespace_details_collection_entry.cpp
index 1d3fef7b918..7e79cfdca9d 100644
--- a/src/mongo/db/storage/mmap_v1/catalog/namespace_details_collection_entry.cpp
+++ b/src/mongo/db/storage/mmap_v1/catalog/namespace_details_collection_entry.cpp
@@ -43,359 +43,350 @@
namespace mongo {
- using std::string;
-
- NamespaceDetailsCollectionCatalogEntry::NamespaceDetailsCollectionCatalogEntry(
- StringData ns,
- NamespaceDetails* details,
- RecordStore* namespacesRecordStore,
- RecordStore* indexRecordStore,
- MMAPV1DatabaseCatalogEntry* db )
- : CollectionCatalogEntry( ns ),
- _details( details ),
- _namespacesRecordStore(namespacesRecordStore),
- _indexRecordStore( indexRecordStore ),
- _db( db ) {
- }
-
- CollectionOptions NamespaceDetailsCollectionCatalogEntry::getCollectionOptions(OperationContext* txn) const {
- CollectionOptions options = _db->getCollectionOptions( txn, ns().ns() );
-
- if (options.flagsSet) {
- if (options.flags != _details->userFlags) {
- warning() << "system.namespaces and NamespaceDetails disagree about userFlags."
- << " system.namespaces: " << options.flags
- << " NamespaceDetails: " << _details->userFlags;
- dassert(options.flags == _details->userFlags);
- }
+using std::string;
+
+NamespaceDetailsCollectionCatalogEntry::NamespaceDetailsCollectionCatalogEntry(
+ StringData ns,
+ NamespaceDetails* details,
+ RecordStore* namespacesRecordStore,
+ RecordStore* indexRecordStore,
+ MMAPV1DatabaseCatalogEntry* db)
+ : CollectionCatalogEntry(ns),
+ _details(details),
+ _namespacesRecordStore(namespacesRecordStore),
+ _indexRecordStore(indexRecordStore),
+ _db(db) {}
+
+CollectionOptions NamespaceDetailsCollectionCatalogEntry::getCollectionOptions(
+ OperationContext* txn) const {
+ CollectionOptions options = _db->getCollectionOptions(txn, ns().ns());
+
+ if (options.flagsSet) {
+ if (options.flags != _details->userFlags) {
+ warning() << "system.namespaces and NamespaceDetails disagree about userFlags."
+ << " system.namespaces: " << options.flags
+ << " NamespaceDetails: " << _details->userFlags;
+ dassert(options.flags == _details->userFlags);
}
-
- // Fill in the actual flags from the NamespaceDetails.
- // Leaving flagsSet alone since it indicates whether the user actively set the flags.
- options.flags = _details->userFlags;
-
- return options;
}
- int NamespaceDetailsCollectionCatalogEntry::getTotalIndexCount( OperationContext* txn ) const {
- return _details->nIndexes + _details->indexBuildsInProgress;
- }
+ // Fill in the actual flags from the NamespaceDetails.
+ // Leaving flagsSet alone since it indicates whether the user actively set the flags.
+ options.flags = _details->userFlags;
- int NamespaceDetailsCollectionCatalogEntry::getCompletedIndexCount( OperationContext* txn ) const {
- return _details->nIndexes;
- }
+ return options;
+}
- int NamespaceDetailsCollectionCatalogEntry::getMaxAllowedIndexes() const {
- return NamespaceDetails::NIndexesMax;
- }
+int NamespaceDetailsCollectionCatalogEntry::getTotalIndexCount(OperationContext* txn) const {
+ return _details->nIndexes + _details->indexBuildsInProgress;
+}
- void NamespaceDetailsCollectionCatalogEntry::getAllIndexes( OperationContext* txn,
- std::vector<std::string>* names ) const {
- NamespaceDetails::IndexIterator i = _details->ii( true );
- while ( i.more() ) {
- const IndexDetails& id = i.next();
- const BSONObj obj = _indexRecordStore->dataFor( txn, id.info.toRecordId() ).toBson();
- names->push_back( obj.getStringField("name") );
- }
- }
+int NamespaceDetailsCollectionCatalogEntry::getCompletedIndexCount(OperationContext* txn) const {
+ return _details->nIndexes;
+}
- bool NamespaceDetailsCollectionCatalogEntry::isIndexMultikey(OperationContext* txn,
- StringData idxName) const {
- int idxNo = _findIndexNumber( txn, idxName );
- invariant( idxNo >= 0 );
- return isIndexMultikey( idxNo );
- }
+int NamespaceDetailsCollectionCatalogEntry::getMaxAllowedIndexes() const {
+ return NamespaceDetails::NIndexesMax;
+}
- bool NamespaceDetailsCollectionCatalogEntry::isIndexMultikey(int idxNo) const {
- return (_details->multiKeyIndexBits & (((unsigned long long) 1) << idxNo)) != 0;
+void NamespaceDetailsCollectionCatalogEntry::getAllIndexes(OperationContext* txn,
+ std::vector<std::string>* names) const {
+ NamespaceDetails::IndexIterator i = _details->ii(true);
+ while (i.more()) {
+ const IndexDetails& id = i.next();
+ const BSONObj obj = _indexRecordStore->dataFor(txn, id.info.toRecordId()).toBson();
+ names->push_back(obj.getStringField("name"));
}
+}
- bool NamespaceDetailsCollectionCatalogEntry::setIndexIsMultikey(OperationContext* txn,
- StringData indexName,
- bool multikey ) {
+bool NamespaceDetailsCollectionCatalogEntry::isIndexMultikey(OperationContext* txn,
+ StringData idxName) const {
+ int idxNo = _findIndexNumber(txn, idxName);
+ invariant(idxNo >= 0);
+ return isIndexMultikey(idxNo);
+}
- int idxNo = _findIndexNumber( txn, indexName );
- invariant( idxNo >= 0 );
- return setIndexIsMultikey( txn, idxNo, multikey );
- }
+bool NamespaceDetailsCollectionCatalogEntry::isIndexMultikey(int idxNo) const {
+ return (_details->multiKeyIndexBits & (((unsigned long long)1) << idxNo)) != 0;
+}
- bool NamespaceDetailsCollectionCatalogEntry::setIndexIsMultikey(OperationContext* txn,
- int idxNo,
- bool multikey ) {
- unsigned long long mask = 1ULL << idxNo;
+bool NamespaceDetailsCollectionCatalogEntry::setIndexIsMultikey(OperationContext* txn,
+ StringData indexName,
+ bool multikey) {
+ int idxNo = _findIndexNumber(txn, indexName);
+ invariant(idxNo >= 0);
+ return setIndexIsMultikey(txn, idxNo, multikey);
+}
- if (multikey) {
- // Shortcut if the bit is already set correctly
- if (_details->multiKeyIndexBits & mask) {
- return false;
- }
+bool NamespaceDetailsCollectionCatalogEntry::setIndexIsMultikey(OperationContext* txn,
+ int idxNo,
+ bool multikey) {
+ unsigned long long mask = 1ULL << idxNo;
- *txn->recoveryUnit()->writing(&_details->multiKeyIndexBits) |= mask;
+ if (multikey) {
+ // Shortcut if the bit is already set correctly
+ if (_details->multiKeyIndexBits & mask) {
+ return false;
}
- else {
- // Shortcut if the bit is already set correctly
- if (!(_details->multiKeyIndexBits & mask)) {
- return false;
- }
-
- // Invert mask: all 1's except a 0 at the ith bit
- mask = ~mask;
- *txn->recoveryUnit()->writing(&_details->multiKeyIndexBits) &= mask;
+
+ *txn->recoveryUnit()->writing(&_details->multiKeyIndexBits) |= mask;
+ } else {
+ // Shortcut if the bit is already set correctly
+ if (!(_details->multiKeyIndexBits & mask)) {
+ return false;
}
- return true;
+ // Invert mask: all 1's except a 0 at the ith bit
+ mask = ~mask;
+ *txn->recoveryUnit()->writing(&_details->multiKeyIndexBits) &= mask;
}
- RecordId NamespaceDetailsCollectionCatalogEntry::getIndexHead(OperationContext* txn,
- StringData idxName) const {
- int idxNo = _findIndexNumber( txn, idxName );
- invariant( idxNo >= 0 );
- return _details->idx( idxNo ).head.toRecordId();
- }
+ return true;
+}
- BSONObj NamespaceDetailsCollectionCatalogEntry::getIndexSpec( OperationContext* txn,
- StringData idxName ) const {
- int idxNo = _findIndexNumber( txn, idxName );
- invariant( idxNo >= 0 );
- const IndexDetails& id = _details->idx( idxNo );
- return _indexRecordStore->dataFor( txn, id.info.toRecordId() ).toBson();
- }
+RecordId NamespaceDetailsCollectionCatalogEntry::getIndexHead(OperationContext* txn,
+ StringData idxName) const {
+ int idxNo = _findIndexNumber(txn, idxName);
+ invariant(idxNo >= 0);
+ return _details->idx(idxNo).head.toRecordId();
+}
- void NamespaceDetailsCollectionCatalogEntry::setIndexHead( OperationContext* txn,
- StringData idxName,
- const RecordId& newHead ) {
- int idxNo = _findIndexNumber( txn, idxName );
- invariant( idxNo >= 0 );
- *txn->recoveryUnit()->writing(&_details->idx(idxNo).head) = DiskLoc::fromRecordId(newHead);
- }
+BSONObj NamespaceDetailsCollectionCatalogEntry::getIndexSpec(OperationContext* txn,
+ StringData idxName) const {
+ int idxNo = _findIndexNumber(txn, idxName);
+ invariant(idxNo >= 0);
+ const IndexDetails& id = _details->idx(idxNo);
+ return _indexRecordStore->dataFor(txn, id.info.toRecordId()).toBson();
+}
- bool NamespaceDetailsCollectionCatalogEntry::isIndexReady( OperationContext* txn,
- StringData idxName ) const {
- int idxNo = _findIndexNumber( txn, idxName );
- invariant( idxNo >= 0 );
- return idxNo < getCompletedIndexCount( txn );
- }
+void NamespaceDetailsCollectionCatalogEntry::setIndexHead(OperationContext* txn,
+ StringData idxName,
+ const RecordId& newHead) {
+ int idxNo = _findIndexNumber(txn, idxName);
+ invariant(idxNo >= 0);
+ *txn->recoveryUnit()->writing(&_details->idx(idxNo).head) = DiskLoc::fromRecordId(newHead);
+}
- int NamespaceDetailsCollectionCatalogEntry::_findIndexNumber( OperationContext* txn,
- StringData idxName ) const {
- NamespaceDetails::IndexIterator i = _details->ii( true );
- while ( i.more() ) {
- const IndexDetails& id = i.next();
- int idxNo = i.pos() - 1;
- const BSONObj obj = _indexRecordStore->dataFor( txn, id.info.toRecordId() ).toBson();
- if ( idxName == obj.getStringField("name") )
- return idxNo;
- }
- return -1;
+bool NamespaceDetailsCollectionCatalogEntry::isIndexReady(OperationContext* txn,
+ StringData idxName) const {
+ int idxNo = _findIndexNumber(txn, idxName);
+ invariant(idxNo >= 0);
+ return idxNo < getCompletedIndexCount(txn);
+}
+
+int NamespaceDetailsCollectionCatalogEntry::_findIndexNumber(OperationContext* txn,
+ StringData idxName) const {
+ NamespaceDetails::IndexIterator i = _details->ii(true);
+ while (i.more()) {
+ const IndexDetails& id = i.next();
+ int idxNo = i.pos() - 1;
+ const BSONObj obj = _indexRecordStore->dataFor(txn, id.info.toRecordId()).toBson();
+ if (idxName == obj.getStringField("name"))
+ return idxNo;
}
+ return -1;
+}
+
+/* remove bit from a bit array - actually remove its slot, not a clear
+ note: this function does not work with x == 63 -- that is ok
+ but keep in mind in the future if max indexes were extended to
+ exactly 64 it would be a problem
+*/
+unsigned long long removeAndSlideBit(unsigned long long b, int x) {
+ unsigned long long tmp = b;
+ return (tmp & ((((unsigned long long)1) << x) - 1)) | ((tmp >> (x + 1)) << x);
+}
- /* remove bit from a bit array - actually remove its slot, not a clear
- note: this function does not work with x == 63 -- that is ok
- but keep in mind in the future if max indexes were extended to
- exactly 64 it would be a problem
- */
- unsigned long long removeAndSlideBit(unsigned long long b, int x) {
- unsigned long long tmp = b;
- return
- (tmp & ((((unsigned long long) 1) << x)-1)) |
- ((tmp >> (x+1)) << x);
+class IndexUpdateTest : public StartupTest {
+public:
+ void run() {
+ verify(removeAndSlideBit(1, 0) == 0);
+ verify(removeAndSlideBit(2, 0) == 1);
+ verify(removeAndSlideBit(2, 1) == 0);
+ verify(removeAndSlideBit(255, 1) == 127);
+ verify(removeAndSlideBit(21, 2) == 9);
+ verify(removeAndSlideBit(0x4000000000000001ULL, 62) == 1);
}
+} iu_unittest;
- class IndexUpdateTest : public StartupTest {
- public:
- void run() {
- verify( removeAndSlideBit(1, 0) == 0 );
- verify( removeAndSlideBit(2, 0) == 1 );
- verify( removeAndSlideBit(2, 1) == 0 );
- verify( removeAndSlideBit(255, 1) == 127 );
- verify( removeAndSlideBit(21, 2) == 9 );
- verify( removeAndSlideBit(0x4000000000000001ULL, 62) == 1 );
- }
- } iu_unittest;
+Status NamespaceDetailsCollectionCatalogEntry::removeIndex(OperationContext* txn,
+ StringData indexName) {
+ int idxNo = _findIndexNumber(txn, indexName);
+ if (idxNo < 0)
+ return Status(ErrorCodes::NamespaceNotFound, "index not found to remove");
- Status NamespaceDetailsCollectionCatalogEntry::removeIndex( OperationContext* txn,
- StringData indexName ) {
- int idxNo = _findIndexNumber( txn, indexName );
- if ( idxNo < 0 )
- return Status( ErrorCodes::NamespaceNotFound, "index not found to remove" );
+ RecordId infoLocation = _details->idx(idxNo).info.toRecordId();
- RecordId infoLocation = _details->idx( idxNo ).info.toRecordId();
+ { // sanity check
+ BSONObj info = _indexRecordStore->dataFor(txn, infoLocation).toBson();
+ invariant(info["name"].String() == indexName);
+ }
- { // sanity check
- BSONObj info = _indexRecordStore->dataFor( txn, infoLocation ).toBson();
- invariant( info["name"].String() == indexName );
+ { // drop the namespace
+ string indexNamespace = IndexDescriptor::makeIndexNamespace(ns().ns(), indexName);
+ Status status = _db->dropCollection(txn, indexNamespace);
+ if (!status.isOK()) {
+ return status;
}
+ }
- { // drop the namespace
- string indexNamespace = IndexDescriptor::makeIndexNamespace( ns().ns(), indexName );
- Status status = _db->dropCollection( txn, indexNamespace );
- if ( !status.isOK() ) {
- return status;
- }
- }
+ { // all info in the .ns file
+ NamespaceDetails* d = _details->writingWithExtra(txn);
- { // all info in the .ns file
- NamespaceDetails* d = _details->writingWithExtra( txn );
+ // fix the _multiKeyIndexBits, by moving all bits above me down one
+ d->multiKeyIndexBits = removeAndSlideBit(d->multiKeyIndexBits, idxNo);
- // fix the _multiKeyIndexBits, by moving all bits above me down one
- d->multiKeyIndexBits = removeAndSlideBit(d->multiKeyIndexBits, idxNo);
+ if (idxNo >= d->nIndexes)
+ d->indexBuildsInProgress--;
+ else
+ d->nIndexes--;
- if ( idxNo >= d->nIndexes )
- d->indexBuildsInProgress--;
- else
- d->nIndexes--;
+ for (int i = idxNo; i < getTotalIndexCount(txn); i++)
+ d->idx(i) = d->idx(i + 1);
- for ( int i = idxNo; i < getTotalIndexCount( txn ); i++ )
- d->idx(i) = d->idx(i+1);
+ d->idx(getTotalIndexCount(txn)) = IndexDetails();
+ }
- d->idx( getTotalIndexCount( txn ) ) = IndexDetails();
- }
+ // remove from system.indexes
+ _indexRecordStore->deleteRecord(txn, infoLocation);
- // remove from system.indexes
- _indexRecordStore->deleteRecord( txn, infoLocation );
+ return Status::OK();
+}
- return Status::OK();
+Status NamespaceDetailsCollectionCatalogEntry::prepareForIndexBuild(OperationContext* txn,
+ const IndexDescriptor* desc) {
+ BSONObj spec = desc->infoObj();
+    // 1) entry in system.indexes
+ StatusWith<RecordId> systemIndexesEntry =
+ _indexRecordStore->insertRecord(txn, spec.objdata(), spec.objsize(), false);
+ if (!systemIndexesEntry.isOK())
+ return systemIndexesEntry.getStatus();
+
+ // 2) NamespaceDetails mods
+ IndexDetails* id;
+ try {
+ id = &_details->idx(getTotalIndexCount(txn), true);
+ } catch (DBException&) {
+ _details->allocExtra(txn, ns().ns(), _db->_namespaceIndex, getTotalIndexCount(txn));
+ id = &_details->idx(getTotalIndexCount(txn), false);
}
- Status NamespaceDetailsCollectionCatalogEntry::prepareForIndexBuild( OperationContext* txn,
- const IndexDescriptor* desc ) {
- BSONObj spec = desc->infoObj();
-        // 1) entry in system.indexes
- StatusWith<RecordId> systemIndexesEntry = _indexRecordStore->insertRecord( txn,
- spec.objdata(),
- spec.objsize(),
- false );
- if ( !systemIndexesEntry.isOK() )
- return systemIndexesEntry.getStatus();
-
- // 2) NamespaceDetails mods
- IndexDetails *id;
- try {
- id = &_details->idx(getTotalIndexCount( txn ), true);
- }
- catch( DBException& ) {
- _details->allocExtra(txn,
- ns().ns(),
- _db->_namespaceIndex,
- getTotalIndexCount( txn ));
- id = &_details->idx(getTotalIndexCount( txn ), false);
- }
-
- const DiskLoc infoLoc = DiskLoc::fromRecordId(systemIndexesEntry.getValue());
- *txn->recoveryUnit()->writing( &id->info ) = infoLoc;
- *txn->recoveryUnit()->writing( &id->head ) = DiskLoc();
+ const DiskLoc infoLoc = DiskLoc::fromRecordId(systemIndexesEntry.getValue());
+ *txn->recoveryUnit()->writing(&id->info) = infoLoc;
+ *txn->recoveryUnit()->writing(&id->head) = DiskLoc();
- txn->recoveryUnit()->writingInt( _details->indexBuildsInProgress ) += 1;
+ txn->recoveryUnit()->writingInt(_details->indexBuildsInProgress) += 1;
- // 3) indexes entry in .ns file and system.namespaces
- _db->createNamespaceForIndex(txn, desc->indexNamespace());
+ // 3) indexes entry in .ns file and system.namespaces
+ _db->createNamespaceForIndex(txn, desc->indexNamespace());
- return Status::OK();
- }
+ return Status::OK();
+}
- void NamespaceDetailsCollectionCatalogEntry::indexBuildSuccess( OperationContext* txn,
- StringData indexName ) {
- int idxNo = _findIndexNumber( txn, indexName );
- fassert( 17202, idxNo >= 0 );
+void NamespaceDetailsCollectionCatalogEntry::indexBuildSuccess(OperationContext* txn,
+ StringData indexName) {
+ int idxNo = _findIndexNumber(txn, indexName);
+ fassert(17202, idxNo >= 0);
- // Make sure the newly created index is relocated to nIndexes, if it isn't already there
- if ( idxNo != getCompletedIndexCount( txn ) ) {
- int toIdxNo = getCompletedIndexCount( txn );
+ // Make sure the newly created index is relocated to nIndexes, if it isn't already there
+ if (idxNo != getCompletedIndexCount(txn)) {
+ int toIdxNo = getCompletedIndexCount(txn);
- //_details->swapIndex( txn, idxNo, toIdxNo );
+ //_details->swapIndex( txn, idxNo, toIdxNo );
- // flip main meta data
- IndexDetails temp = _details->idx(idxNo);
- *txn->recoveryUnit()->writing(&_details->idx(idxNo)) = _details->idx(toIdxNo);
- *txn->recoveryUnit()->writing(&_details->idx(toIdxNo)) = temp;
+ // flip main meta data
+ IndexDetails temp = _details->idx(idxNo);
+ *txn->recoveryUnit()->writing(&_details->idx(idxNo)) = _details->idx(toIdxNo);
+ *txn->recoveryUnit()->writing(&_details->idx(toIdxNo)) = temp;
- // flip multi key bits
- bool tempMultikey = isIndexMultikey(idxNo);
- setIndexIsMultikey( txn, idxNo, isIndexMultikey(toIdxNo) );
- setIndexIsMultikey( txn, toIdxNo, tempMultikey );
+ // flip multi key bits
+ bool tempMultikey = isIndexMultikey(idxNo);
+ setIndexIsMultikey(txn, idxNo, isIndexMultikey(toIdxNo));
+ setIndexIsMultikey(txn, toIdxNo, tempMultikey);
- idxNo = toIdxNo;
- invariant( (idxNo = _findIndexNumber( txn, indexName )) );
- }
+ idxNo = toIdxNo;
+ invariant((idxNo = _findIndexNumber(txn, indexName)));
+ }
- txn->recoveryUnit()->writingInt( _details->indexBuildsInProgress ) -= 1;
- txn->recoveryUnit()->writingInt( _details->nIndexes ) += 1;
+ txn->recoveryUnit()->writingInt(_details->indexBuildsInProgress) -= 1;
+ txn->recoveryUnit()->writingInt(_details->nIndexes) += 1;
- invariant( isIndexReady( txn, indexName ) );
- }
+ invariant(isIndexReady(txn, indexName));
+}
- void NamespaceDetailsCollectionCatalogEntry::updateTTLSetting( OperationContext* txn,
- StringData idxName,
- long long newExpireSeconds ) {
- int idx = _findIndexNumber( txn, idxName );
- invariant( idx >= 0 );
+void NamespaceDetailsCollectionCatalogEntry::updateTTLSetting(OperationContext* txn,
+ StringData idxName,
+ long long newExpireSeconds) {
+ int idx = _findIndexNumber(txn, idxName);
+ invariant(idx >= 0);
- IndexDetails& indexDetails = _details->idx( idx );
+ IndexDetails& indexDetails = _details->idx(idx);
- BSONObj obj = _indexRecordStore->dataFor( txn, indexDetails.info.toRecordId() ).toBson();
- const BSONElement oldExpireSecs = obj.getField("expireAfterSeconds");
+ BSONObj obj = _indexRecordStore->dataFor(txn, indexDetails.info.toRecordId()).toBson();
+ const BSONElement oldExpireSecs = obj.getField("expireAfterSeconds");
- // Important that we set the new value in-place. We are writing directly to the
- // object here so must be careful not to overwrite with a longer numeric type.
+ // Important that we set the new value in-place. We are writing directly to the
+ // object here so must be careful not to overwrite with a longer numeric type.
- char* nonConstPtr = const_cast<char*>(oldExpireSecs.value());
- switch( oldExpireSecs.type() ) {
+ char* nonConstPtr = const_cast<char*>(oldExpireSecs.value());
+ switch (oldExpireSecs.type()) {
case EOO:
- massert( 16631, "index does not have an 'expireAfterSeconds' field", false );
+ massert(16631, "index does not have an 'expireAfterSeconds' field", false);
break;
case NumberInt:
*txn->recoveryUnit()->writing(reinterpret_cast<int*>(nonConstPtr)) = newExpireSeconds;
break;
case NumberDouble:
- *txn->recoveryUnit()->writing(reinterpret_cast<double*>(nonConstPtr)) = newExpireSeconds;
+ *txn->recoveryUnit()->writing(reinterpret_cast<double*>(nonConstPtr)) =
+ newExpireSeconds;
break;
case NumberLong:
- *txn->recoveryUnit()->writing(reinterpret_cast<long long*>(nonConstPtr)) = newExpireSeconds;
+ *txn->recoveryUnit()->writing(reinterpret_cast<long long*>(nonConstPtr)) =
+ newExpireSeconds;
break;
default:
- massert( 16632, "current 'expireAfterSeconds' is not a number", false );
- }
+ massert(16632, "current 'expireAfterSeconds' is not a number", false);
}
+}
namespace {
- void updateSystemNamespaces(OperationContext* txn, RecordStore* namespaces,
- const NamespaceString& ns, const BSONObj& update) {
-
- if (!namespaces)
- return;
-
- auto cursor = namespaces->getCursor(txn);
- while (auto record = cursor->next()) {
- BSONObj oldEntry = record->data.releaseToBson();
- BSONElement e = oldEntry["name"];
- if (e.type() != String)
- continue;
-
- if (e.String() != ns.ns())
- continue;
-
- const BSONObj newEntry = applyUpdateOperators(oldEntry, update);
- StatusWith<RecordId> result = namespaces->updateRecord(txn, record->id,
- newEntry.objdata(),
- newEntry.objsize(),
- false, NULL);
- fassert(17486, result.getStatus());
- return;
- }
- fassertFailed(17488);
+void updateSystemNamespaces(OperationContext* txn,
+ RecordStore* namespaces,
+ const NamespaceString& ns,
+ const BSONObj& update) {
+ if (!namespaces)
+ return;
+
+ auto cursor = namespaces->getCursor(txn);
+ while (auto record = cursor->next()) {
+ BSONObj oldEntry = record->data.releaseToBson();
+ BSONElement e = oldEntry["name"];
+ if (e.type() != String)
+ continue;
+
+ if (e.String() != ns.ns())
+ continue;
+
+ const BSONObj newEntry = applyUpdateOperators(oldEntry, update);
+ StatusWith<RecordId> result = namespaces->updateRecord(
+ txn, record->id, newEntry.objdata(), newEntry.objsize(), false, NULL);
+ fassert(17486, result.getStatus());
+ return;
}
+ fassertFailed(17488);
+}
}
- void NamespaceDetailsCollectionCatalogEntry::updateFlags(OperationContext* txn, int newValue) {
- NamespaceDetailsRSV1MetaData md(ns().ns(), _details);
- md.replaceUserFlags(txn, newValue);
- updateSystemNamespaces(txn, _namespacesRecordStore, ns(),
- BSON("$set" << BSON("options.flags" << newValue)));
- }
+void NamespaceDetailsCollectionCatalogEntry::updateFlags(OperationContext* txn, int newValue) {
+ NamespaceDetailsRSV1MetaData md(ns().ns(), _details);
+ md.replaceUserFlags(txn, newValue);
+ updateSystemNamespaces(
+ txn, _namespacesRecordStore, ns(), BSON("$set" << BSON("options.flags" << newValue)));
+}
- void NamespaceDetailsCollectionCatalogEntry::updateValidator(OperationContext* txn,
- const BSONObj& validator) {
- updateSystemNamespaces(txn, _namespacesRecordStore, ns(),
- BSON("$set" << BSON("options.validator" << validator)));
- }
+void NamespaceDetailsCollectionCatalogEntry::updateValidator(OperationContext* txn,
+ const BSONObj& validator) {
+ updateSystemNamespaces(
+ txn, _namespacesRecordStore, ns(), BSON("$set" << BSON("options.validator" << validator)));
+}
}
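The reformatted updateTTLSetting above rewrites the stored expireAfterSeconds value in place inside the serialized index spec, which is why it switches on the element's current BSON type: the write must match the existing field's width exactly, or it will clobber the bytes of whatever follows. A minimal standalone sketch of that hazard, using plain buffers rather than the real BSON classes:

// Standalone sketch (not the real BSON classes) of why updateTTLSetting
// switches on the stored element's type before writing: the value lives
// inside a contiguous serialized buffer, so the write must match the
// existing field's width exactly or it will clobber the next field.
#include <cassert>
#include <cstdint>
#include <cstring>
#include <iostream>

int main() {
    // Fake serialized layout: [int32 expireAfterSeconds][int32 nextField]
    unsigned char buf[8] = {0};
    int32_t oldExpire = 3600, next = 42;
    std::memcpy(buf, &oldExpire, sizeof(oldExpire));
    std::memcpy(buf + 4, &next, sizeof(next));

    long long newExpireSeconds = 7200;  // caller passes a long long

    // Correct: narrow to the width actually stored (here, int32).
    int32_t narrowed = static_cast<int32_t>(newExpireSeconds);
    std::memcpy(buf, &narrowed, sizeof(narrowed));

    int32_t check;
    std::memcpy(&check, buf + 4, sizeof(check));
    assert(check == 42);  // neighbouring field untouched

    // Writing all 8 bytes of newExpireSeconds here instead would have
    // overwritten 'next' -- the bug the type switch guards against.
    std::cout << "neighbour intact: " << check << "\n";
    return 0;
}

The real code additionally routes each store through the recovery unit's writing() call so the in-place modification is journaled.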
diff --git a/src/mongo/db/storage/mmap_v1/catalog/namespace_details_collection_entry.h b/src/mongo/db/storage/mmap_v1/catalog/namespace_details_collection_entry.h
index 9080c24c776..2d6751345d6 100644
--- a/src/mongo/db/storage/mmap_v1/catalog/namespace_details_collection_entry.h
+++ b/src/mongo/db/storage/mmap_v1/catalog/namespace_details_collection_entry.h
@@ -37,84 +37,73 @@
namespace mongo {
- class NamespaceDetails;
+class NamespaceDetails;
- class MMAPV1DatabaseCatalogEntry;;
- class RecordStore;
- class OperationContext;
+class MMAPV1DatabaseCatalogEntry;
+class RecordStore;
+class OperationContext;
- class NamespaceDetailsCollectionCatalogEntry : public CollectionCatalogEntry {
- public:
- NamespaceDetailsCollectionCatalogEntry( StringData ns,
- NamespaceDetails* details,
- RecordStore* namespacesRecordStore,
- RecordStore* indexRecordStore,
- MMAPV1DatabaseCatalogEntry* db );
+class NamespaceDetailsCollectionCatalogEntry : public CollectionCatalogEntry {
+public:
+ NamespaceDetailsCollectionCatalogEntry(StringData ns,
+ NamespaceDetails* details,
+ RecordStore* namespacesRecordStore,
+ RecordStore* indexRecordStore,
+ MMAPV1DatabaseCatalogEntry* db);
- ~NamespaceDetailsCollectionCatalogEntry(){}
+ ~NamespaceDetailsCollectionCatalogEntry() {}
- CollectionOptions getCollectionOptions(OperationContext* txn) const final;
+ CollectionOptions getCollectionOptions(OperationContext* txn) const final;
- int getTotalIndexCount(OperationContext* txn) const final;
+ int getTotalIndexCount(OperationContext* txn) const final;
- int getCompletedIndexCount(OperationContext* txn) const final;
+ int getCompletedIndexCount(OperationContext* txn) const final;
- int getMaxAllowedIndexes() const final;
+ int getMaxAllowedIndexes() const final;
- void getAllIndexes( OperationContext* txn,
- std::vector<std::string>* names ) const final;
+ void getAllIndexes(OperationContext* txn, std::vector<std::string>* names) const final;
- BSONObj getIndexSpec( OperationContext* txn,
- StringData idxName ) const final;
+ BSONObj getIndexSpec(OperationContext* txn, StringData idxName) const final;
- bool isIndexMultikey(OperationContext* txn,
- StringData indexName) const final;
- bool isIndexMultikey(int idxNo) const;
+ bool isIndexMultikey(OperationContext* txn, StringData indexName) const final;
+ bool isIndexMultikey(int idxNo) const;
- bool setIndexIsMultikey(OperationContext* txn,
- int idxNo,
- bool multikey = true);
- bool setIndexIsMultikey(OperationContext* txn,
- StringData indexName,
- bool multikey = true) final;
+ bool setIndexIsMultikey(OperationContext* txn, int idxNo, bool multikey = true);
+ bool setIndexIsMultikey(OperationContext* txn,
+ StringData indexName,
+ bool multikey = true) final;
- RecordId getIndexHead( OperationContext* txn,
- StringData indexName ) const final;
+ RecordId getIndexHead(OperationContext* txn, StringData indexName) const final;
- void setIndexHead( OperationContext* txn,
- StringData indexName,
- const RecordId& newHead ) final;
+ void setIndexHead(OperationContext* txn, StringData indexName, const RecordId& newHead) final;
- bool isIndexReady( OperationContext* txn,
- StringData indexName ) const final;
+ bool isIndexReady(OperationContext* txn, StringData indexName) const final;
- Status removeIndex( OperationContext* txn,
- StringData indexName ) final;
+ Status removeIndex(OperationContext* txn, StringData indexName) final;
- Status prepareForIndexBuild( OperationContext* txn,
- const IndexDescriptor* spec ) final;
+ Status prepareForIndexBuild(OperationContext* txn, const IndexDescriptor* spec) final;
- void indexBuildSuccess( OperationContext* txn,
- StringData indexName ) final;
+ void indexBuildSuccess(OperationContext* txn, StringData indexName) final;
- void updateTTLSetting( OperationContext* txn,
- StringData idxName,
- long long newExpireSeconds ) final;
+ void updateTTLSetting(OperationContext* txn,
+ StringData idxName,
+ long long newExpireSeconds) final;
- void updateFlags(OperationContext* txn, int newValue) final;
+ void updateFlags(OperationContext* txn, int newValue) final;
- void updateValidator(OperationContext* txn, const BSONObj& validator) final;
+ void updateValidator(OperationContext* txn, const BSONObj& validator) final;
- // not part of interface, but available to my storage engine
+    // Not part of the interface, but available to the MMAPv1 storage engine.
- int _findIndexNumber( OperationContext* txn, StringData indexName) const;
+ int _findIndexNumber(OperationContext* txn, StringData indexName) const;
- private:
- NamespaceDetails* _details;
- RecordStore* _namespacesRecordStore;
- RecordStore* _indexRecordStore;
- MMAPV1DatabaseCatalogEntry* _db;
+private:
+ NamespaceDetails* _details;
+ RecordStore* _namespacesRecordStore;
+ RecordStore* _indexRecordStore;
+ MMAPV1DatabaseCatalogEntry* _db;
- friend class MMAPV1DatabaseCatalogEntry;
- };
+ friend class MMAPV1DatabaseCatalogEntry;
+};
}
diff --git a/src/mongo/db/storage/mmap_v1/catalog/namespace_details_rsv1_metadata.cpp b/src/mongo/db/storage/mmap_v1/catalog/namespace_details_rsv1_metadata.cpp
index 5c95ec2bbc7..51fc1c1ed75 100644
--- a/src/mongo/db/storage/mmap_v1/catalog/namespace_details_rsv1_metadata.cpp
+++ b/src/mongo/db/storage/mmap_v1/catalog/namespace_details_rsv1_metadata.cpp
@@ -35,169 +35,165 @@
namespace mongo {
- using std::unique_ptr;
- using std::numeric_limits;
+using std::unique_ptr;
+using std::numeric_limits;
- BOOST_STATIC_ASSERT(RecordStoreV1Base::Buckets
- == NamespaceDetails::SmallBuckets + NamespaceDetails::LargeBuckets);
+BOOST_STATIC_ASSERT(RecordStoreV1Base::Buckets ==
+ NamespaceDetails::SmallBuckets + NamespaceDetails::LargeBuckets);
- NamespaceDetailsRSV1MetaData::NamespaceDetailsRSV1MetaData( StringData ns,
- NamespaceDetails* details )
- : _ns( ns.toString() ),
- _details( details ) {
- }
-
- const DiskLoc& NamespaceDetailsRSV1MetaData::capExtent() const {
- return _details->capExtent;
- }
+NamespaceDetailsRSV1MetaData::NamespaceDetailsRSV1MetaData(StringData ns, NamespaceDetails* details)
+ : _ns(ns.toString()), _details(details) {}
- void NamespaceDetailsRSV1MetaData::setCapExtent( OperationContext* txn, const DiskLoc& loc ) {
- *txn->recoveryUnit()->writing( &_details->capExtent ) = loc;
- }
+const DiskLoc& NamespaceDetailsRSV1MetaData::capExtent() const {
+ return _details->capExtent;
+}
- const DiskLoc& NamespaceDetailsRSV1MetaData::capFirstNewRecord() const {
- return _details->capFirstNewRecord;
- }
+void NamespaceDetailsRSV1MetaData::setCapExtent(OperationContext* txn, const DiskLoc& loc) {
+ *txn->recoveryUnit()->writing(&_details->capExtent) = loc;
+}
- void NamespaceDetailsRSV1MetaData::setCapFirstNewRecord( OperationContext* txn,
- const DiskLoc& loc ) {
- *txn->recoveryUnit()->writing( &_details->capFirstNewRecord ) = loc;
- }
+const DiskLoc& NamespaceDetailsRSV1MetaData::capFirstNewRecord() const {
+ return _details->capFirstNewRecord;
+}
- bool NamespaceDetailsRSV1MetaData::capLooped() const {
- return _details->capFirstNewRecord.isValid();
- }
+void NamespaceDetailsRSV1MetaData::setCapFirstNewRecord(OperationContext* txn, const DiskLoc& loc) {
+ *txn->recoveryUnit()->writing(&_details->capFirstNewRecord) = loc;
+}
- long long NamespaceDetailsRSV1MetaData::dataSize() const {
- return _details->stats.datasize;
- }
- long long NamespaceDetailsRSV1MetaData::numRecords() const {
- return _details->stats.nrecords;
- }
+bool NamespaceDetailsRSV1MetaData::capLooped() const {
+ return _details->capFirstNewRecord.isValid();
+}
- void NamespaceDetailsRSV1MetaData::incrementStats( OperationContext* txn,
- long long dataSizeIncrement,
- long long numRecordsIncrement ) {
- // durability todo : this could be a bit annoying / slow to record constantly
- NamespaceDetails::Stats* s = txn->recoveryUnit()->writing( &_details->stats );
- s->datasize += dataSizeIncrement;
- s->nrecords += numRecordsIncrement;
- }
+long long NamespaceDetailsRSV1MetaData::dataSize() const {
+ return _details->stats.datasize;
+}
+long long NamespaceDetailsRSV1MetaData::numRecords() const {
+ return _details->stats.nrecords;
+}
- void NamespaceDetailsRSV1MetaData::setStats( OperationContext* txn,
- long long dataSize,
- long long numRecords ) {
- NamespaceDetails::Stats* s = txn->recoveryUnit()->writing( &_details->stats );
- s->datasize = dataSize;
- s->nrecords = numRecords;
- }
+void NamespaceDetailsRSV1MetaData::incrementStats(OperationContext* txn,
+ long long dataSizeIncrement,
+ long long numRecordsIncrement) {
+    // durability TODO: this could be a bit annoying / slow to record constantly
+ NamespaceDetails::Stats* s = txn->recoveryUnit()->writing(&_details->stats);
+ s->datasize += dataSizeIncrement;
+ s->nrecords += numRecordsIncrement;
+}
- DiskLoc NamespaceDetailsRSV1MetaData::deletedListEntry( int bucket ) const {
- invariant(bucket >= 0 && bucket < RecordStoreV1Base::Buckets);
- const DiskLoc head = (bucket < NamespaceDetails::SmallBuckets)
- ? _details->deletedListSmall[bucket]
- : _details->deletedListLarge[bucket - NamespaceDetails::SmallBuckets];
+void NamespaceDetailsRSV1MetaData::setStats(OperationContext* txn,
+ long long dataSize,
+ long long numRecords) {
+ NamespaceDetails::Stats* s = txn->recoveryUnit()->writing(&_details->stats);
+ s->datasize = dataSize;
+ s->nrecords = numRecords;
+}
- if (head == DiskLoc(0,0)) {
- // This will happen the first time we use a "large" bucket since they were previously
- // zero-initialized.
- return DiskLoc();
- }
+DiskLoc NamespaceDetailsRSV1MetaData::deletedListEntry(int bucket) const {
+ invariant(bucket >= 0 && bucket < RecordStoreV1Base::Buckets);
+ const DiskLoc head = (bucket < NamespaceDetails::SmallBuckets)
+ ? _details->deletedListSmall[bucket]
+ : _details->deletedListLarge[bucket - NamespaceDetails::SmallBuckets];
- return head;
+ if (head == DiskLoc(0, 0)) {
+ // This will happen the first time we use a "large" bucket since they were previously
+ // zero-initialized.
+ return DiskLoc();
}
- void NamespaceDetailsRSV1MetaData::setDeletedListEntry( OperationContext* txn,
- int bucket,
- const DiskLoc& loc ) {
- DiskLoc* head = (bucket < NamespaceDetails::SmallBuckets)
- ? &_details->deletedListSmall[bucket]
- : &_details->deletedListLarge[bucket - NamespaceDetails::SmallBuckets];
- *txn->recoveryUnit()->writing( head ) = loc;
- }
+ return head;
+}
- DiskLoc NamespaceDetailsRSV1MetaData::deletedListLegacyGrabBag() const {
- return _details->deletedListLegacyGrabBag;
- }
+void NamespaceDetailsRSV1MetaData::setDeletedListEntry(OperationContext* txn,
+ int bucket,
+ const DiskLoc& loc) {
+ DiskLoc* head = (bucket < NamespaceDetails::SmallBuckets)
+ ? &_details->deletedListSmall[bucket]
+ : &_details->deletedListLarge[bucket - NamespaceDetails::SmallBuckets];
+ *txn->recoveryUnit()->writing(head) = loc;
+}
- void NamespaceDetailsRSV1MetaData::setDeletedListLegacyGrabBag(OperationContext* txn,
- const DiskLoc& loc) {
- *txn->recoveryUnit()->writing(&_details->deletedListLegacyGrabBag) = loc;
- }
+DiskLoc NamespaceDetailsRSV1MetaData::deletedListLegacyGrabBag() const {
+ return _details->deletedListLegacyGrabBag;
+}
- void NamespaceDetailsRSV1MetaData::orphanDeletedList( OperationContext* txn ) {
- for( int i = 0; i < RecordStoreV1Base::Buckets; i++ ) {
- setDeletedListEntry( txn, i, DiskLoc() );
- }
- setDeletedListLegacyGrabBag(txn, DiskLoc());
- }
+void NamespaceDetailsRSV1MetaData::setDeletedListLegacyGrabBag(OperationContext* txn,
+ const DiskLoc& loc) {
+ *txn->recoveryUnit()->writing(&_details->deletedListLegacyGrabBag) = loc;
+}
- const DiskLoc& NamespaceDetailsRSV1MetaData::firstExtent( OperationContext* txn ) const {
- return _details->firstExtent;
+void NamespaceDetailsRSV1MetaData::orphanDeletedList(OperationContext* txn) {
+ for (int i = 0; i < RecordStoreV1Base::Buckets; i++) {
+ setDeletedListEntry(txn, i, DiskLoc());
}
+ setDeletedListLegacyGrabBag(txn, DiskLoc());
+}
- void NamespaceDetailsRSV1MetaData::setFirstExtent( OperationContext* txn, const DiskLoc& loc ) {
- *txn->recoveryUnit()->writing( &_details->firstExtent ) = loc;
- }
+const DiskLoc& NamespaceDetailsRSV1MetaData::firstExtent(OperationContext* txn) const {
+ return _details->firstExtent;
+}
- const DiskLoc& NamespaceDetailsRSV1MetaData::lastExtent( OperationContext* txn ) const {
- return _details->lastExtent;
- }
+void NamespaceDetailsRSV1MetaData::setFirstExtent(OperationContext* txn, const DiskLoc& loc) {
+ *txn->recoveryUnit()->writing(&_details->firstExtent) = loc;
+}
- void NamespaceDetailsRSV1MetaData::setLastExtent( OperationContext* txn, const DiskLoc& loc ) {
- *txn->recoveryUnit()->writing( &_details->lastExtent ) = loc;
- }
+const DiskLoc& NamespaceDetailsRSV1MetaData::lastExtent(OperationContext* txn) const {
+ return _details->lastExtent;
+}
- bool NamespaceDetailsRSV1MetaData::isCapped() const {
- return _details->isCapped;
- }
+void NamespaceDetailsRSV1MetaData::setLastExtent(OperationContext* txn, const DiskLoc& loc) {
+ *txn->recoveryUnit()->writing(&_details->lastExtent) = loc;
+}
- bool NamespaceDetailsRSV1MetaData::isUserFlagSet( int flag ) const {
- return _details->userFlags & flag;
- }
+bool NamespaceDetailsRSV1MetaData::isCapped() const {
+ return _details->isCapped;
+}
- int NamespaceDetailsRSV1MetaData::userFlags() const {
- return _details->userFlags;
- }
+bool NamespaceDetailsRSV1MetaData::isUserFlagSet(int flag) const {
+ return _details->userFlags & flag;
+}
- bool NamespaceDetailsRSV1MetaData::setUserFlag( OperationContext* txn, int flag ) {
- if ( ( _details->userFlags & flag ) == flag )
- return false;
+int NamespaceDetailsRSV1MetaData::userFlags() const {
+ return _details->userFlags;
+}
- txn->recoveryUnit()->writingInt( _details->userFlags) |= flag;
- return true;
- }
+bool NamespaceDetailsRSV1MetaData::setUserFlag(OperationContext* txn, int flag) {
+ if ((_details->userFlags & flag) == flag)
+ return false;
- bool NamespaceDetailsRSV1MetaData::clearUserFlag( OperationContext* txn, int flag ) {
- if ( ( _details->userFlags & flag ) == 0 )
- return false;
+ txn->recoveryUnit()->writingInt(_details->userFlags) |= flag;
+ return true;
+}
- txn->recoveryUnit()->writingInt(_details->userFlags) &= ~flag;
- return true;
- }
+bool NamespaceDetailsRSV1MetaData::clearUserFlag(OperationContext* txn, int flag) {
+ if ((_details->userFlags & flag) == 0)
+ return false;
- bool NamespaceDetailsRSV1MetaData::replaceUserFlags( OperationContext* txn, int flags ) {
- if ( _details->userFlags == flags )
- return false;
+ txn->recoveryUnit()->writingInt(_details->userFlags) &= ~flag;
+ return true;
+}
- txn->recoveryUnit()->writingInt(_details->userFlags) = flags;
- return true;
- }
+bool NamespaceDetailsRSV1MetaData::replaceUserFlags(OperationContext* txn, int flags) {
+ if (_details->userFlags == flags)
+ return false;
- int NamespaceDetailsRSV1MetaData::lastExtentSize( OperationContext* txn ) const {
- return _details->lastExtentSize;
- }
+ txn->recoveryUnit()->writingInt(_details->userFlags) = flags;
+ return true;
+}
- void NamespaceDetailsRSV1MetaData::setLastExtentSize( OperationContext* txn, int newMax ) {
- if ( _details->lastExtentSize == newMax )
- return;
- txn->recoveryUnit()->writingInt(_details->lastExtentSize) = newMax;
- }
+int NamespaceDetailsRSV1MetaData::lastExtentSize(OperationContext* txn) const {
+ return _details->lastExtentSize;
+}
- long long NamespaceDetailsRSV1MetaData::maxCappedDocs() const {
- invariant( _details->isCapped );
- if ( _details->maxDocsInCapped == 0x7fffffff )
- return numeric_limits<long long>::max();
- return _details->maxDocsInCapped;
- }
+void NamespaceDetailsRSV1MetaData::setLastExtentSize(OperationContext* txn, int newMax) {
+ if (_details->lastExtentSize == newMax)
+ return;
+ txn->recoveryUnit()->writingInt(_details->lastExtentSize) = newMax;
+}
+
+long long NamespaceDetailsRSV1MetaData::maxCappedDocs() const {
+ invariant(_details->isCapped);
+ if (_details->maxDocsInCapped == 0x7fffffff)
+ return numeric_limits<long long>::max();
+ return _details->maxDocsInCapped;
+}
}
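The user-flag mutators above (setUserFlag, clearUserFlag, replaceUserFlags) all share one contract: check for a no-op first and return false, so the caller only pays for a journaled write via writingInt() when a bit actually changes. A minimal sketch of that contract with plain ints and no recovery unit:

// Minimal sketch (plain ints, no recovery unit) of the user-flag helpers'
// contract: each mutator first checks for a no-op and returns false, so
// callers only pay for a journaled write when a bit actually changes.
#include <cassert>

struct Flags {
    int userFlags = 0;

    bool set(int flag) {
        if ((userFlags & flag) == flag)
            return false;  // already set: nothing to journal
        userFlags |= flag;
        return true;
    }
    bool clear(int flag) {
        if ((userFlags & flag) == 0)
            return false;
        userFlags &= ~flag;
        return true;
    }
    bool replace(int flags) {
        if (userFlags == flags)
            return false;
        userFlags = flags;
        return true;
    }
};

int main() {
    Flags f;
    assert(f.set(0x1));   // first set reports a change
    assert(!f.set(0x1));  // idempotent repeat is a no-op
    assert(f.replace(0x3));
    assert(f.clear(0x2));
    assert(!f.clear(0x2));
    return 0;
}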
diff --git a/src/mongo/db/storage/mmap_v1/catalog/namespace_details_rsv1_metadata.h b/src/mongo/db/storage/mmap_v1/catalog/namespace_details_rsv1_metadata.h
index 5bc9c475506..a6fde4807b5 100644
--- a/src/mongo/db/storage/mmap_v1/catalog/namespace_details_rsv1_metadata.h
+++ b/src/mongo/db/storage/mmap_v1/catalog/namespace_details_rsv1_metadata.h
@@ -38,70 +38,65 @@
namespace mongo {
- class RecordStore;
+class RecordStore;
- /*
- * NOTE: NamespaceDetails will become a struct
- * all dur, etc... will move here
- */
- class NamespaceDetailsRSV1MetaData : public RecordStoreV1MetaData {
- public:
- explicit NamespaceDetailsRSV1MetaData( StringData ns, NamespaceDetails* details);
-
- virtual ~NamespaceDetailsRSV1MetaData(){}
+/*
+ * NOTE: NamespaceDetails will become a struct
+ * all dur, etc... will move here
+ */
+class NamespaceDetailsRSV1MetaData : public RecordStoreV1MetaData {
+public:
+ explicit NamespaceDetailsRSV1MetaData(StringData ns, NamespaceDetails* details);
- virtual const DiskLoc& capExtent() const;
- virtual void setCapExtent( OperationContext* txn, const DiskLoc& loc );
+ virtual ~NamespaceDetailsRSV1MetaData() {}
- virtual const DiskLoc& capFirstNewRecord() const;
- virtual void setCapFirstNewRecord( OperationContext* txn, const DiskLoc& loc );
+ virtual const DiskLoc& capExtent() const;
+ virtual void setCapExtent(OperationContext* txn, const DiskLoc& loc);
- virtual bool capLooped() const;
+ virtual const DiskLoc& capFirstNewRecord() const;
+ virtual void setCapFirstNewRecord(OperationContext* txn, const DiskLoc& loc);
- virtual long long dataSize() const;
- virtual long long numRecords() const;
+ virtual bool capLooped() const;
- virtual void incrementStats( OperationContext* txn,
- long long dataSizeIncrement,
- long long numRecordsIncrement );
+ virtual long long dataSize() const;
+ virtual long long numRecords() const;
- virtual void setStats( OperationContext* txn,
- long long dataSize,
- long long numRecords );
+ virtual void incrementStats(OperationContext* txn,
+ long long dataSizeIncrement,
+ long long numRecordsIncrement);
- virtual DiskLoc deletedListEntry( int bucket ) const;
- virtual void setDeletedListEntry( OperationContext* txn,
- int bucket,
- const DiskLoc& loc );
+ virtual void setStats(OperationContext* txn, long long dataSize, long long numRecords);
- virtual DiskLoc deletedListLegacyGrabBag() const;
- virtual void setDeletedListLegacyGrabBag(OperationContext* txn, const DiskLoc& loc);
+ virtual DiskLoc deletedListEntry(int bucket) const;
+ virtual void setDeletedListEntry(OperationContext* txn, int bucket, const DiskLoc& loc);
- virtual void orphanDeletedList(OperationContext* txn);
+ virtual DiskLoc deletedListLegacyGrabBag() const;
+ virtual void setDeletedListLegacyGrabBag(OperationContext* txn, const DiskLoc& loc);
- virtual const DiskLoc& firstExtent( OperationContext* txn ) const;
- virtual void setFirstExtent( OperationContext* txn, const DiskLoc& loc );
+ virtual void orphanDeletedList(OperationContext* txn);
- virtual const DiskLoc& lastExtent( OperationContext* txn ) const;
- virtual void setLastExtent( OperationContext* txn, const DiskLoc& loc );
+ virtual const DiskLoc& firstExtent(OperationContext* txn) const;
+ virtual void setFirstExtent(OperationContext* txn, const DiskLoc& loc);
- virtual bool isCapped() const;
+ virtual const DiskLoc& lastExtent(OperationContext* txn) const;
+ virtual void setLastExtent(OperationContext* txn, const DiskLoc& loc);
- virtual bool isUserFlagSet( int flag ) const;
- virtual int userFlags() const;
- virtual bool setUserFlag( OperationContext* txn, int flag );
- virtual bool clearUserFlag( OperationContext* txn, int flag );
- virtual bool replaceUserFlags( OperationContext* txn, int flags );
+ virtual bool isCapped() const;
- virtual int lastExtentSize( OperationContext* txn ) const;
- virtual void setLastExtentSize( OperationContext* txn, int newMax );
+ virtual bool isUserFlagSet(int flag) const;
+ virtual int userFlags() const;
+ virtual bool setUserFlag(OperationContext* txn, int flag);
+ virtual bool clearUserFlag(OperationContext* txn, int flag);
+ virtual bool replaceUserFlags(OperationContext* txn, int flags);
- virtual long long maxCappedDocs() const;
+ virtual int lastExtentSize(OperationContext* txn) const;
+ virtual void setLastExtentSize(OperationContext* txn, int newMax);
- private:
- std::string _ns;
- NamespaceDetails* _details;
- RecordStore* _namespaceRecordStore;
- };
+ virtual long long maxCappedDocs() const;
+private:
+ std::string _ns;
+ NamespaceDetails* _details;
+ RecordStore* _namespaceRecordStore;
+};
}
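One detail of this interface worth calling out is the maxCappedDocs() sentinel seen in the .cpp hunk above: the on-disk maxDocsInCapped field is a 32-bit slot, and the value 0x7fffffff is reserved to mean "no document limit", widened to the long long maximum for callers. A small sketch of that mapping:

// Sketch of the maxCappedDocs() sentinel convention: 0x7fffffff in the
// 32-bit on-disk slot means "unlimited" and is widened for callers.
#include <cassert>
#include <limits>

long long maxCappedDocs(int maxDocsInCapped) {
    if (maxDocsInCapped == 0x7fffffff)
        return std::numeric_limits<long long>::max();
    return maxDocsInCapped;
}

int main() {
    assert(maxCappedDocs(1000) == 1000);
    assert(maxCappedDocs(0x7fffffff) == std::numeric_limits<long long>::max());
    return 0;
}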
diff --git a/src/mongo/db/storage/mmap_v1/catalog/namespace_index.cpp b/src/mongo/db/storage/mmap_v1/catalog/namespace_index.cpp
index 8f1bb505197..12e90d2db57 100644
--- a/src/mongo/db/storage/mmap_v1/catalog/namespace_index.cpp
+++ b/src/mongo/db/storage/mmap_v1/catalog/namespace_index.cpp
@@ -47,211 +47,194 @@
namespace mongo {
- using std::endl;
- using std::list;
- using std::string;
+using std::endl;
+using std::list;
+using std::string;
- NamespaceIndex::NamespaceIndex(const std::string& dir, const std::string& database)
- : _dir(dir),
- _database(database),
- _ht(nullptr) {
+NamespaceIndex::NamespaceIndex(const std::string& dir, const std::string& database)
+ : _dir(dir), _database(database), _ht(nullptr) {}
- }
-
- NamespaceIndex::~NamespaceIndex() {
-
- }
-
- NamespaceDetails* NamespaceIndex::details(StringData ns) const {
- const Namespace n(ns);
- return details(n);
- }
+NamespaceIndex::~NamespaceIndex() {}
- NamespaceDetails* NamespaceIndex::details(const Namespace& ns) const {
- return _ht->get(ns);
- }
-
- void NamespaceIndex::add_ns( OperationContext* txn,
- StringData ns, const DiskLoc& loc, bool capped) {
- NamespaceDetails details( loc, capped );
- add_ns( txn, ns, &details );
- }
+NamespaceDetails* NamespaceIndex::details(StringData ns) const {
+ const Namespace n(ns);
+ return details(n);
+}
- void NamespaceIndex::add_ns( OperationContext* txn,
- StringData ns,
- const NamespaceDetails* details ) {
- Namespace n(ns);
- add_ns( txn, n, details );
- }
+NamespaceDetails* NamespaceIndex::details(const Namespace& ns) const {
+ return _ht->get(ns);
+}
- void NamespaceIndex::add_ns( OperationContext* txn,
- const Namespace& ns,
- const NamespaceDetails* details ) {
- const NamespaceString nss(ns.toString());
- invariant(txn->lockState()->isDbLockedForMode(nss.db(), MODE_X));
+void NamespaceIndex::add_ns(OperationContext* txn, StringData ns, const DiskLoc& loc, bool capped) {
+ NamespaceDetails details(loc, capped);
+ add_ns(txn, ns, &details);
+}
- massert(17315, "no . in ns", nsIsFull(nss.toString()));
+void NamespaceIndex::add_ns(OperationContext* txn, StringData ns, const NamespaceDetails* details) {
+ Namespace n(ns);
+ add_ns(txn, n, details);
+}
- uassert(10081, "too many namespaces/collections", _ht->put(txn, ns, *details));
- }
+void NamespaceIndex::add_ns(OperationContext* txn,
+ const Namespace& ns,
+ const NamespaceDetails* details) {
+ const NamespaceString nss(ns.toString());
+ invariant(txn->lockState()->isDbLockedForMode(nss.db(), MODE_X));
- void NamespaceIndex::kill_ns( OperationContext* txn, StringData ns) {
- const NamespaceString nss(ns.toString());
- invariant(txn->lockState()->isDbLockedForMode(nss.db(), MODE_X));
+ massert(17315, "no . in ns", nsIsFull(nss.toString()));
- const Namespace n(ns);
- _ht->kill(txn, n);
+ uassert(10081, "too many namespaces/collections", _ht->put(txn, ns, *details));
+}
- if (ns.size() <= Namespace::MaxNsColletionLen) {
- // Larger namespace names don't have room for $extras so they can't exist. The code
- // below would cause an "$extra: ns too large" error and stacktrace to be printed to the
- // log even though everything is fine.
- for( int i = 0; i<=1; i++ ) {
- try {
- Namespace extra(n.extraName(i));
- _ht->kill(txn, extra);
- }
- catch(DBException&) {
- LOG(3) << "caught exception in kill_ns" << endl;
- }
+void NamespaceIndex::kill_ns(OperationContext* txn, StringData ns) {
+ const NamespaceString nss(ns.toString());
+ invariant(txn->lockState()->isDbLockedForMode(nss.db(), MODE_X));
+
+ const Namespace n(ns);
+ _ht->kill(txn, n);
+
+ if (ns.size() <= Namespace::MaxNsColletionLen) {
+ // Larger namespace names don't have room for $extras so they can't exist. The code
+ // below would cause an "$extra: ns too large" error and stacktrace to be printed to the
+ // log even though everything is fine.
+ for (int i = 0; i <= 1; i++) {
+ try {
+ Namespace extra(n.extraName(i));
+ _ht->kill(txn, extra);
+ } catch (DBException&) {
+ LOG(3) << "caught exception in kill_ns" << endl;
}
}
}
+}
- bool NamespaceIndex::pathExists() const {
- return boost::filesystem::exists(path());
- }
-
- boost::filesystem::path NamespaceIndex::path() const {
- boost::filesystem::path ret( _dir );
- if (storageGlobalParams.directoryperdb)
- ret /= _database;
- ret /= ( _database + ".ns" );
- return ret;
- }
+bool NamespaceIndex::pathExists() const {
+ return boost::filesystem::exists(path());
+}
- static void namespaceGetNamespacesCallback( const Namespace& k , NamespaceDetails& v , list<string>* l ) {
- if ( ! k.hasDollarSign() || k == "local.oplog.$main" ) {
- // we call out local.oplog.$main specifically as its the only "normal"
- // collection that has a $, so we make sure it gets added
- l->push_back( k.toString() );
- }
- }
+boost::filesystem::path NamespaceIndex::path() const {
+ boost::filesystem::path ret(_dir);
+ if (storageGlobalParams.directoryperdb)
+ ret /= _database;
+ ret /= (_database + ".ns");
+ return ret;
+}
- void NamespaceIndex::getCollectionNamespaces( list<string>* tofill ) const {
- _ht->iterAll(stdx::bind(namespaceGetNamespacesCallback,
- stdx::placeholders::_1,
- stdx::placeholders::_2,
- tofill));
+static void namespaceGetNamespacesCallback(const Namespace& k,
+ NamespaceDetails& v,
+ list<string>* l) {
+ if (!k.hasDollarSign() || k == "local.oplog.$main") {
+        // we call out local.oplog.$main specifically as it's the only "normal"
+ // collection that has a $, so we make sure it gets added
+ l->push_back(k.toString());
}
+}
- void NamespaceIndex::maybeMkdir() const {
- if (!storageGlobalParams.directoryperdb)
- return;
- boost::filesystem::path dir( _dir );
- dir /= _database;
- if ( !boost::filesystem::exists( dir ) )
- MONGO_ASSERT_ON_EXCEPTION_WITH_MSG( boost::filesystem::create_directory( dir ), "create dir for db " );
- }
+void NamespaceIndex::getCollectionNamespaces(list<string>* tofill) const {
+ _ht->iterAll(stdx::bind(
+ namespaceGetNamespacesCallback, stdx::placeholders::_1, stdx::placeholders::_2, tofill));
+}
- void NamespaceIndex::init(OperationContext* txn) {
- invariant(!_ht.get());
+void NamespaceIndex::maybeMkdir() const {
+ if (!storageGlobalParams.directoryperdb)
+ return;
+ boost::filesystem::path dir(_dir);
+ dir /= _database;
+ if (!boost::filesystem::exists(dir))
+ MONGO_ASSERT_ON_EXCEPTION_WITH_MSG(boost::filesystem::create_directory(dir),
+ "create dir for db ");
+}
- unsigned long long len = 0;
+void NamespaceIndex::init(OperationContext* txn) {
+ invariant(!_ht.get());
- const boost::filesystem::path nsPath = path();
- const std::string pathString = nsPath.string();
+ unsigned long long len = 0;
- void* p = 0;
+ const boost::filesystem::path nsPath = path();
+ const std::string pathString = nsPath.string();
- if (boost::filesystem::exists(nsPath)) {
- if (_f.open(pathString, true)) {
- len = _f.length();
+ void* p = 0;
- if (len % (1024 * 1024) != 0) {
- StringBuilder sb;
- sb << "Invalid length: " << len
- << " for .ns file: " << pathString << ". Cannot open database";
+ if (boost::filesystem::exists(nsPath)) {
+ if (_f.open(pathString, true)) {
+ len = _f.length();
- log() << sb.str();
- uassert(10079, sb.str(), len % (1024 * 1024) == 0);
- }
+ if (len % (1024 * 1024) != 0) {
+ StringBuilder sb;
+ sb << "Invalid length: " << len << " for .ns file: " << pathString
+ << ". Cannot open database";
- p = _f.getView();
+ log() << sb.str();
+ uassert(10079, sb.str(), len % (1024 * 1024) == 0);
}
+
+ p = _f.getView();
}
- else {
- // use mmapv1GlobalOptions.lenForNewNsFiles, we are making a new database
- massert(10343,
- "bad mmapv1GlobalOptions.lenForNewNsFiles",
- mmapv1GlobalOptions.lenForNewNsFiles >= 1024*1024);
+ } else {
+ // use mmapv1GlobalOptions.lenForNewNsFiles, we are making a new database
+ massert(10343,
+ "bad mmapv1GlobalOptions.lenForNewNsFiles",
+ mmapv1GlobalOptions.lenForNewNsFiles >= 1024 * 1024);
- maybeMkdir();
+ maybeMkdir();
- unsigned long long l = mmapv1GlobalOptions.lenForNewNsFiles;
- log() << "allocating new ns file " << pathString << ", filling with zeroes..." << endl;
+ unsigned long long l = mmapv1GlobalOptions.lenForNewNsFiles;
+ log() << "allocating new ns file " << pathString << ", filling with zeroes..." << endl;
- {
- // Due to SERVER-15369 we need to explicitly write zero-bytes to the NS file.
- const unsigned long long kBlockSize = 1024*1024;
- invariant(l % kBlockSize == 0); // ns files can only be multiples of 1MB
- const std::vector<char> zeros(kBlockSize, 0);
+ {
+ // Due to SERVER-15369 we need to explicitly write zero-bytes to the NS file.
+ const unsigned long long kBlockSize = 1024 * 1024;
+ invariant(l % kBlockSize == 0); // ns files can only be multiples of 1MB
+ const std::vector<char> zeros(kBlockSize, 0);
- File file;
- file.open(pathString.c_str());
+ File file;
+ file.open(pathString.c_str());
- massert(18825,
- str::stream() << "couldn't create file " << pathString,
- file.is_open());
+ massert(18825, str::stream() << "couldn't create file " << pathString, file.is_open());
- for (fileofs ofs = 0; ofs < l && !file.bad(); ofs += kBlockSize) {
- file.write(ofs, &zeros[0], kBlockSize);
- }
+ for (fileofs ofs = 0; ofs < l && !file.bad(); ofs += kBlockSize) {
+ file.write(ofs, &zeros[0], kBlockSize);
+ }
- if (file.bad()) {
- try {
- boost::filesystem::remove(pathString);
- } catch (const std::exception& e) {
- StringBuilder ss;
- ss << "error removing file: " << e.what();
- massert(18909, ss.str(), 0);
- }
- }
- else {
- file.fsync();
+ if (file.bad()) {
+ try {
+ boost::filesystem::remove(pathString);
+ } catch (const std::exception& e) {
+ StringBuilder ss;
+ ss << "error removing file: " << e.what();
+ massert(18909, ss.str(), 0);
}
-
- massert(18826,
- str::stream() << "failure writing file " << pathString,
- !file.bad());
+ } else {
+ file.fsync();
}
- if (_f.create(pathString, l, true)) {
- // The writes done in this function must not be rolled back. This will leave the
- // file empty, but available for future use. That is why we go directly to the
- // global dur dirty list rather than going through the OperationContext.
- getDur().createdFile(pathString, l);
+ massert(18826, str::stream() << "failure writing file " << pathString, !file.bad());
+ }
- // Commit the journal and all changes to disk so that even if exceptions occur
- // during subsequent initialization, we won't have uncommited changes during file
- // close.
- getDur().commitNow(txn);
+ if (_f.create(pathString, l, true)) {
+ // The writes done in this function must not be rolled back. This will leave the
+ // file empty, but available for future use. That is why we go directly to the
+ // global dur dirty list rather than going through the OperationContext.
+ getDur().createdFile(pathString, l);
- len = l;
- invariant(len == mmapv1GlobalOptions.lenForNewNsFiles);
+ // Commit the journal and all changes to disk so that even if exceptions occur
+        // during subsequent initialization, we won't have uncommitted changes during file
+ // close.
+ getDur().commitNow(txn);
- p = _f.getView();
- }
- }
+ len = l;
+ invariant(len == mmapv1GlobalOptions.lenForNewNsFiles);
- if (p == 0) {
- severe() << "error couldn't open file " << pathString << " terminating" << endl;
- invariant(false);
+ p = _f.getView();
}
+ }
- invariant(len <= 0x7fffffff);
- _ht.reset(new NamespaceHashTable(p, (int) len, "namespace index"));
+ if (p == 0) {
+ severe() << "error couldn't open file " << pathString << " terminating" << endl;
+ invariant(false);
}
+ invariant(len <= 0x7fffffff);
+ _ht.reset(new NamespaceHashTable(p, (int)len, "namespace index"));
+}
}
-
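NamespaceIndex::init above allocates a brand-new .ns file by explicitly writing whole 1MB blocks of zero bytes (the SERVER-15369 workaround), fsyncing, and only then mapping it. A sketch of that zero-fill loop using std::ofstream in place of the internal File class:

// Sketch (std::ofstream instead of the internal File class) of the
// zero-fill loop in NamespaceIndex::init: a new .ns file is written as
// whole 1MB blocks of zero bytes so the space is physically allocated.
#include <cassert>
#include <fstream>
#include <vector>

bool zeroFill(const char* path, unsigned long long len) {
    const unsigned long long kBlockSize = 1024 * 1024;
    assert(len % kBlockSize == 0);  // ns files are multiples of 1MB
    const std::vector<char> zeros(kBlockSize, 0);

    std::ofstream file(path, std::ios::binary);
    if (!file)
        return false;
    for (unsigned long long ofs = 0; ofs < len && file.good(); ofs += kBlockSize)
        file.write(zeros.data(), static_cast<std::streamsize>(kBlockSize));
    file.flush();  // stand-in for the fsync() in the real code
    return file.good();
}

int main() {
    return zeroFill("example.ns", 16 * 1024 * 1024) ? 0 : 1;
}

The real code also removes the file again if any block write fails, so a partially zeroed file is never left behind.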
diff --git a/src/mongo/db/storage/mmap_v1/catalog/namespace_index.h b/src/mongo/db/storage/mmap_v1/catalog/namespace_index.h
index 44f429311ba..53d162bc601 100644
--- a/src/mongo/db/storage/mmap_v1/catalog/namespace_index.h
+++ b/src/mongo/db/storage/mmap_v1/catalog/namespace_index.h
@@ -40,53 +40,53 @@
namespace mongo {
- class NamespaceDetails;
- class NamespaceHashTable;
- class OperationContext;
+class NamespaceDetails;
+class NamespaceHashTable;
+class OperationContext;
- /* NamespaceIndex is the ".ns" file you see in the data directory. It is the "system catalog"
- if you will: at least the core parts. (Additional info in system.* collections.)
- */
- class NamespaceIndex {
- MONGO_DISALLOW_COPYING(NamespaceIndex);
- public:
- NamespaceIndex(const std::string& dir, const std::string& database);
- ~NamespaceIndex();
+/* NamespaceIndex is the ".ns" file you see in the data directory. It is the "system catalog"
+ if you will: at least the core parts. (Additional info in system.* collections.)
+*/
+class NamespaceIndex {
+ MONGO_DISALLOW_COPYING(NamespaceIndex);
- /* returns true if the file represented by this file exists on disk */
- bool pathExists() const;
+public:
+ NamespaceIndex(const std::string& dir, const std::string& database);
+ ~NamespaceIndex();
- void init(OperationContext* txn);
+    /* returns true if the .ns file represented by this object exists on disk */
+ bool pathExists() const;
- void add_ns( OperationContext* txn,
- StringData ns, const DiskLoc& loc, bool capped);
- void add_ns( OperationContext* txn,
- StringData ns, const NamespaceDetails* details );
- void add_ns( OperationContext* txn,
- const Namespace& ns, const NamespaceDetails* details );
+ void init(OperationContext* txn);
- NamespaceDetails* details(StringData ns) const;
- NamespaceDetails* details(const Namespace& ns) const;
+ void add_ns(OperationContext* txn, StringData ns, const DiskLoc& loc, bool capped);
+ void add_ns(OperationContext* txn, StringData ns, const NamespaceDetails* details);
+ void add_ns(OperationContext* txn, const Namespace& ns, const NamespaceDetails* details);
- void kill_ns( OperationContext* txn,
- StringData ns);
+ NamespaceDetails* details(StringData ns) const;
+ NamespaceDetails* details(const Namespace& ns) const;
- bool allocated() const { return _ht.get() != 0; }
+ void kill_ns(OperationContext* txn, StringData ns);
- void getCollectionNamespaces( std::list<std::string>* tofill ) const;
+ bool allocated() const {
+ return _ht.get() != 0;
+ }
- boost::filesystem::path path() const;
+ void getCollectionNamespaces(std::list<std::string>* tofill) const;
- unsigned long long fileLength() const { return _f.length(); }
+ boost::filesystem::path path() const;
- private:
- void maybeMkdir() const;
+ unsigned long long fileLength() const {
+ return _f.length();
+ }
- const std::string _dir;
- const std::string _database;
+private:
+ void maybeMkdir() const;
- DurableMappedFile _f;
- std::unique_ptr<NamespaceHashTable> _ht;
- };
+ const std::string _dir;
+ const std::string _database;
+ DurableMappedFile _f;
+ std::unique_ptr<NamespaceHashTable> _ht;
+};
}
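The path() accessor declared here is implemented in the .cpp hunk above: with --directoryperdb the .ns file lives at <dir>/<database>/<database>.ns, otherwise at <dir>/<database>.ns. A sketch of that layout using std::filesystem in place of boost::filesystem:

// Sketch of NamespaceIndex::path() using std::filesystem in place of
// boost::filesystem: directory-per-db inserts one extra path component.
#include <filesystem>
#include <iostream>
#include <string>

std::filesystem::path nsPath(const std::string& dir,
                             const std::string& database,
                             bool directoryPerDb) {
    std::filesystem::path ret(dir);
    if (directoryPerDb)
        ret /= database;
    ret /= (database + ".ns");
    return ret;
}

int main() {
    std::cout << nsPath("/data/db", "test", false) << "\n";  // <dir>/<database>.ns
    std::cout << nsPath("/data/db", "test", true) << "\n";   // <dir>/<database>/<database>.ns
    return 0;
}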
diff --git a/src/mongo/db/storage/mmap_v1/catalog/namespace_test.cpp b/src/mongo/db/storage/mmap_v1/catalog/namespace_test.cpp
index 6a0edb79ea4..85cd79be43b 100644
--- a/src/mongo/db/storage/mmap_v1/catalog/namespace_test.cpp
+++ b/src/mongo/db/storage/mmap_v1/catalog/namespace_test.cpp
@@ -34,36 +34,35 @@
namespace mongo {
- using std::string;
+using std::string;
- TEST( NamespaceTest, Basics ) {
- Namespace foo( "foo.bar" );
- Namespace bar( "bar.foo" );
+TEST(NamespaceTest, Basics) {
+ Namespace foo("foo.bar");
+ Namespace bar("bar.foo");
- ASSERT_EQUALS( foo.toString(), foo.toString() );
- ASSERT_EQUALS( foo.hash(), foo.hash() );
+ ASSERT_EQUALS(foo.toString(), foo.toString());
+ ASSERT_EQUALS(foo.hash(), foo.hash());
- ASSERT_NOT_EQUALS( foo.hash(), bar.hash() );
+ ASSERT_NOT_EQUALS(foo.hash(), bar.hash());
- ASSERT( foo == foo );
- ASSERT( !( foo != foo ) );
- ASSERT( foo != bar );
- ASSERT( !( foo == bar ) );
- }
-
- TEST( NamespaceTest, ExtraName ) {
- Namespace foo( "foo.bar" );
- ASSERT_FALSE( foo.isExtra() );
+ ASSERT(foo == foo);
+ ASSERT(!(foo != foo));
+ ASSERT(foo != bar);
+ ASSERT(!(foo == bar));
+}
- string str0 = foo.extraName( 0 );
- ASSERT_EQUALS( "foo.bar$extra", str0 );
- Namespace ex0( str0 );
- ASSERT_TRUE( ex0.isExtra() );
+TEST(NamespaceTest, ExtraName) {
+ Namespace foo("foo.bar");
+ ASSERT_FALSE(foo.isExtra());
- string str1 = foo.extraName( 1 );
- ASSERT_EQUALS( "foo.bar$extrb", str1 );
- Namespace ex1( str1 );
- ASSERT_TRUE( ex1.isExtra() );
+ string str0 = foo.extraName(0);
+ ASSERT_EQUALS("foo.bar$extra", str0);
+ Namespace ex0(str0);
+ ASSERT_TRUE(ex0.isExtra());
- }
+ string str1 = foo.extraName(1);
+ ASSERT_EQUALS("foo.bar$extrb", str1);
+ Namespace ex1(str1);
+ ASSERT_TRUE(ex1.isExtra());
+}
}
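The ExtraName test pins down the naming scheme for $extra namespaces: the i-th extra record for a collection appends "$extr" plus the letter 'a' + i. A hypothetical helper consistent with those expectations (the real Namespace::extraName also enforces length limits not shown here):

// Hypothetical helper matching the ExtraName test expectations above:
// extraName(ns, 0) -> "<ns>$extra", extraName(ns, 1) -> "<ns>$extrb".
#include <cassert>
#include <string>

std::string extraName(const std::string& ns, int i) {
    assert(i >= 0 && i <= 25);
    std::string suffix = "$extr";
    suffix += static_cast<char>('a' + i);
    return ns + suffix;
}

int main() {
    assert(extraName("foo.bar", 0) == "foo.bar$extra");
    assert(extraName("foo.bar", 1) == "foo.bar$extrb");
    return 0;
}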
diff --git a/src/mongo/db/storage/mmap_v1/compress.cpp b/src/mongo/db/storage/mmap_v1/compress.cpp
index bae8bc5acba..8f8dce527ed 100644
--- a/src/mongo/db/storage/mmap_v1/compress.cpp
+++ b/src/mongo/db/storage/mmap_v1/compress.cpp
@@ -36,24 +36,22 @@
namespace mongo {
- void rawCompress(const char* input,
- size_t input_length,
- char* compressed,
- size_t* compressed_length)
- {
- snappy::RawCompress(input, input_length, compressed, compressed_length);
- }
-
- size_t maxCompressedLength(size_t source_len) {
- return snappy::MaxCompressedLength(source_len);
- }
-
- size_t compress(const char* input, size_t input_length, std::string* output) {
- return snappy::Compress(input, input_length, output);
- }
-
- bool uncompress(const char* compressed, size_t compressed_length, std::string* uncompressed) {
- return snappy::Uncompress(compressed, compressed_length, uncompressed);
- }
+void rawCompress(const char* input,
+ size_t input_length,
+ char* compressed,
+ size_t* compressed_length) {
+ snappy::RawCompress(input, input_length, compressed, compressed_length);
+}
+
+size_t maxCompressedLength(size_t source_len) {
+ return snappy::MaxCompressedLength(source_len);
+}
+size_t compress(const char* input, size_t input_length, std::string* output) {
+ return snappy::Compress(input, input_length, output);
+}
+
+bool uncompress(const char* compressed, size_t compressed_length, std::string* uncompressed) {
+ return snappy::Uncompress(compressed, compressed_length, uncompressed);
+}
}
diff --git a/src/mongo/db/storage/mmap_v1/compress.h b/src/mongo/db/storage/mmap_v1/compress.h
index b8afa4d90c5..8ff828a93a6 100644
--- a/src/mongo/db/storage/mmap_v1/compress.h
+++ b/src/mongo/db/storage/mmap_v1/compress.h
@@ -32,18 +32,15 @@
#include <string>
-namespace mongo {
+namespace mongo {
- size_t compress(const char* input, size_t input_length, std::string* output);
+size_t compress(const char* input, size_t input_length, std::string* output);
- bool uncompress(const char* compressed, size_t compressed_length, std::string* uncompressed);
-
- size_t maxCompressedLength(size_t source_len);
- void rawCompress(const char* input,
- size_t input_length,
- char* compressed,
- size_t* compressed_length);
+bool uncompress(const char* compressed, size_t compressed_length, std::string* uncompressed);
+size_t maxCompressedLength(size_t source_len);
+void rawCompress(const char* input,
+ size_t input_length,
+ char* compressed,
+ size_t* compressed_length);
}
-
-
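These wrappers are thin pass-throughs to snappy, so usage mirrors snappy's own API: compress() returns the compressed length it wrote into the output string, and uncompress() returns whether decoding succeeded. A usage sketch, assuming snappy and this translation unit are linked in:

// Usage sketch for the snappy wrappers declared above: round-trips a
// buffer through mongo::compress() / mongo::uncompress().
#include <cassert>
#include <string>

#include "mongo/db/storage/mmap_v1/compress.h"

int main() {
    const std::string input(1024, 'x');  // highly compressible

    std::string compressed;
    size_t n = mongo::compress(input.data(), input.size(), &compressed);
    assert(n == compressed.size());
    assert(compressed.size() < input.size());

    std::string restored;
    bool ok = mongo::uncompress(compressed.data(), compressed.size(), &restored);
    assert(ok && restored == input);
    return 0;
}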
diff --git a/src/mongo/db/storage/mmap_v1/data_file.cpp b/src/mongo/db/storage/mmap_v1/data_file.cpp
index 15fbaba024d..90f6b71b7c6 100644
--- a/src/mongo/db/storage/mmap_v1/data_file.cpp
+++ b/src/mongo/db/storage/mmap_v1/data_file.cpp
@@ -47,216 +47,201 @@
namespace mongo {
- using std::endl;
+using std::endl;
namespace {
- void data_file_check(void *_mb) {
- if (sizeof(char *) == 4) {
- uassert(10084,
- "can't map file memory - mongo requires 64 bit build for larger datasets",
- _mb != NULL);
- }
- else {
- uassert(10085, "can't map file memory", _mb != NULL);
- }
+void data_file_check(void* _mb) {
+ if (sizeof(char*) == 4) {
+ uassert(10084,
+ "can't map file memory - mongo requires 64 bit build for larger datasets",
+ _mb != NULL);
+ } else {
+ uassert(10085, "can't map file memory", _mb != NULL);
}
+}
-} // namespace
+} // namespace
- BOOST_STATIC_ASSERT(DataFileHeader::HeaderSize == 8192);
- BOOST_STATIC_ASSERT(sizeof(static_cast<DataFileHeader*>(NULL)->data) == 4);
- BOOST_STATIC_ASSERT(
- sizeof(DataFileHeader) - sizeof(static_cast<DataFileHeader*>(NULL)->data)
- == DataFileHeader::HeaderSize);
+BOOST_STATIC_ASSERT(DataFileHeader::HeaderSize == 8192);
+BOOST_STATIC_ASSERT(sizeof(static_cast<DataFileHeader*>(NULL)->data) == 4);
+BOOST_STATIC_ASSERT(sizeof(DataFileHeader) - sizeof(static_cast<DataFileHeader*>(NULL)->data) ==
+ DataFileHeader::HeaderSize);
- int DataFile::maxSize() {
- if ( sizeof( int* ) == 4 ) {
- return 512 * 1024 * 1024;
- }
- else if (mmapv1GlobalOptions.smallfiles) {
- return 0x7ff00000 >> 2;
- }
- else {
- return 0x7ff00000;
- }
+int DataFile::maxSize() {
+ if (sizeof(int*) == 4) {
+ return 512 * 1024 * 1024;
+ } else if (mmapv1GlobalOptions.smallfiles) {
+ return 0x7ff00000 >> 2;
+ } else {
+ return 0x7ff00000;
}
+}
+
+NOINLINE_DECL void DataFile::badOfs(int ofs) const {
+ msgasserted(13440,
+ str::stream() << "bad offset:" << ofs << " accessing file: " << mmf.filename()
+ << ". See http://dochub.mongodb.org/core/data-recovery");
+}
- NOINLINE_DECL void DataFile::badOfs(int ofs) const {
- msgasserted(13440, str::stream() << "bad offset:" << ofs
- << " accessing file: " << mmf.filename()
- << ". See http://dochub.mongodb.org/core/data-recovery");
+int DataFile::_defaultSize() const {
+ int size;
+
+ if (_fileNo <= 4) {
+ size = (64 * 1024 * 1024) << _fileNo;
+ } else {
+ size = 0x7ff00000;
}
- int DataFile::_defaultSize() const {
- int size;
+ if (mmapv1GlobalOptions.smallfiles) {
+ size = size >> 2;
+ }
- if (_fileNo <= 4) {
- size = (64 * 1024 * 1024) << _fileNo;
- }
- else {
- size = 0x7ff00000;
- }
+ return size;
+}
- if (mmapv1GlobalOptions.smallfiles) {
- size = size >> 2;
- }
+/** @return true if found and opened. If uninitialized (prealloc only), does not open. */
+Status DataFile::openExisting(const char* filename) {
+ invariant(_mb == 0);
- return size;
+ if (!boost::filesystem::exists(filename)) {
+ return Status(ErrorCodes::InvalidPath, "DataFile::openExisting - file does not exist");
}
- /** @return true if found and opened. if uninitialized (prealloc only) does not open. */
- Status DataFile::openExisting(const char *filename) {
- invariant(_mb == 0);
-
- if (!boost::filesystem::exists(filename)) {
- return Status(ErrorCodes::InvalidPath, "DataFile::openExisting - file does not exist");
- }
+ if (!mmf.open(filename, false)) {
+ return Status(ErrorCodes::InternalError, "DataFile::openExisting - mmf.open failed");
+ }
- if (!mmf.open(filename, false)) {
- return Status(ErrorCodes::InternalError, "DataFile::openExisting - mmf.open failed");
- }
+ // The mapped view of the file should never be NULL if the open call above succeeded.
+ _mb = mmf.getView();
+ invariant(_mb);
- // The mapped view of the file should never be NULL if the open call above succeeded.
- _mb = mmf.getView();
- invariant(_mb);
+ const uint64_t sz = mmf.length();
+ invariant(sz <= 0x7fffffff);
+ invariant(sz % 4096 == 0);
- const uint64_t sz = mmf.length();
- invariant(sz <= 0x7fffffff);
- invariant(sz % 4096 == 0);
-
- if (sz < 64*1024*1024 && !mmapv1GlobalOptions.smallfiles) {
- if( sz >= 16*1024*1024 && sz % (1024*1024) == 0 ) {
- log() << "info openExisting file size " << sz
- << " but mmapv1GlobalOptions.smallfiles=false: "
- << filename << endl;
- }
- else {
- log() << "openExisting size " << sz << " less than minimum file size expectation "
- << filename << endl;
- verify(false);
- }
+ if (sz < 64 * 1024 * 1024 && !mmapv1GlobalOptions.smallfiles) {
+ if (sz >= 16 * 1024 * 1024 && sz % (1024 * 1024) == 0) {
+ log() << "info openExisting file size " << sz
+ << " but mmapv1GlobalOptions.smallfiles=false: " << filename << endl;
+ } else {
+ log() << "openExisting size " << sz << " less than minimum file size expectation "
+ << filename << endl;
+ verify(false);
}
-
- data_file_check(_mb);
- return Status::OK();
}
- void DataFile::open( OperationContext* txn,
- const char *filename,
- int minSize,
- bool preallocateOnly ) {
-
- long size = _defaultSize();
-
- while (size < minSize) {
- if (size < maxSize() / 2) {
- size *= 2;
- }
- else {
- size = maxSize();
- break;
- }
- }
+ data_file_check(_mb);
+ return Status::OK();
+}
+
+void DataFile::open(OperationContext* txn,
+ const char* filename,
+ int minSize,
+ bool preallocateOnly) {
+ long size = _defaultSize();
- if (size > maxSize()) {
+ while (size < minSize) {
+ if (size < maxSize() / 2) {
+ size *= 2;
+ } else {
size = maxSize();
+ break;
}
+ }
- invariant(size >= 64 * 1024 * 1024 || mmapv1GlobalOptions.smallfiles);
- invariant( size % 4096 == 0 );
+ if (size > maxSize()) {
+ size = maxSize();
+ }
- if ( preallocateOnly ) {
- if (mmapv1GlobalOptions.prealloc) {
- FileAllocator::get()->requestAllocation( filename, size );
- }
- return;
- }
+ invariant(size >= 64 * 1024 * 1024 || mmapv1GlobalOptions.smallfiles);
+ invariant(size % 4096 == 0);
- {
- invariant(_mb == 0);
- unsigned long long sz = size;
- if (mmf.create(filename, sz, false)) {
- _mb = mmf.getView();
- }
+ if (preallocateOnly) {
+ if (mmapv1GlobalOptions.prealloc) {
+ FileAllocator::get()->requestAllocation(filename, size);
+ }
+ return;
+ }
- invariant(sz <= 0x7fffffff);
- size = (int)sz;
+ {
+ invariant(_mb == 0);
+ unsigned long long sz = size;
+ if (mmf.create(filename, sz, false)) {
+ _mb = mmf.getView();
}
- data_file_check(_mb);
- header()->init(txn, _fileNo, size, filename);
+ invariant(sz <= 0x7fffffff);
+ size = (int)sz;
}
- void DataFile::flush( bool sync ) {
- mmf.flush( sync );
- }
+ data_file_check(_mb);
+ header()->init(txn, _fileNo, size, filename);
+}
- DiskLoc DataFile::allocExtentArea( OperationContext* txn, int size ) {
- // The header would be NULL if file open failed. However, if file open failed we should
- // never be entering here.
- invariant(header());
- invariant(size <= header()->unusedLength);
+void DataFile::flush(bool sync) {
+ mmf.flush(sync);
+}
- int offset = header()->unused.getOfs();
+DiskLoc DataFile::allocExtentArea(OperationContext* txn, int size) {
+ // The header would be NULL if file open failed. However, if file open failed we should
+ // never be entering here.
+ invariant(header());
+ invariant(size <= header()->unusedLength);
- DataFileHeader *h = header();
- *txn->recoveryUnit()->writing(&h->unused) = DiskLoc(_fileNo, offset + size);
- txn->recoveryUnit()->writingInt(h->unusedLength) = h->unusedLength - size;
+ int offset = header()->unused.getOfs();
- return DiskLoc(_fileNo, offset);
- }
+ DataFileHeader* h = header();
+ *txn->recoveryUnit()->writing(&h->unused) = DiskLoc(_fileNo, offset + size);
+ txn->recoveryUnit()->writingInt(h->unusedLength) = h->unusedLength - size;
- // -------------------------------------------------------------------------------
-
- void DataFileHeader::init(OperationContext* txn,
- int fileno,
- int filelength,
- const char* filename) {
-
- if (uninitialized()) {
- DEV log() << "datafileheader::init initializing " << filename << " n:" << fileno << endl;
-
- massert(13640,
- str::stream() << "DataFileHeader looks corrupt at file open filelength:"
- << filelength << " fileno:" << fileno,
- filelength > 32768);
-
- // The writes done in this function must not be rolled back. If the containing
- // UnitOfWork rolls back it should roll back to the state *after* these writes. This
- // will leave the file empty, but available for future use. That is why we go directly
- // to the global dur dirty list rather than going through the RecoveryUnit.
- getDur().createdFile(filename, filelength);
-
- typedef std::pair<void*, unsigned> Intent;
- std::vector<Intent> intent;
- intent.push_back(std::make_pair(this, sizeof(DataFileHeader)));
- privateViews.makeWritable(this, sizeof(DataFileHeader));
- getDur().declareWriteIntents(intent);
-
- fileLength = filelength;
- version = DataFileVersion::defaultForNewFiles();
- unused.set(fileno, HeaderSize);
- unusedLength = fileLength - HeaderSize - 16;
- freeListStart.Null();
- freeListEnd.Null();
- }
- else {
- checkUpgrade(txn);
- }
- }
+ return DiskLoc(_fileNo, offset);
+}
- void DataFileHeader::checkUpgrade(OperationContext* txn) {
- if ( freeListStart == DiskLoc(0, 0) ) {
- // we are upgrading from 2.4 to 2.6
- invariant(freeListEnd == DiskLoc(0, 0)); // both start and end should be (0,0) or real
- WriteUnitOfWork wunit(txn);
- *txn->recoveryUnit()->writing( &freeListStart ) = DiskLoc();
- *txn->recoveryUnit()->writing( &freeListEnd ) = DiskLoc();
- wunit.commit();
- }
+// -------------------------------------------------------------------------------
+
+void DataFileHeader::init(OperationContext* txn, int fileno, int filelength, const char* filename) {
+ if (uninitialized()) {
+ DEV log() << "datafileheader::init initializing " << filename << " n:" << fileno << endl;
+
+ massert(13640,
+ str::stream() << "DataFileHeader looks corrupt at file open filelength:"
+ << filelength << " fileno:" << fileno,
+ filelength > 32768);
+
+ // The writes done in this function must not be rolled back. If the containing
+ // UnitOfWork rolls back it should roll back to the state *after* these writes. This
+ // will leave the file empty, but available for future use. That is why we go directly
+ // to the global dur dirty list rather than going through the RecoveryUnit.
+ getDur().createdFile(filename, filelength);
+
+ typedef std::pair<void*, unsigned> Intent;
+ std::vector<Intent> intent;
+ intent.push_back(std::make_pair(this, sizeof(DataFileHeader)));
+ privateViews.makeWritable(this, sizeof(DataFileHeader));
+ getDur().declareWriteIntents(intent);
+
+ fileLength = filelength;
+ version = DataFileVersion::defaultForNewFiles();
+ unused.set(fileno, HeaderSize);
+ unusedLength = fileLength - HeaderSize - 16;
+ freeListStart.Null();
+ freeListEnd.Null();
+ } else {
+ checkUpgrade(txn);
}
+}
+void DataFileHeader::checkUpgrade(OperationContext* txn) {
+ if (freeListStart == DiskLoc(0, 0)) {
+ // we are upgrading from 2.4 to 2.6
+ invariant(freeListEnd == DiskLoc(0, 0)); // both start and end should be (0,0) or real
+ WriteUnitOfWork wunit(txn);
+ *txn->recoveryUnit()->writing(&freeListStart) = DiskLoc();
+ *txn->recoveryUnit()->writing(&freeListEnd) = DiskLoc();
+ wunit.commit();
+ }
+}
}
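DataFile::open and _defaultSize above together pick the size of a new data file: the n-th file starts at 64MB << n (later files jump straight to the maximum), then the requested size doubles until it covers minSize, never exceeding maxSize(). A standalone sketch of that selection logic for the 64-bit, non-smallfiles case:

// Standalone sketch of the size-selection logic in DataFile::open and
// _defaultSize (64-bit, non-smallfiles case): start from the per-file
// default, double until minSize is covered, cap at the maximum.
#include <cassert>

int maxSize() {
    return 0x7ff00000;  // DataFile::maxSize, 64-bit build, no smallfiles
}

int defaultSize(int fileNo) {
    return (fileNo <= 4) ? (64 * 1024 * 1024) << fileNo : 0x7ff00000;
}

int chooseSize(int fileNo, int minSize) {
    long size = defaultSize(fileNo);
    while (size < minSize) {
        if (size < maxSize() / 2) {
            size *= 2;
        } else {
            size = maxSize();
            break;
        }
    }
    if (size > maxSize())
        size = maxSize();
    return static_cast<int>(size);
}

int main() {
    assert(chooseSize(0, 0) == 64 * 1024 * 1024);   // first file: 64MB
    assert(chooseSize(1, 0) == 128 * 1024 * 1024);  // second file: 128MB
    assert(chooseSize(0, 100 * 1024 * 1024) == 128 * 1024 * 1024);  // doubled once
    assert(chooseSize(5, 0) == 0x7ff00000);         // later files: maximum
    return 0;
}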
diff --git a/src/mongo/db/storage/mmap_v1/data_file.h b/src/mongo/db/storage/mmap_v1/data_file.h
index 6eddb092478..ed6e08e7931 100644
--- a/src/mongo/db/storage/mmap_v1/data_file.h
+++ b/src/mongo/db/storage/mmap_v1/data_file.h
@@ -35,158 +35,181 @@
namespace mongo {
- class OperationContext;
+class OperationContext;
#pragma pack(1)
- class DataFileVersion {
- public:
- DataFileVersion(uint32_t major, uint32_t minor) :_major(major), _minor(minor) {}
-
- static DataFileVersion defaultForNewFiles() {
- return DataFileVersion(kCurrentMajor, kIndexes24AndNewer
- | kMayHave28Freelist
- );
- }
-
- bool isCompatibleWithCurrentCode() const {
- if (_major != kCurrentMajor)
- return false;
-
- if (_minor & ~kUsedMinorFlagsMask)
- return false;
-
- const uint32_t indexCleanliness = _minor & kIndexPluginMask;
- if (indexCleanliness != kIndexes24AndNewer && indexCleanliness != kIndexes22AndOlder)
- return false;
-
- // We are compatible with either setting of kMayHave28Freelist.
-
- return true;
- }
-
- bool is24IndexClean() const { return (_minor & kIndexPluginMask) == kIndexes24AndNewer; }
- void setIs24IndexClean() { _minor = ((_minor & ~kIndexPluginMask) | kIndexes24AndNewer); }
-
- bool mayHave28Freelist() const { return _minor & kMayHave28Freelist; }
- void setMayHave28Freelist() { _minor |= kMayHave28Freelist; }
-
- uint32_t majorRaw() const { return _major; }
- uint32_t minorRaw() const { return _minor; }
-
- private:
- static const uint32_t kCurrentMajor = 4;
-
- // minor layout:
- // first 4 bits - index plugin cleanliness.
- // see IndexCatalog::_upgradeDatabaseMinorVersionIfNeeded for details
- // 5th bit - 1 if started with 3.0-style freelist implementation (SERVER-14081)
- // 6th through 31st bit - reserved and must be set to 0.
- static const uint32_t kIndexPluginMask = 0xf;
- static const uint32_t kIndexes22AndOlder = 5;
- static const uint32_t kIndexes24AndNewer = 6;
-
- static const uint32_t kMayHave28Freelist = (1 << 4);
-
- // All set bits we know about are covered by this mask.
- static const uint32_t kUsedMinorFlagsMask = 0x1f;
-
- uint32_t _major;
- uint32_t _minor;
- };
-
- // Note: Intentionally not defining relational operators for DataFileVersion as there is no
- // total ordering of all versions now that '_minor' is used as a bit vector.
+class DataFileVersion {
+public:
+ DataFileVersion(uint32_t major, uint32_t minor) : _major(major), _minor(minor) {}
+
+ static DataFileVersion defaultForNewFiles() {
+ return DataFileVersion(kCurrentMajor, kIndexes24AndNewer | kMayHave28Freelist);
+ }
+
+ bool isCompatibleWithCurrentCode() const {
+ if (_major != kCurrentMajor)
+ return false;
+
+ if (_minor & ~kUsedMinorFlagsMask)
+ return false;
+
+ const uint32_t indexCleanliness = _minor & kIndexPluginMask;
+ if (indexCleanliness != kIndexes24AndNewer && indexCleanliness != kIndexes22AndOlder)
+ return false;
+
+ // We are compatible with either setting of kMayHave28Freelist.
+
+ return true;
+ }
+
+ bool is24IndexClean() const {
+ return (_minor & kIndexPluginMask) == kIndexes24AndNewer;
+ }
+ void setIs24IndexClean() {
+ _minor = ((_minor & ~kIndexPluginMask) | kIndexes24AndNewer);
+ }
+
+ bool mayHave28Freelist() const {
+ return _minor & kMayHave28Freelist;
+ }
+ void setMayHave28Freelist() {
+ _minor |= kMayHave28Freelist;
+ }
+
+ uint32_t majorRaw() const {
+ return _major;
+ }
+ uint32_t minorRaw() const {
+ return _minor;
+ }
+
+private:
+ static const uint32_t kCurrentMajor = 4;
+
+ // minor layout:
+ // first 4 bits - index plugin cleanliness.
+ // see IndexCatalog::_upgradeDatabaseMinorVersionIfNeeded for details
+ // 5th bit - 1 if started with 3.0-style freelist implementation (SERVER-14081)
+ // 6th through 31st bit - reserved and must be set to 0.
+ static const uint32_t kIndexPluginMask = 0xf;
+ static const uint32_t kIndexes22AndOlder = 5;
+ static const uint32_t kIndexes24AndNewer = 6;
+
+ static const uint32_t kMayHave28Freelist = (1 << 4);
+
+ // All set bits we know about are covered by this mask.
+ static const uint32_t kUsedMinorFlagsMask = 0x1f;
+
+ uint32_t _major;
+ uint32_t _minor;
+};
+
+// Note: Intentionally not defining relational operators for DataFileVersion as there is no
+// total ordering of all versions now that '_minor' is used as a bit vector.
#pragma pack()
- /* a datafile - i.e. the "dbname.<#>" files :
-
- ----------------------
- DataFileHeader
- ----------------------
- Extent (for a particular namespace)
- MmapV1RecordHeader
- ...
- MmapV1RecordHeader (some chained for unused space)
- ----------------------
- more Extents...
- ----------------------
- */
+/* a datafile - i.e. the "dbname.<#>" files :
+
+ ----------------------
+ DataFileHeader
+ ----------------------
+ Extent (for a particular namespace)
+ MmapV1RecordHeader
+ ...
+ MmapV1RecordHeader (some chained for unused space)
+ ----------------------
+ more Extents...
+ ----------------------
+*/
#pragma pack(1)
- class DataFileHeader {
- public:
- DataFileVersion version;
- int fileLength;
- DiskLoc unused; /* unused is the portion of the file that doesn't belong to any allocated extents. -1 = no more */
- int unusedLength;
- DiskLoc freeListStart;
- DiskLoc freeListEnd;
- char reserved[8192 - 4*4 - 8*3];
+class DataFileHeader {
+public:
+ DataFileVersion version;
+ int fileLength;
+ DiskLoc
+ unused; /* unused is the portion of the file that doesn't belong to any allocated extents. -1 = no more */
+ int unusedLength;
+ DiskLoc freeListStart;
+ DiskLoc freeListEnd;
+ char reserved[8192 - 4 * 4 - 8 * 3];
- char data[4]; // first extent starts here
+ char data[4]; // first extent starts here
- enum { HeaderSize = 8192 };
+ enum { HeaderSize = 8192 };
- bool uninitialized() const { return version.majorRaw() == 0; }
+ bool uninitialized() const {
+ return version.majorRaw() == 0;
+ }
- void init(OperationContext* txn, int fileno, int filelength, const char* filename);
+ void init(OperationContext* txn, int fileno, int filelength, const char* filename);
- void checkUpgrade(OperationContext* txn);
+ void checkUpgrade(OperationContext* txn);
- bool isEmpty() const {
- return uninitialized() || ( unusedLength == fileLength - HeaderSize - 16 );
- }
- };
+ bool isEmpty() const {
+ return uninitialized() || (unusedLength == fileLength - HeaderSize - 16);
+ }
+};
#pragma pack()
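
The reserved-array arithmetic above keeps the header at exactly 8 KB. Here is a standalone sketch of the same packed layout, assuming DiskLoc is two 32-bit ints and DataFileVersion is two uint32_t fields (consistent with the surrounding diff):

```cpp
#include <cstddef>
#include <cstdint>

// Standalone mirror of the packed DataFileHeader field sizes.
#pragma pack(1)
struct HeaderLayout {
    uint32_t versionMajor, versionMinor;       // DataFileVersion: 8 bytes
    int32_t fileLength;                        // 4 bytes
    int32_t unusedA, unusedOfs;                // DiskLoc unused: 8 bytes
    int32_t unusedLength;                      // 4 bytes
    int32_t freeListStartA, freeListStartOfs;  // DiskLoc: 8 bytes
    int32_t freeListEndA, freeListEndOfs;      // DiskLoc: 8 bytes
    char reserved[8192 - 4 * 4 - 8 * 3];       // 8152 bytes of padding
    char data[4];                              // first extent begins here
};
#pragma pack()

// 4*4 + 8*3 = 40 bytes of fields, so `reserved` pads the header to exactly
// 8192 bytes and data[] starts at offset HeaderSize.
static_assert(offsetof(HeaderLayout, data) == 8192, "first extent at HeaderSize");
```
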
- class DataFile {
- public:
- DataFile(int fn) : _fileNo(fn), _mb(NULL) {
-
- }
-
- /** @return true if found and opened. if uninitialized (prealloc only) does not open. */
- Status openExisting(const char *filename );
-
- /** creates if DNE */
- void open(OperationContext* txn,
- const char *filename,
- int requestedDataSize = 0,
- bool preallocateOnly = false);
+class DataFile {
+public:
+ DataFile(int fn) : _fileNo(fn), _mb(NULL) {}
- DiskLoc allocExtentArea( OperationContext* txn, int size );
+    /** @return true if found and opened; if the file is uninitialized (prealloc only), it is not opened. */
+ Status openExisting(const char* filename);
- DataFileHeader* getHeader() { return header(); }
- const DataFileHeader* getHeader() const { return header(); }
+    /** creates the file if it does not exist */
+ void open(OperationContext* txn,
+ const char* filename,
+ int requestedDataSize = 0,
+ bool preallocateOnly = false);
- HANDLE getFd() { return mmf.getFd(); }
- unsigned long long length() const { return mmf.length(); }
+ DiskLoc allocExtentArea(OperationContext* txn, int size);
- /* return max size an extent may be */
- static int maxSize();
+ DataFileHeader* getHeader() {
+ return header();
+ }
+ const DataFileHeader* getHeader() const {
+ return header();
+ }
- /** fsync */
- void flush( bool sync );
+ HANDLE getFd() {
+ return mmf.getFd();
+ }
+ unsigned long long length() const {
+ return mmf.length();
+ }
- private:
- friend class MmapV1ExtentManager;
+ /* return max size an extent may be */
+ static int maxSize();
+ /** fsync */
+ void flush(bool sync);
- void badOfs(int) const;
- int _defaultSize() const;
+private:
+ friend class MmapV1ExtentManager;
- void grow(DiskLoc dl, int size);
- char* p() const { return (char *) _mb; }
- DataFileHeader* header() { return static_cast<DataFileHeader*>( _mb ); }
- const DataFileHeader* header() const { return static_cast<DataFileHeader*>( _mb ); }
+ void badOfs(int) const;
+ int _defaultSize() const;
+ void grow(DiskLoc dl, int size);
- const int _fileNo;
+ char* p() const {
+ return (char*)_mb;
+ }
+ DataFileHeader* header() {
+ return static_cast<DataFileHeader*>(_mb);
+ }
+ const DataFileHeader* header() const {
+ return static_cast<DataFileHeader*>(_mb);
+ }
- DurableMappedFile mmf;
- void *_mb; // the memory mapped view
- };
+ const int _fileNo;
+ DurableMappedFile mmf;
+ void* _mb; // the memory mapped view
+};
}
diff --git a/src/mongo/db/storage/mmap_v1/data_file_sync.cpp b/src/mongo/db/storage/mmap_v1/data_file_sync.cpp
index 9579278ded1..013877cb08b 100644
--- a/src/mongo/db/storage/mmap_v1/data_file_sync.cpp
+++ b/src/mongo/db/storage/mmap_v1/data_file_sync.cpp
@@ -44,95 +44,90 @@
namespace mongo {
- using std::endl;
+using std::endl;
- DataFileSync dataFileSync;
+DataFileSync dataFileSync;
- DataFileSync::DataFileSync()
- : ServerStatusSection( "backgroundFlushing" ),
- _total_time( 0 ),
- _flushes( 0 ),
- _last() {
+DataFileSync::DataFileSync()
+ : ServerStatusSection("backgroundFlushing"), _total_time(0), _flushes(0), _last() {}
- }
-
- void DataFileSync::run() {
- Client::initThread( name().c_str() );
+void DataFileSync::run() {
+ Client::initThread(name().c_str());
+ if (storageGlobalParams.syncdelay == 0) {
+ log() << "warning: --syncdelay 0 is not recommended and can have strange performance"
+ << endl;
+ } else if (storageGlobalParams.syncdelay == 1) {
+ log() << "--syncdelay 1" << endl;
+ } else if (storageGlobalParams.syncdelay != 60) {
+ LOG(1) << "--syncdelay " << storageGlobalParams.syncdelay << endl;
+ }
+ int time_flushing = 0;
+ while (!inShutdown()) {
+ _diaglog.flush();
if (storageGlobalParams.syncdelay == 0) {
- log() << "warning: --syncdelay 0 is not recommended and can have strange performance" << endl;
- }
- else if (storageGlobalParams.syncdelay == 1) {
- log() << "--syncdelay 1" << endl;
+ // in case at some point we add an option to change at runtime
+ sleepsecs(5);
+ continue;
}
- else if (storageGlobalParams.syncdelay != 60) {
- LOG(1) << "--syncdelay " << storageGlobalParams.syncdelay << endl;
- }
- int time_flushing = 0;
- while ( ! inShutdown() ) {
- _diaglog.flush();
- if (storageGlobalParams.syncdelay == 0) {
- // in case at some point we add an option to change at runtime
- sleepsecs(5);
- continue;
- }
-
- sleepmillis((long long) std::max(0.0, (storageGlobalParams.syncdelay * 1000) - time_flushing));
-
- if ( inShutdown() ) {
- // occasional issue trying to flush during shutdown when sleep interrupted
- break;
- }
-
- Date_t start = jsTime();
- StorageEngine* storageEngine = getGlobalServiceContext()->getGlobalStorageEngine();
- int numFiles = storageEngine->flushAllFiles( true );
- time_flushing = (jsTime() - start).count();
-
- _flushed(time_flushing);
-
- if( shouldLog(logger::LogSeverity::Debug(1)) || time_flushing >= 10000 ) {
- log() << "flushing mmaps took " << time_flushing << "ms " << " for " << numFiles << " files" << endl;
- }
- }
- }
- BSONObj DataFileSync::generateSection(OperationContext* txn,
- const BSONElement& configElement) const {
- if (!running()) {
- return BSONObj();
+ sleepmillis(
+ (long long)std::max(0.0, (storageGlobalParams.syncdelay * 1000) - time_flushing));
+
+ if (inShutdown()) {
+ // occasional issue trying to flush during shutdown when sleep interrupted
+ break;
}
- BSONObjBuilder b;
- b.appendNumber( "flushes" , _flushes );
- b.appendNumber( "total_ms" , _total_time );
- b.appendNumber( "average_ms" , (_flushes ? (_total_time / double(_flushes)) : 0.0) );
- b.appendNumber( "last_ms" , _last_time );
- b.append("last_finished", _last);
- return b.obj();
+ Date_t start = jsTime();
+ StorageEngine* storageEngine = getGlobalServiceContext()->getGlobalStorageEngine();
+ int numFiles = storageEngine->flushAllFiles(true);
+ time_flushing = (jsTime() - start).count();
+
+ _flushed(time_flushing);
+
+ if (shouldLog(logger::LogSeverity::Debug(1)) || time_flushing >= 10000) {
+ log() << "flushing mmaps took " << time_flushing << "ms "
+ << " for " << numFiles << " files" << endl;
+ }
}
+}
- void DataFileSync::_flushed(int ms) {
- _flushes++;
- _total_time += ms;
- _last_time = ms;
- _last = jsTime();
+BSONObj DataFileSync::generateSection(OperationContext* txn,
+ const BSONElement& configElement) const {
+ if (!running()) {
+ return BSONObj();
}
+ BSONObjBuilder b;
+ b.appendNumber("flushes", _flushes);
+ b.appendNumber("total_ms", _total_time);
+ b.appendNumber("average_ms", (_flushes ? (_total_time / double(_flushes)) : 0.0));
+ b.appendNumber("last_ms", _last_time);
+ b.append("last_finished", _last);
+ return b.obj();
+}
+
+void DataFileSync::_flushed(int ms) {
+ _flushes++;
+ _total_time += ms;
+ _last_time = ms;
+ _last = jsTime();
+}
- class MemJournalServerStatusMetric : public ServerStatusMetric {
- public:
- MemJournalServerStatusMetric() : ServerStatusMetric(".mem.mapped") {}
- virtual void appendAtLeaf( BSONObjBuilder& b ) const {
- int m = static_cast<int>(MemoryMappedFile::totalMappedLength() / ( 1024 * 1024 ));
- b.appendNumber( "mapped" , m );
- if (storageGlobalParams.dur) {
- m *= 2;
- b.appendNumber( "mappedWithJournal" , m );
- }
+class MemJournalServerStatusMetric : public ServerStatusMetric {
+public:
+ MemJournalServerStatusMetric() : ServerStatusMetric(".mem.mapped") {}
+ virtual void appendAtLeaf(BSONObjBuilder& b) const {
+ int m = static_cast<int>(MemoryMappedFile::totalMappedLength() / (1024 * 1024));
+ b.appendNumber("mapped", m);
+ if (storageGlobalParams.dur) {
+ m *= 2;
+ b.appendNumber("mappedWithJournal", m);
}
+ }
- } memJournalServerStatusMetric;
+} memJournalServerStatusMetric;
}
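
The flush loop above subtracts the previous flush duration from the sync delay so that passes start on a fixed cadence rather than drifting. A minimal sketch of that timing rule (illustrative values, not MongoDB code):

```cpp
#include <algorithm>
#include <iostream>

// Sleep long enough that flush passes start every `syncdelaySecs` seconds, no
// matter how long the previous flush took; a flush that overruns the interval
// starts the next pass immediately (the sleep is clamped at zero).
long long nextSleepMillis(double syncdelaySecs, int lastFlushMillis) {
    return (long long)std::max(0.0, syncdelaySecs * 1000 - lastFlushMillis);
}

int main() {
    std::cout << nextSleepMillis(60, 250) << "\n";    // 59750: normal cycle
    std::cout << nextSleepMillis(60, 75000) << "\n";  // 0: flush overran
}
```
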
diff --git a/src/mongo/db/storage/mmap_v1/data_file_sync.h b/src/mongo/db/storage/mmap_v1/data_file_sync.h
index a92f55b64f8..b204fdad019 100644
--- a/src/mongo/db/storage/mmap_v1/data_file_sync.h
+++ b/src/mongo/db/storage/mmap_v1/data_file_sync.h
@@ -33,30 +33,32 @@
namespace mongo {
- /**
- * does background async flushes of mmapped files
- */
- class DataFileSync : public BackgroundJob , public ServerStatusSection {
- public:
- DataFileSync();
-
- virtual bool includeByDefault() const { return true; }
- virtual std::string name() const { return "DataFileSync"; }
+/**
+ * does background async flushes of mmapped files
+ */
+class DataFileSync : public BackgroundJob, public ServerStatusSection {
+public:
+ DataFileSync();
- void run();
+ virtual bool includeByDefault() const {
+ return true;
+ }
+ virtual std::string name() const {
+ return "DataFileSync";
+ }
- virtual BSONObj generateSection(OperationContext* txn,
- const BSONElement& configElement) const;
+ void run();
- private:
- void _flushed(int ms);
+ virtual BSONObj generateSection(OperationContext* txn, const BSONElement& configElement) const;
- long long _total_time;
- long long _flushes;
- int _last_time;
- Date_t _last;
+private:
+ void _flushed(int ms);
- };
+ long long _total_time;
+ long long _flushes;
+ int _last_time;
+ Date_t _last;
+};
- extern DataFileSync dataFileSync;
+extern DataFileSync dataFileSync;
}
diff --git a/src/mongo/db/storage/mmap_v1/diskloc.h b/src/mongo/db/storage/mmap_v1/diskloc.h
index 9d3adc64da7..662daf074d5 100644
--- a/src/mongo/db/storage/mmap_v1/diskloc.h
+++ b/src/mongo/db/storage/mmap_v1/diskloc.h
@@ -43,149 +43,176 @@
namespace mongo {
- template< class Version > class BtreeBucket;
+template <class Version>
+class BtreeBucket;
#pragma pack(1)
- /** represents a disk location/offset on disk in a database. 64 bits.
- it is assumed these will be passed around by value a lot so don't do anything to make them large
- (such as adding a virtual function)
- */
- class DiskLoc {
- int _a; // this will be volume, file #, etc. but is a logical value could be anything depending on storage engine
- int ofs;
-
- public:
-
- enum SentinelValues {
- /* note NullOfs is different. todo clean up. see refs to NullOfs in code - use is valid but outside DiskLoc context so confusing as-is. */
- NullOfs = -1,
-
- // Caps the number of files that may be allocated in a database, allowing about 32TB of
- // data per db. Note that the DiskLoc and DiskLoc56Bit types supports more files than
- // this value, as does the data storage format.
- MaxFiles=16000,
-
- // How invalid DiskLocs are represented in RecordIds.
- InvalidRepr = -2LL,
- };
-
- DiskLoc(int a, int Ofs) : _a(a), ofs(Ofs) { }
- DiskLoc() { Null(); }
-
- // Minimum allowed DiskLoc. No MmapV1RecordHeader may begin at this location because file and extent
- // headers must precede Records in a file.
- static DiskLoc min() { return DiskLoc(0, 0); }
-
- // Maximum allowed DiskLoc.
- // No MmapV1RecordHeader may begin at this location because the minimum size of a MmapV1RecordHeader is larger than
- // one byte. Also, the last bit is not able to be used because mmapv1 uses that for "used".
- static DiskLoc max() { return DiskLoc(0x7fffffff, 0x7ffffffe); }
-
- bool questionable() const {
- return ofs < -1 ||
- _a < -1 ||
- _a > 524288;
- }
+/** Represents a disk location/offset on disk in a database. 64 bits.
+ It is assumed these will be passed around by value a lot, so don't do anything to make them
+ large (such as adding a virtual function).
+ */
+class DiskLoc {
+    int _a;  // this will be volume, file #, etc., but it is a logical value and could be anything depending on the storage engine
+ int ofs;
+
+public:
+ enum SentinelValues {
+        /* note NullOfs is different. TODO: clean up. See refs to NullOfs in code - its use is valid, but outside the DiskLoc context it is confusing as-is. */
+ NullOfs = -1,
+
+ // Caps the number of files that may be allocated in a database, allowing about 32TB of
+        // data per db. Note that the DiskLoc and DiskLoc56Bit types support more files than
+ // this value, as does the data storage format.
+ MaxFiles = 16000,
+
+ // How invalid DiskLocs are represented in RecordIds.
+ InvalidRepr = -2LL,
+ };
- bool isNull() const { return _a == -1; }
- DiskLoc& Null() {
- _a = -1;
- ofs = 0; /* note NullOfs is different. todo clean up. see refs to NullOfs in code - use is valid but outside DiskLoc context so confusing as-is. */
- return *this;
- }
- void assertOk() const { verify(!isNull()); }
- DiskLoc& setInvalid() {
- _a = -2;
- ofs = 0;
- return *this;
- }
- bool isValid() const { return _a != -2; }
-
- std::string toString() const {
- if ( isNull() )
- return "null";
- std::stringstream ss;
- ss << _a << ':' << std::hex << ofs;
- return ss.str();
- }
+ DiskLoc(int a, int Ofs) : _a(a), ofs(Ofs) {}
+ DiskLoc() {
+ Null();
+ }
- BSONObj toBSONObj() const { return BSON( "file" << _a << "offset" << ofs ); }
+ // Minimum allowed DiskLoc. No MmapV1RecordHeader may begin at this location because file and extent
+ // headers must precede Records in a file.
+ static DiskLoc min() {
+ return DiskLoc(0, 0);
+ }
- int a() const { return _a; }
+ // Maximum allowed DiskLoc.
+ // No MmapV1RecordHeader may begin at this location because the minimum size of a MmapV1RecordHeader is larger than
+    // one byte. Also, the last bit cannot be used because mmapv1 uses it for the "used" flag.
+ static DiskLoc max() {
+ return DiskLoc(0x7fffffff, 0x7ffffffe);
+ }
- int& GETOFS() { return ofs; }
- int getOfs() const { return ofs; }
- void set(int a, int b) {
- _a=a;
- ofs=b;
- }
+ bool questionable() const {
+ return ofs < -1 || _a < -1 || _a > 524288;
+ }
- void inc(int amt) {
- verify( !isNull() );
- ofs += amt;
- }
+ bool isNull() const {
+ return _a == -1;
+ }
+ DiskLoc& Null() {
+ _a = -1;
+ ofs =
+ 0; /* note NullOfs is different. todo clean up. see refs to NullOfs in code - use is valid but outside DiskLoc context so confusing as-is. */
+ return *this;
+ }
+ void assertOk() const {
+ verify(!isNull());
+ }
+ DiskLoc& setInvalid() {
+ _a = -2;
+ ofs = 0;
+ return *this;
+ }
+ bool isValid() const {
+ return _a != -2;
+ }
- bool sameFile(DiskLoc b) {
- return _a== b._a;
- }
+ std::string toString() const {
+ if (isNull())
+ return "null";
+ std::stringstream ss;
+ ss << _a << ':' << std::hex << ofs;
+ return ss.str();
+ }
- bool operator==(const DiskLoc& b) const {
- return _a==b._a&& ofs == b.ofs;
- }
- bool operator!=(const DiskLoc& b) const {
- return !(*this==b);
- }
- int compare(const DiskLoc& b) const {
- int x = _a - b._a;
- if ( x )
- return x;
- return ofs - b.ofs;
- }
+ BSONObj toBSONObj() const {
+ return BSON("file" << _a << "offset" << ofs);
+ }
- static DiskLoc fromRecordId(RecordId id) {
- if (id.isNormal())
- return DiskLoc((id.repr() >> 32), uint32_t(id.repr()));
+ int a() const {
+ return _a;
+ }
- if (id.isNull())
- return DiskLoc();
+ int& GETOFS() {
+ return ofs;
+ }
+ int getOfs() const {
+ return ofs;
+ }
+ void set(int a, int b) {
+ _a = a;
+ ofs = b;
+ }
- if (id == RecordId::max())
- return DiskLoc::max();
+ void inc(int amt) {
+ verify(!isNull());
+ ofs += amt;
+ }
- if (id == RecordId::min())
- return DiskLoc::min();
+ bool sameFile(DiskLoc b) {
+ return _a == b._a;
+ }
- dassert(id.repr() == InvalidRepr);
- return DiskLoc().setInvalid();
- }
+ bool operator==(const DiskLoc& b) const {
+ return _a == b._a && ofs == b.ofs;
+ }
+ bool operator!=(const DiskLoc& b) const {
+ return !(*this == b);
+ }
+ int compare(const DiskLoc& b) const {
+ int x = _a - b._a;
+ if (x)
+ return x;
+ return ofs - b.ofs;
+ }
+
+ static DiskLoc fromRecordId(RecordId id) {
+ if (id.isNormal())
+ return DiskLoc((id.repr() >> 32), uint32_t(id.repr()));
- RecordId toRecordId() const {
- if (_a >= 0) {
- if (*this == DiskLoc::min())
- return RecordId::min();
+ if (id.isNull())
+ return DiskLoc();
- if (*this == DiskLoc::max())
- return RecordId::max();
+ if (id == RecordId::max())
+ return DiskLoc::max();
- return RecordId(uint64_t(_a) << 32 | uint32_t(ofs));
- }
+ if (id == RecordId::min())
+ return DiskLoc::min();
+
+ dassert(id.repr() == InvalidRepr);
+ return DiskLoc().setInvalid();
+ }
- if (isNull())
- return RecordId();
+ RecordId toRecordId() const {
+ if (_a >= 0) {
+ if (*this == DiskLoc::min())
+ return RecordId::min();
- dassert(!isValid());
- return RecordId(InvalidRepr);
+ if (*this == DiskLoc::max())
+ return RecordId::max();
+
+ return RecordId(uint64_t(_a) << 32 | uint32_t(ofs));
}
- };
-#pragma pack()
- inline bool operator< (const DiskLoc& rhs, const DiskLoc& lhs) { return rhs.compare(lhs) < 0; }
- inline bool operator<=(const DiskLoc& rhs, const DiskLoc& lhs) { return rhs.compare(lhs) <= 0; }
- inline bool operator> (const DiskLoc& rhs, const DiskLoc& lhs) { return rhs.compare(lhs) > 0; }
- inline bool operator>=(const DiskLoc& rhs, const DiskLoc& lhs) { return rhs.compare(lhs) >= 0; }
+ if (isNull())
+ return RecordId();
- inline std::ostream& operator<<( std::ostream &stream, const DiskLoc &loc ) {
- return stream << loc.toString();
+ dassert(!isValid());
+ return RecordId(InvalidRepr);
}
+};
+#pragma pack()
-} // namespace mongo
+inline bool operator<(const DiskLoc& rhs, const DiskLoc& lhs) {
+ return rhs.compare(lhs) < 0;
+}
+inline bool operator<=(const DiskLoc& rhs, const DiskLoc& lhs) {
+ return rhs.compare(lhs) <= 0;
+}
+inline bool operator>(const DiskLoc& rhs, const DiskLoc& lhs) {
+ return rhs.compare(lhs) > 0;
+}
+inline bool operator>=(const DiskLoc& rhs, const DiskLoc& lhs) {
+ return rhs.compare(lhs) >= 0;
+}
+
+inline std::ostream& operator<<(std::ostream& stream, const DiskLoc& loc) {
+ return stream << loc.toString();
+}
+
+} // namespace mongo
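
toRecordId()/fromRecordId() above pack the two 32-bit DiskLoc fields into a single 64-bit RecordId representation: file number in the high 32 bits, offset in the low 32 bits. A standalone round-trip sketch of the normal (non-sentinel) case:

```cpp
#include <cassert>
#include <cstdint>

int main() {
    const int a = 7;        // file number
    const int ofs = 0x1000; // offset within the file

    // The packing used by toRecordId() for a normal DiskLoc.
    const uint64_t repr = uint64_t(a) << 32 | uint32_t(ofs);

    // fromRecordId() recovers both halves.
    assert(int(repr >> 32) == a);
    assert(int(uint32_t(repr)) == ofs);
    return 0;
}
```
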
diff --git a/src/mongo/db/storage/mmap_v1/dur.cpp b/src/mongo/db/storage/mmap_v1/dur.cpp
index a596bba061f..21c729eea17 100644
--- a/src/mongo/db/storage/mmap_v1/dur.cpp
+++ b/src/mongo/db/storage/mmap_v1/dur.cpp
@@ -38,15 +38,15 @@
have to handle falling behind which would use too much ram (going back into a read lock would suffice to stop that).
for now (1.7.5/1.8.0) we are in read lock which is not ideal.
WRITETODATAFILES
- actually write to the database data files in this phase. currently done by memcpy'ing the writes back to
- the non-private MMF. alternatively one could write to the files the traditional way; however the way our
+ actually write to the database data files in this phase. currently done by memcpy'ing the writes back to
+ the non-private MMF. alternatively one could write to the files the traditional way; however the way our
storage engine works that isn't any faster (actually measured a tiny bit slower).
REMAPPRIVATEVIEW
we could in a write lock quickly flip readers back to the main view, then stay in read lock and do our real
remapping. with many files (e.g., 1000), remapping could be time consuming (several ms), so we don't want
to be too frequent.
there could be a slow down immediately after remapping as fresh copy-on-writes for commonly written pages will
- be required. so doing these remaps fractionally is helpful.
+ be required. so doing these remaps fractionally is helpful.
mutexes:
@@ -99,820 +99,788 @@
namespace mongo {
- using std::endl;
- using std::fixed;
- using std::hex;
- using std::set;
- using std::setprecision;
- using std::setw;
- using std::string;
- using std::stringstream;
+using std::endl;
+using std::fixed;
+using std::hex;
+using std::set;
+using std::setprecision;
+using std::setw;
+using std::string;
+using std::stringstream;
namespace dur {
namespace {
- // Used to activate the flush thread
- stdx::mutex flushMutex;
- stdx::condition_variable flushRequested;
+// Used to activate the flush thread
+stdx::mutex flushMutex;
+stdx::condition_variable flushRequested;
- // This is waited on for getlasterror acknowledgements. It means that data has been written to
- // the journal, but not necessarily applied to the shared view, so it is all right to
- // acknowledge the user operation, but NOT all right to delete the journal files for example.
- NotifyAll commitNotify;
+// This is waited on for getlasterror acknowledgements. It means that data has been written to
+// the journal, but not necessarily applied to the shared view, so it is all right to
+// acknowledge the user operation, but NOT all right to delete the journal files for example.
+NotifyAll commitNotify;
- // This is waited on for complete flush. It means that data has been both written to journal
- // and applied to the shared view, so it is allowed to delete the journal files. Used for
- // fsync:true, close DB, shutdown acknowledgements.
- NotifyAll applyToDataFilesNotify;
+// This is waited on for complete flush. It means that data has been both written to journal
+// and applied to the shared view, so it is allowed to delete the journal files. Used for
+// fsync:true, close DB, shutdown acknowledgements.
+NotifyAll applyToDataFilesNotify;
- // When set, the flush thread will exit
- AtomicUInt32 shutdownRequested(0);
+// When set, the flush thread will exit
+AtomicUInt32 shutdownRequested(0);
- enum {
- // How many commit cycles to do before considering doing a remap
- NumCommitsBeforeRemap = 10,
+enum {
+ // How many commit cycles to do before considering doing a remap
+ NumCommitsBeforeRemap = 10,
- // How many outstanding journal flushes should be allowed before applying writer back
- // pressure. Size of 1 allows two journal blocks to be in the process of being written -
- // one on the journal writer's buffer and one blocked waiting to be picked up.
- NumAsyncJournalWrites = 1,
- };
+ // How many outstanding journal flushes should be allowed before applying writer back
+ // pressure. Size of 1 allows two journal blocks to be in the process of being written -
+ // one on the journal writer's buffer and one blocked waiting to be picked up.
+ NumAsyncJournalWrites = 1,
+};
- // Remap loop state
- unsigned remapFileToStartAt;
+// Remap loop state
+unsigned remapFileToStartAt;
- // How frequently to reset the durability statistics
- enum { DurStatsResetIntervalMillis = 3 * 1000 };
+// How frequently to reset the durability statistics
+enum { DurStatsResetIntervalMillis = 3 * 1000 };
- // Size sanity checks
- BOOST_STATIC_ASSERT(UncommittedBytesLimit > BSONObjMaxInternalSize * 3);
- BOOST_STATIC_ASSERT(sizeof(void*) == 4 || UncommittedBytesLimit > BSONObjMaxInternalSize * 6);
+// Size sanity checks
+BOOST_STATIC_ASSERT(UncommittedBytesLimit > BSONObjMaxInternalSize * 3);
+BOOST_STATIC_ASSERT(sizeof(void*) == 4 || UncommittedBytesLimit > BSONObjMaxInternalSize * 6);
- /**
- * MMAP V1 durability server status section.
- */
- class DurSSS : public ServerStatusSection {
- public:
- DurSSS() : ServerStatusSection("dur") {
+/**
+ * MMAP V1 durability server status section.
+ */
+class DurSSS : public ServerStatusSection {
+public:
+ DurSSS() : ServerStatusSection("dur") {}
- }
+ virtual bool includeByDefault() const {
+ return true;
+ }
- virtual bool includeByDefault() const { return true; }
+ virtual BSONObj generateSection(OperationContext* txn, const BSONElement& configElement) const {
+ if (!getDur().isDurable()) {
+ return BSONObj();
+ }
- virtual BSONObj generateSection(OperationContext* txn,
- const BSONElement& configElement) const {
+ return dur::stats.asObj();
+ }
- if (!getDur().isDurable()) {
- return BSONObj();
- }
+} durSSS;
- return dur::stats.asObj();
- }
- } durSSS;
+/**
+ * A no-op durability interface. Used for the case when journaling is not enabled.
+ */
+class NonDurableImpl : public DurableInterface {
+public:
+ NonDurableImpl() {}
+ // DurableInterface virtual methods
+ virtual void* writingPtr(void* x, unsigned len) {
+ return x;
+ }
+ virtual void declareWriteIntent(void*, unsigned) {}
+ virtual void declareWriteIntents(const std::vector<std::pair<void*, unsigned>>& intents) {}
+ virtual void createdFile(const std::string& filename, unsigned long long len) {}
+ virtual bool waitUntilDurable() {
+ return false;
+ }
+ virtual bool commitNow(OperationContext* txn) {
+ return false;
+ }
+ virtual bool commitIfNeeded() {
+ return false;
+ }
+ virtual void syncDataAndTruncateJournal(OperationContext* txn) {}
+ virtual bool isDurable() const {
+ return false;
+ }
+ virtual void closingFileNotification() {}
+ virtual void commitAndStopDurThread() {}
+};
- /**
- * A no-op durability interface. Used for the case when journaling is not enabled.
- */
- class NonDurableImpl : public DurableInterface {
- public:
- NonDurableImpl() { }
- // DurableInterface virtual methods
- virtual void* writingPtr(void *x, unsigned len) { return x; }
- virtual void declareWriteIntent(void*, unsigned) { }
- virtual void declareWriteIntents(const std::vector<std::pair<void*, unsigned> >& intents) {
+/**
+ * The actual durability interface, when journaling is enabled.
+ */
+class DurableImpl : public DurableInterface {
+public:
+ DurableImpl() {}
+
+ // DurableInterface virtual methods
+ virtual void declareWriteIntents(const std::vector<std::pair<void*, unsigned>>& intents);
+ virtual void createdFile(const std::string& filename, unsigned long long len);
+ virtual bool waitUntilDurable();
+ virtual bool commitNow(OperationContext* txn);
+ virtual bool commitIfNeeded();
+ virtual void syncDataAndTruncateJournal(OperationContext* txn);
+ virtual bool isDurable() const {
+ return true;
+ }
+ virtual void closingFileNotification();
+ virtual void commitAndStopDurThread();
- }
- virtual void createdFile(const std::string& filename, unsigned long long len) { }
- virtual bool waitUntilDurable() { return false; }
- virtual bool commitNow(OperationContext* txn) { return false; }
- virtual bool commitIfNeeded() { return false; }
- virtual void syncDataAndTruncateJournal(OperationContext* txn) {}
- virtual bool isDurable() const { return false; }
- virtual void closingFileNotification() { }
- virtual void commitAndStopDurThread() { }
- };
-
-
- /**
- * The actual durability interface, when journaling is enabled.
- */
- class DurableImpl : public DurableInterface {
- public:
- DurableImpl() { }
-
- // DurableInterface virtual methods
- virtual void declareWriteIntents(const std::vector<std::pair<void*, unsigned> >& intents);
- virtual void createdFile(const std::string& filename, unsigned long long len);
- virtual bool waitUntilDurable();
- virtual bool commitNow(OperationContext* txn);
- virtual bool commitIfNeeded();
- virtual void syncDataAndTruncateJournal(OperationContext* txn);
- virtual bool isDurable() const { return true; }
- virtual void closingFileNotification();
- virtual void commitAndStopDurThread();
-
- void start();
-
- private:
- stdx::thread _durThreadHandle;
- };
-
-
- /**
- * Diagnostic to check that the private view and the non-private view are in sync after
- * applying the journal changes. This function is very slow and only runs when paranoid checks
- * are enabled.
- *
- * Must be called under at least S flush lock to ensure that there are no concurrent writes
- * happening.
- */
- void debugValidateFileMapsMatch(const DurableMappedFile* mmf) {
- const unsigned char *p = (const unsigned char *)mmf->getView();
- const unsigned char *w = (const unsigned char *)mmf->view_write();
-
- // Ignore pre-allocated files that are not fully created yet
- if (!p || !w) {
- return;
- }
+ void start();
- if (memcmp(p, w, (unsigned)mmf->length()) == 0) {
- return;
- }
+private:
+ stdx::thread _durThreadHandle;
+};
- unsigned low = 0xffffffff;
- unsigned high = 0;
- log() << "DurParanoid mismatch in " << mmf->filename();
+/**
+ * Diagnostic to check that the private view and the non-private view are in sync after
+ * applying the journal changes. This function is very slow and only runs when paranoid checks
+ * are enabled.
+ *
+ * Must be called under at least S flush lock to ensure that there are no concurrent writes
+ * happening.
+ */
+void debugValidateFileMapsMatch(const DurableMappedFile* mmf) {
+ const unsigned char* p = (const unsigned char*)mmf->getView();
+ const unsigned char* w = (const unsigned char*)mmf->view_write();
- int logged = 0;
- unsigned lastMismatch = 0xffffffff;
+ // Ignore pre-allocated files that are not fully created yet
+ if (!p || !w) {
+ return;
+ }
- for (unsigned i = 0; i < mmf->length(); i++) {
- if (p[i] != w[i]) {
+ if (memcmp(p, w, (unsigned)mmf->length()) == 0) {
+ return;
+ }
- if (lastMismatch != 0xffffffff && lastMismatch + 1 != i) {
- // Separate blocks of mismatches
- log() << std::endl;
- }
+ unsigned low = 0xffffffff;
+ unsigned high = 0;
- lastMismatch = i;
+ log() << "DurParanoid mismatch in " << mmf->filename();
- if (++logged < 60) {
- if (logged == 1) {
- // For .ns files to find offset in record
- log() << "ofs % 628 = 0x" << hex << (i % 628) << endl;
- }
+ int logged = 0;
+ unsigned lastMismatch = 0xffffffff;
- stringstream ss;
- ss << "mismatch ofs:" << hex << i
- << "\tfilemap:" << setw(2) << (unsigned)w[i]
- << "\tprivmap:" << setw(2) << (unsigned)p[i];
+ for (unsigned i = 0; i < mmf->length(); i++) {
+ if (p[i] != w[i]) {
+ if (lastMismatch != 0xffffffff && lastMismatch + 1 != i) {
+ // Separate blocks of mismatches
+ log() << std::endl;
+ }
- if (p[i] > 32 && p[i] <= 126) {
- ss << '\t' << p[i];
- }
+ lastMismatch = i;
- log() << ss.str() << endl;
+ if (++logged < 60) {
+ if (logged == 1) {
+ // For .ns files to find offset in record
+ log() << "ofs % 628 = 0x" << hex << (i % 628) << endl;
}
- if (logged == 60) {
- log() << "..." << endl;
+ stringstream ss;
+ ss << "mismatch ofs:" << hex << i << "\tfilemap:" << setw(2) << (unsigned)w[i]
+ << "\tprivmap:" << setw(2) << (unsigned)p[i];
+
+ if (p[i] > 32 && p[i] <= 126) {
+ ss << '\t' << p[i];
}
- if (i < low) low = i;
- if (i > high) high = i;
+ log() << ss.str() << endl;
}
+
+ if (logged == 60) {
+ log() << "..." << endl;
+ }
+
+ if (i < low)
+ low = i;
+ if (i > high)
+ high = i;
}
+ }
- if (low != 0xffffffff) {
- std::stringstream ss;
- ss << "journal error warning views mismatch " << mmf->filename() << ' '
- << hex << low << ".." << high
- << " len:" << high - low + 1;
+ if (low != 0xffffffff) {
+ std::stringstream ss;
+ ss << "journal error warning views mismatch " << mmf->filename() << ' ' << hex << low
+ << ".." << high << " len:" << high - low + 1;
- log() << ss.str() << endl;
- log() << "priv loc: " << (void*)(p + low) << ' ' << endl;
+ log() << ss.str() << endl;
+ log() << "priv loc: " << (void*)(p + low) << ' ' << endl;
- severe() << "Written data does not match in-memory view. Missing WriteIntent?";
- invariant(false);
- }
+ severe() << "Written data does not match in-memory view. Missing WriteIntent?";
+ invariant(false);
}
+}
- /**
- * Main code of the remap private view function.
- */
- void remapPrivateViewImpl(double fraction) {
- LOG(4) << "journal REMAPPRIVATEVIEW" << endl;
-
- // There is no way that the set of files can change while we are in this method, because
- // we hold the flush lock in X mode. For files to go away, a database needs to be dropped,
- // which means acquiring the flush lock in at least IX mode.
- //
- // However, the record fetcher logic unfortunately operates without any locks and on
- // Windows and Solaris remap is not atomic and there is a window where the record fetcher
- // might get an access violation. That's why we acquire the mongo files mutex here in X
- // mode and the record fetcher takes in in S-mode (see MmapV1RecordFetcher for more
- // detail).
- //
- // See SERVER-5723 for performance improvement.
- // See SERVER-5680 to see why this code is necessary on Windows.
- // See SERVER-8795 to see why this code is necessary on Solaris.
+/**
+ * Main code of the remap private view function.
+ */
+void remapPrivateViewImpl(double fraction) {
+ LOG(4) << "journal REMAPPRIVATEVIEW" << endl;
+
+// There is no way that the set of files can change while we are in this method, because
+// we hold the flush lock in X mode. For files to go away, a database needs to be dropped,
+// which means acquiring the flush lock in at least IX mode.
+//
+// However, the record fetcher logic unfortunately operates without any locks and on
+// Windows and Solaris remap is not atomic and there is a window where the record fetcher
+// might get an access violation. That's why we acquire the mongo files mutex here in X
+// mode and the record fetcher takes it in S-mode (see MmapV1RecordFetcher for more
+// detail).
+//
+// See SERVER-5723 for performance improvement.
+// See SERVER-5680 to see why this code is necessary on Windows.
+// See SERVER-8795 to see why this code is necessary on Solaris.
#if defined(_WIN32) || defined(__sun)
- LockMongoFilesExclusive lk;
+ LockMongoFilesExclusive lk;
#else
- LockMongoFilesShared lk;
+ LockMongoFilesShared lk;
#endif
- std::set<MongoFile*>& files = MongoFile::getAllFiles();
+ std::set<MongoFile*>& files = MongoFile::getAllFiles();
- const unsigned sz = files.size();
- if (sz == 0) {
- return;
- }
+ const unsigned sz = files.size();
+ if (sz == 0) {
+ return;
+ }
- unsigned ntodo = (unsigned) (sz * fraction);
- if( ntodo < 1 ) ntodo = 1;
- if( ntodo > sz ) ntodo = sz;
+ unsigned ntodo = (unsigned)(sz * fraction);
+ if (ntodo < 1)
+ ntodo = 1;
+ if (ntodo > sz)
+ ntodo = sz;
+
+ const set<MongoFile*>::iterator b = files.begin();
+ const set<MongoFile*>::iterator e = files.end();
+ set<MongoFile*>::iterator i = b;
+
+ // Skip to our starting position as remembered from the last remap cycle
+ for (unsigned x = 0; x < remapFileToStartAt; x++) {
+ i++;
+ if (i == e)
+ i = b;
+ }
- const set<MongoFile*>::iterator b = files.begin();
- const set<MongoFile*>::iterator e = files.end();
- set<MongoFile*>::iterator i = b;
+ // Mark where to start on the next cycle
+ const unsigned startedAt = remapFileToStartAt;
+ remapFileToStartAt = (remapFileToStartAt + ntodo) % sz;
- // Skip to our starting position as remembered from the last remap cycle
- for (unsigned x = 0; x < remapFileToStartAt; x++) {
- i++;
- if (i == e) i = b;
- }
+ Timer t;
- // Mark where to start on the next cycle
- const unsigned startedAt = remapFileToStartAt;
- remapFileToStartAt = (remapFileToStartAt + ntodo) % sz;
+ for (unsigned x = 0; x < ntodo; x++) {
+ if ((*i)->isDurableMappedFile()) {
+ DurableMappedFile* const mmf = (DurableMappedFile*)*i;
- Timer t;
+ // Sanity check that the contents of the shared and the private view match so we
+ // don't end up overwriting data.
+ if (mmapv1GlobalOptions.journalOptions & MMAPV1Options::JournalParanoid) {
+ debugValidateFileMapsMatch(mmf);
+ }
- for (unsigned x = 0; x < ntodo; x++) {
- if ((*i)->isDurableMappedFile()) {
- DurableMappedFile* const mmf = (DurableMappedFile*) *i;
+ if (mmf->willNeedRemap()) {
+ mmf->remapThePrivateView();
+ }
- // Sanity check that the contents of the shared and the private view match so we
- // don't end up overwriting data.
- if (mmapv1GlobalOptions.journalOptions & MMAPV1Options::JournalParanoid) {
- debugValidateFileMapsMatch(mmf);
- }
+ i++;
- if (mmf->willNeedRemap()) {
- mmf->remapThePrivateView();
- }
+ if (i == e)
+ i = b;
+ }
+ }
- i++;
+ LOG(3) << "journal REMAPPRIVATEVIEW done startedAt: " << startedAt << " n:" << ntodo << ' '
+ << t.millis() << "ms";
+}
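
remapPrivateViewImpl() above walks only a fraction of the mapped files per pass, remembering where to resume so that every file is eventually visited. A minimal standalone sketch of that round-robin traversal (a plain std::set<int> stands in for the MongoFile set):

```cpp
#include <iostream>
#include <set>

// Visit roughly `fraction` of the set each pass, resuming from where the
// previous pass stopped; returns the starting position for the next pass.
unsigned visitSome(const std::set<int>& files, unsigned startAt, double fraction) {
    const unsigned sz = (unsigned)files.size();
    if (sz == 0)
        return 0;

    unsigned ntodo = unsigned(sz * fraction);
    if (ntodo < 1)
        ntodo = 1;
    if (ntodo > sz)
        ntodo = sz;

    std::set<int>::const_iterator it = files.begin();
    for (unsigned x = 0; x < startAt % sz; x++)
        ++it;  // skip to the remembered position

    for (unsigned x = 0; x < ntodo; x++) {
        std::cout << *it << ' ';  // "remap" this file
        if (++it == files.end())
            it = files.begin();  // wrap around
    }
    return (startAt + ntodo) % sz;
}

int main() {
    const std::set<int> files{1, 2, 3, 4, 5};
    unsigned pos = 0;
    pos = visitSome(files, pos, 0.4);  // prints 1 2
    pos = visitSome(files, pos, 0.4);  // prints 3 4
    std::cout << '\n';
}
```
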
- if (i == e) i = b;
- }
- }
- LOG(3) << "journal REMAPPRIVATEVIEW done startedAt: " << startedAt << " n:" << ntodo
- << ' ' << t.millis() << "ms";
- }
+// One instance of each durability interface
+DurableImpl durableImpl;
+NonDurableImpl nonDurableImpl;
+} // namespace
- // One instance of each durability interface
- DurableImpl durableImpl;
- NonDurableImpl nonDurableImpl;
-} // namespace
+// Declared in dur_preplogbuffer.cpp
+void PREPLOGBUFFER(JSectHeader& outHeader, AlignedBuilder& outBuffer);
+// Declared in dur_journal.cpp
+boost::filesystem::path getJournalDir();
+void preallocateFiles();
- // Declared in dur_preplogbuffer.cpp
- void PREPLOGBUFFER(JSectHeader& outHeader, AlignedBuilder& outBuffer);
+// Forward declaration
+static void durThread();
- // Declared in dur_journal.cpp
- boost::filesystem::path getJournalDir();
- void preallocateFiles();
+// Durability activity statistics
+Stats stats;
- // Forward declaration
- static void durThread();
+// Reference to the write intents tracking object
+CommitJob commitJob;
- // Durability activity statistics
- Stats stats;
+// Reference to the active durability interface
+DurableInterface* DurableInterface::_impl(&nonDurableImpl);
- // Reference to the write intents tracking object
- CommitJob commitJob;
- // Reference to the active durability interface
- DurableInterface* DurableInterface::_impl(&nonDurableImpl);
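
Note that _impl starts out pointing at the no-op implementation, so callers never branch on whether journaling is enabled. A minimal sketch of this null-object arrangement (all names illustrative, not the MongoDB classes):

```cpp
#include <iostream>

// Callers always go through the interface pointer; with journaling off it
// targets a no-op implementation, so hot paths need no "is journaling
// enabled?" checks.
struct Durable {
    virtual bool waitUntilDurable() = 0;
    virtual ~Durable() {}
};

struct NonDurable : Durable {
    bool waitUntilDurable() override { return false; }  // nothing to wait for
};

struct Journaled : Durable {
    bool waitUntilDurable() override { return true; }  // would block on the journal
};

NonDurable nonDurable;
Journaled journaled;
Durable* impl = &nonDurable;  // swapped to &journaled when journaling starts

int main() {
    std::cout << impl->waitUntilDurable() << "\n";  // 0: no-op path
    impl = &journaled;
    std::cout << impl->waitUntilDurable() << "\n";  // 1
}
```
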
+//
+// Stats
+//
+Stats::Stats() : _currIdx(0) {}
- //
- // Stats
- //
+void Stats::reset() {
+ // Seal the current metrics
+ _stats[_currIdx]._durationMillis = _stats[_currIdx].getCurrentDurationMillis();
- Stats::Stats() : _currIdx(0) {
+ // Use a new metric
+ const unsigned newCurrIdx = (_currIdx + 1) % (sizeof(_stats) / sizeof(_stats[0]));
+ _stats[newCurrIdx].reset();
- }
+ _currIdx = newCurrIdx;
+}
- void Stats::reset() {
- // Seal the current metrics
- _stats[_currIdx]._durationMillis = _stats[_currIdx].getCurrentDurationMillis();
+BSONObj Stats::asObj() const {
+ // Use the previous statistic
+ const S& stats = _stats[(_currIdx - 1) % (sizeof(_stats) / sizeof(_stats[0]))];
- // Use a new metric
- const unsigned newCurrIdx = (_currIdx + 1) % (sizeof(_stats) / sizeof(_stats[0]));
- _stats[newCurrIdx].reset();
+ BSONObjBuilder builder;
+ stats._asObj(&builder);
- _currIdx = newCurrIdx;
- }
+ return builder.obj();
+}
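
Stats::reset() seals the slot being written and rotates to a fresh one, while asObj() reports the previously completed slot. A two-slot standalone sketch of that rolling-window rotation (field names illustrative):

```cpp
#include <iostream>

// Writers fill the current slot; reset() seals it and rotates; readers report
// the previously completed slot, never the one being mutated.
struct RollingStats {
    struct S {
        long long commits = 0;
    };
    S _stats[2];
    unsigned _currIdx = 0;

    S& curr() {
        return _stats[_currIdx];
    }
    void reset() {
        _currIdx = (_currIdx + 1) % 2;  // rotate to a fresh slot
        _stats[_currIdx] = S();
    }
    const S& completed() const {
        return _stats[(_currIdx + 1) % 2];  // with two slots, previous == other
    }
};

int main() {
    RollingStats s;
    s.curr().commits = 42;
    s.reset();                                   // seal and rotate
    std::cout << s.completed().commits << "\n";  // 42
}
```
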
- BSONObj Stats::asObj() const {
- // Use the previous statistic
- const S& stats = _stats[(_currIdx - 1) % (sizeof(_stats) / sizeof(_stats[0]))];
+void Stats::S::reset() {
+ memset(this, 0, sizeof(*this));
+ _startTimeMicros = curTimeMicros64();
+}
- BSONObjBuilder builder;
- stats._asObj(&builder);
+std::string Stats::S::_CSVHeader() const {
+ return "cmts\t jrnMB\t wrDFMB\t cIWLk\t early\t prpLgB\t wrToJ\t wrToDF\t rmpPrVw";
+}
- return builder.obj();
- }
+std::string Stats::S::_asCSV() const {
+ stringstream ss;
+ ss << setprecision(2) << _commits << '\t' << _journaledBytes / 1000000.0 << '\t'
+ << _writeToDataFilesBytes / 1000000.0 << '\t' << _commitsInWriteLock << '\t' << 0 << '\t'
+ << (unsigned)(_prepLogBufferMicros / 1000) << '\t'
+ << (unsigned)(_writeToJournalMicros / 1000) << '\t'
+ << (unsigned)(_writeToDataFilesMicros / 1000) << '\t'
+ << (unsigned)(_remapPrivateViewMicros / 1000) << '\t' << (unsigned)(_commitsMicros / 1000)
+ << '\t' << (unsigned)(_commitsInWriteLockMicros / 1000) << '\t';
- void Stats::S::reset() {
- memset(this, 0, sizeof(*this));
- _startTimeMicros = curTimeMicros64();
- }
+ return ss.str();
+}
- std::string Stats::S::_CSVHeader() const {
- return "cmts\t jrnMB\t wrDFMB\t cIWLk\t early\t prpLgB\t wrToJ\t wrToDF\t rmpPrVw";
- }
+void Stats::S::_asObj(BSONObjBuilder* builder) const {
+ BSONObjBuilder& b = *builder;
+ b << "commits" << _commits << "journaledMB" << _journaledBytes / 1000000.0
+ << "writeToDataFilesMB" << _writeToDataFilesBytes / 1000000.0 << "compression"
+ << _journaledBytes / (_uncompressedBytes + 1.0) << "commitsInWriteLock" << _commitsInWriteLock
+ << "earlyCommits" << 0 << "timeMs"
+ << BSON("dt" << _durationMillis << "prepLogBuffer" << (unsigned)(_prepLogBufferMicros / 1000)
+ << "writeToJournal" << (unsigned)(_writeToJournalMicros / 1000)
+ << "writeToDataFiles" << (unsigned)(_writeToDataFilesMicros / 1000)
+ << "remapPrivateView" << (unsigned)(_remapPrivateViewMicros / 1000) << "commits"
+ << (unsigned)(_commitsMicros / 1000) << "commitsInWriteLock"
+ << (unsigned)(_commitsInWriteLockMicros / 1000));
- std::string Stats::S::_asCSV() const {
- stringstream ss;
- ss << setprecision(2)
- << _commits << '\t'
- << _journaledBytes / 1000000.0 << '\t'
- << _writeToDataFilesBytes / 1000000.0 << '\t'
- << _commitsInWriteLock << '\t'
- << 0 << '\t'
- << (unsigned) (_prepLogBufferMicros / 1000) << '\t'
- << (unsigned) (_writeToJournalMicros / 1000) << '\t'
- << (unsigned) (_writeToDataFilesMicros / 1000) << '\t'
- << (unsigned) (_remapPrivateViewMicros / 1000) << '\t'
- << (unsigned) (_commitsMicros / 1000) << '\t'
- << (unsigned) (_commitsInWriteLockMicros / 1000) << '\t';
-
- return ss.str();
+ if (mmapv1GlobalOptions.journalCommitInterval != 0) {
+ b << "journalCommitIntervalMs" << mmapv1GlobalOptions.journalCommitInterval;
}
+}
- void Stats::S::_asObj(BSONObjBuilder* builder) const {
- BSONObjBuilder& b = *builder;
- b << "commits" << _commits
- << "journaledMB" << _journaledBytes / 1000000.0
- << "writeToDataFilesMB" << _writeToDataFilesBytes / 1000000.0
- << "compression" << _journaledBytes / (_uncompressedBytes + 1.0)
- << "commitsInWriteLock" << _commitsInWriteLock
- << "earlyCommits" << 0
- << "timeMs" << BSON("dt" << _durationMillis <<
- "prepLogBuffer" << (unsigned) (_prepLogBufferMicros / 1000) <<
- "writeToJournal" << (unsigned) (_writeToJournalMicros / 1000) <<
- "writeToDataFiles" << (unsigned) (_writeToDataFilesMicros / 1000) <<
- "remapPrivateView" << (unsigned) (_remapPrivateViewMicros / 1000) <<
- "commits" << (unsigned)(_commitsMicros / 1000) <<
- "commitsInWriteLock"
- << (unsigned)(_commitsInWriteLockMicros / 1000));
-
- if (mmapv1GlobalOptions.journalCommitInterval != 0) {
- b << "journalCommitIntervalMs" << mmapv1GlobalOptions.journalCommitInterval;
- }
- }
+//
+// DurableInterface
+//
- //
- // DurableInterface
- //
+DurableInterface::DurableInterface() {}
- DurableInterface::DurableInterface() {
+DurableInterface::~DurableInterface() {}
- }
- DurableInterface::~DurableInterface() {
+//
+// DurableImpl
+//
- }
+bool DurableImpl::commitNow(OperationContext* txn) {
+ NotifyAll::When when = commitNotify.now();
+ AutoYieldFlushLockForMMAPV1Commit flushLockYield(txn->lockState());
- //
- // DurableImpl
- //
+ // There is always just one waiting anyways
+ flushRequested.notify_one();
- bool DurableImpl::commitNow(OperationContext* txn) {
- NotifyAll::When when = commitNotify.now();
+ // commitNotify.waitFor ensures that whatever was scheduled for journaling before this
+ // call has been persisted to the journal file. This does not mean that this data has been
+ // applied to the shared view yet though, that's why we wait for applyToDataFilesNotify.
+ applyToDataFilesNotify.waitFor(when);
- AutoYieldFlushLockForMMAPV1Commit flushLockYield(txn->lockState());
+ return true;
+}
- // There is always just one waiting anyways
- flushRequested.notify_one();
+bool DurableImpl::waitUntilDurable() {
+ commitNotify.awaitBeyondNow();
+ return true;
+}
- // commitNotify.waitFor ensures that whatever was scheduled for journaling before this
- // call has been persisted to the journal file. This does not mean that this data has been
- // applied to the shared view yet though, that's why we wait for applyToDataFilesNotify.
- applyToDataFilesNotify.waitFor(when);
+void DurableImpl::createdFile(const std::string& filename, unsigned long long len) {
+ std::shared_ptr<DurOp> op(new FileCreatedOp(filename, len));
+ commitJob.noteOp(op);
+}
- return true;
- }
- bool DurableImpl::waitUntilDurable() {
- commitNotify.awaitBeyondNow();
- return true;
+void DurableImpl::declareWriteIntents(const std::vector<std::pair<void*, unsigned>>& intents) {
+ typedef std::vector<std::pair<void*, unsigned>> Intents;
+ stdx::lock_guard<SimpleMutex> lk(commitJob.groupCommitMutex);
+ for (Intents::const_iterator it(intents.begin()), end(intents.end()); it != end; ++it) {
+ commitJob.note(it->first, it->second);
}
+}
- void DurableImpl::createdFile(const std::string& filename, unsigned long long len) {
- std::shared_ptr<DurOp> op(new FileCreatedOp(filename, len));
- commitJob.noteOp(op);
+bool DurableImpl::commitIfNeeded() {
+ if (MONGO_likely(commitJob.bytes() < UncommittedBytesLimit)) {
+ return false;
}
+ // Just wake up the flush thread
+ flushRequested.notify_one();
+ return true;
+}
- void DurableImpl::declareWriteIntents(
- const std::vector<std::pair<void*, unsigned> >& intents) {
- typedef std::vector<std::pair<void*, unsigned> > Intents;
- stdx::lock_guard<SimpleMutex> lk(commitJob.groupCommitMutex);
- for (Intents::const_iterator it(intents.begin()), end(intents.end()); it != end; ++it) {
- commitJob.note(it->first, it->second);
- }
- }
-
- bool DurableImpl::commitIfNeeded() {
- if (MONGO_likely(commitJob.bytes() < UncommittedBytesLimit)) {
- return false;
- }
+void DurableImpl::syncDataAndTruncateJournal(OperationContext* txn) {
+ invariant(txn->lockState()->isW());
- // Just wake up the flush thread
- flushRequested.notify_one();
- return true;
- }
+ // Once this returns, all the outstanding journal has been applied to the data files and
+ // so it's safe to do the flushAll/journalCleanup below.
+ commitNow(txn);
- void DurableImpl::syncDataAndTruncateJournal(OperationContext* txn) {
- invariant(txn->lockState()->isW());
+ // Flush the shared view to disk.
+ MongoFile::flushAll(true);
- // Once this returns, all the outstanding journal has been applied to the data files and
- // so it's safe to do the flushAll/journalCleanup below.
- commitNow(txn);
+ // Once the shared view has been flushed, we do not need the journal files anymore.
+ journalCleanup(true);
- // Flush the shared view to disk.
- MongoFile::flushAll(true);
+ // Double check post-conditions
+ invariant(!haveJournalFiles());
+}
- // Once the shared view has been flushed, we do not need the journal files anymore.
- journalCleanup(true);
+void DurableImpl::closingFileNotification() {
+ if (commitJob.hasWritten()) {
+ severe() << "journal warning files are closing outside locks with writes pending";
- // Double check post-conditions
- invariant(!haveJournalFiles());
+ // File is closing while there are unwritten changes
+ invariant(false);
}
+}
- void DurableImpl::closingFileNotification() {
- if (commitJob.hasWritten()) {
- severe() << "journal warning files are closing outside locks with writes pending";
+void DurableImpl::commitAndStopDurThread() {
+ NotifyAll::When when = commitNotify.now();
- // File is closing while there are unwritten changes
- invariant(false);
- }
- }
+ // There is always just one waiting anyways
+ flushRequested.notify_one();
- void DurableImpl::commitAndStopDurThread() {
- NotifyAll::When when = commitNotify.now();
+ // commitNotify.waitFor ensures that whatever was scheduled for journaling before this
+ // call has been persisted to the journal file. This does not mean that this data has been
+ // applied to the shared view yet though, that's why we wait for applyToDataFilesNotify.
+ applyToDataFilesNotify.waitFor(when);
- // There is always just one waiting anyways
- flushRequested.notify_one();
+ // Flush the shared view to disk.
+ MongoFile::flushAll(true);
- // commitNotify.waitFor ensures that whatever was scheduled for journaling before this
- // call has been persisted to the journal file. This does not mean that this data has been
- // applied to the shared view yet though, that's why we wait for applyToDataFilesNotify.
- applyToDataFilesNotify.waitFor(when);
+ // Once the shared view has been flushed, we do not need the journal files anymore.
+ journalCleanup(true);
- // Flush the shared view to disk.
- MongoFile::flushAll(true);
+ // Double check post-conditions
+ invariant(!haveJournalFiles());
- // Once the shared view has been flushed, we do not need the journal files anymore.
- journalCleanup(true);
+ shutdownRequested.store(1);
- // Double check post-conditions
- invariant(!haveJournalFiles());
+ // Wait for the durability thread to terminate
+ log() << "Terminating durability thread ...";
+ _durThreadHandle.join();
+}
- shutdownRequested.store(1);
+void DurableImpl::start() {
+ // Start the durability thread
+ stdx::thread t(durThread);
+ _durThreadHandle.swap(t);
+}
- // Wait for the durability thread to terminate
- log() << "Terminating durability thread ...";
- _durThreadHandle.join();
- }
- void DurableImpl::start() {
- // Start the durability thread
- stdx::thread t(durThread);
- _durThreadHandle.swap(t);
+/**
+ * Remaps the private view from the shared view so that it does not consume too much
+ * copy-on-write/swap space. Must only be called after the in-memory journal has been flushed
+ * to disk and applied on top of the shared view.
+ *
+ * @param fraction Value between (0, 1] indicating what fraction of the memory to remap.
+ * Remapping too much or too frequently incurs copy-on-write page fault cost.
+ */
+static void remapPrivateView(double fraction) {
+ // Remapping private views must occur after WRITETODATAFILES otherwise we wouldn't see any
+ // newly written data on reads.
+ invariant(!commitJob.hasWritten());
+
+ try {
+ Timer t;
+ remapPrivateViewImpl(fraction);
+ stats.curr()->_remapPrivateViewMicros += t.micros();
+
+ LOG(4) << "remapPrivateView end";
+ return;
+ } catch (DBException& e) {
+ severe() << "dbexception in remapPrivateView causing immediate shutdown: " << e.toString();
+ } catch (std::ios_base::failure& e) {
+ severe() << "ios_base exception in remapPrivateView causing immediate shutdown: "
+ << e.what();
+ } catch (std::bad_alloc& e) {
+ severe() << "bad_alloc exception in remapPrivateView causing immediate shutdown: "
+ << e.what();
+ } catch (std::exception& e) {
+ severe() << "exception in remapPrivateView causing immediate shutdown: " << e.what();
+ } catch (...) {
+ severe() << "unknown exception in remapPrivateView causing immediate shutdown: ";
}
+ invariant(false);
+}
- /**
- * Remaps the private view from the shared view so that it does not consume too much
- * copy-on-write/swap space. Must only be called after the in-memory journal has been flushed
- * to disk and applied on top of the shared view.
- *
- * @param fraction Value between (0, 1] indicating what fraction of the memory to remap.
- * Remapping too much or too frequently incurs copy-on-write page fault cost.
- */
- static void remapPrivateView(double fraction) {
- // Remapping private views must occur after WRITETODATAFILES otherwise we wouldn't see any
- // newly written data on reads.
- invariant(!commitJob.hasWritten());
- try {
- Timer t;
- remapPrivateViewImpl(fraction);
- stats.curr()->_remapPrivateViewMicros += t.micros();
+/**
+ * The main durability thread loop. There is a single instance of this function running.
+ */
+static void durThread() {
+ Client::initThread("durability");
- LOG(4) << "remapPrivateView end";
- return;
- }
- catch (DBException& e) {
- severe() << "dbexception in remapPrivateView causing immediate shutdown: "
- << e.toString();
- }
- catch (std::ios_base::failure& e) {
- severe() << "ios_base exception in remapPrivateView causing immediate shutdown: "
- << e.what();
- }
- catch (std::bad_alloc& e) {
- severe() << "bad_alloc exception in remapPrivateView causing immediate shutdown: "
- << e.what();
- }
- catch (std::exception& e) {
- severe() << "exception in remapPrivateView causing immediate shutdown: "
- << e.what();
- }
- catch (...) {
- severe() << "unknown exception in remapPrivateView causing immediate shutdown: ";
- }
+ log() << "Durability thread started";
- invariant(false);
+ bool samePartition = true;
+ try {
+ const std::string dbpathDir = boost::filesystem::path(storageGlobalParams.dbpath).string();
+ samePartition = onSamePartition(getJournalDir().string(), dbpathDir);
+ } catch (...) {
}
+ // Spawn the journal writer thread
+ JournalWriter journalWriter(&commitNotify, &applyToDataFilesNotify, NumAsyncJournalWrites);
+ journalWriter.start();
- /**
- * The main durability thread loop. There is a single instance of this function running.
- */
- static void durThread() {
- Client::initThread("durability");
+ // Used as an estimate of how much / how fast to remap
+ uint64_t commitCounter(0);
+ uint64_t estimatedPrivateMapSize(0);
+ uint64_t remapLastTimestamp(0);
- log() << "Durability thread started";
-
- bool samePartition = true;
- try {
- const std::string dbpathDir =
- boost::filesystem::path(storageGlobalParams.dbpath).string();
- samePartition = onSamePartition(getJournalDir().string(), dbpathDir);
+ while (shutdownRequested.loadRelaxed() == 0) {
+ unsigned ms = mmapv1GlobalOptions.journalCommitInterval;
+ if (ms == 0) {
+ ms = samePartition ? 100 : 30;
}
- catch(...) {
- }
+ // +1 so it never goes down to zero
+ const unsigned oneThird = (ms / 3) + 1;
- // Spawn the journal writer thread
- JournalWriter journalWriter(&commitNotify, &applyToDataFilesNotify, NumAsyncJournalWrites);
- journalWriter.start();
+ // Reset the stats based on the reset interval
+ if (stats.curr()->getCurrentDurationMillis() > DurStatsResetIntervalMillis) {
+ stats.reset();
+ }
- // Used as an estimate of how much / how fast to remap
- uint64_t commitCounter(0);
- uint64_t estimatedPrivateMapSize(0);
- uint64_t remapLastTimestamp(0);
+ try {
+ stdx::unique_lock<stdx::mutex> lock(flushMutex);
- while (shutdownRequested.loadRelaxed() == 0) {
- unsigned ms = mmapv1GlobalOptions.journalCommitInterval;
- if (ms == 0) {
- ms = samePartition ? 100 : 30;
- }
+ for (unsigned i = 0; i <= 2; i++) {
+ if (boost::cv_status::no_timeout ==
+ flushRequested.wait_for(lock, Milliseconds(oneThird))) {
+ // Someone forced a flush
+ break;
+ }
- // +1 so it never goes down to zero
- const unsigned oneThird = (ms / 3) + 1;
+ if (commitNotify.nWaiting()) {
+ // One or more getLastError j:true is pending
+ break;
+ }
- // Reset the stats based on the reset interval
- if (stats.curr()->getCurrentDurationMillis() > DurStatsResetIntervalMillis) {
- stats.reset();
+ if (commitJob.bytes() > UncommittedBytesLimit / 2) {
+ // The number of written bytes is growing
+ break;
+ }
}
- try {
- stdx::unique_lock<stdx::mutex> lock(flushMutex);
+ // The commit logic itself
+ LOG(4) << "groupCommit begin";
- for (unsigned i = 0; i <= 2; i++) {
- if (boost::cv_status::no_timeout == flushRequested.wait_for(
- lock, Milliseconds(oneThird))) {
- // Someone forced a flush
- break;
- }
+ Timer t;
- if (commitNotify.nWaiting()) {
- // One or more getLastError j:true is pending
- break;
+ OperationContextImpl txn;
+ AutoAcquireFlushLockForMMAPV1Commit autoFlushLock(txn.lockState());
+
+ // We need to snapshot the commitNumber after the flush lock has been obtained,
+ // because at this point we know that we have a stable snapshot of the data.
+ const NotifyAll::When commitNumber(commitNotify.now());
+
+ LOG(4) << "Processing commit number " << commitNumber;
+
+ if (!commitJob.hasWritten()) {
+ // We do not need the journal lock anymore. Free it here, for the really
+ // unlikely possibility that the writeBuffer command below blocks.
+ autoFlushLock.release();
+
+                // getlasterror request could have come after the data was already committed.
+ // No need to call committingReset though, because we have not done any
+ // writes (hasWritten == false).
+ JournalWriter::Buffer* const buffer = journalWriter.newBuffer();
+ buffer->setNoop();
+
+ journalWriter.writeBuffer(buffer, commitNumber);
+ } else {
+ // This copies all the in-memory changes into the journal writer's buffer.
+ JournalWriter::Buffer* const buffer = journalWriter.newBuffer();
+ PREPLOGBUFFER(buffer->getHeader(), buffer->getBuilder());
+
+ estimatedPrivateMapSize += commitJob.bytes();
+ commitCounter++;
+
+ // Now that the write intents have been copied to the buffer, the commit job is
+ // free to be reused. We need to reset the commit job's contents while under
+ // the S flush lock, because otherwise someone might have done a write and this
+ // would wipe out their changes without ever being committed.
+ commitJob.committingReset();
+
+ double systemMemoryPressurePercentage =
+ ProcessInfo::getSystemMemoryPressurePercentage();
+
+ // Now that the in-memory modifications have been collected, we can potentially
+ // release the flush lock if remap is not necessary.
+                // When we remap due to memory pressure, we look at two criteria:
+                // 1. If the amount of 4k pages touched exceeds 512 MB,
+                //    a reasonable estimate of memory pressure on Linux.
+                // 2. Check if the amount of free memory on the machine is running low,
+                //    since #1 underestimates the memory pressure on Windows, which
+                //    commits in 64MB chunks.
+ const bool shouldRemap = (estimatedPrivateMapSize >= UncommittedBytesLimit) ||
+ (systemMemoryPressurePercentage > 0.0) ||
+ (commitCounter % NumCommitsBeforeRemap == 0) ||
+ (mmapv1GlobalOptions.journalOptions & MMAPV1Options::JournalAlwaysRemap);
+
+ double remapFraction = 0.0;
+
+ if (shouldRemap) {
+ // We want to remap all private views about every 2 seconds. There could be
+ // ~1000 views so we do a little each pass. There will be copy on write
+ // faults after remapping, so doing a little bit at a time will avoid big
+ // load spikes when the pages are touched.
+ //
+ // TODO: Instead of the time-based logic above, consider using ProcessInfo
+ // and watching for getResidentSize to drop, which is more precise.
+ remapFraction = (curTimeMicros64() - remapLastTimestamp) / 2000000.0;
+
+ if (mmapv1GlobalOptions.journalOptions & MMAPV1Options::JournalAlwaysRemap) {
+ remapFraction = 1;
+ } else {
+ // We don't want to get close to the UncommittedBytesLimit
+ const double remapMemFraction =
+ estimatedPrivateMapSize / ((double)UncommittedBytesLimit);
+
+ remapFraction = std::max(remapMemFraction, remapFraction);
+
+ remapFraction = std::max(systemMemoryPressurePercentage, remapFraction);
}
+ } else {
+ LOG(4) << "Early release flush lock";
- if (commitJob.bytes() > UncommittedBytesLimit / 2) {
- // The number of written bytes is growing
- break;
- }
+ // We will not be doing a remap so drop the flush lock. That way we will be
+ // doing the journal I/O outside of the lock, so other threads can proceed.
+ invariant(!shouldRemap);
+ autoFlushLock.release();
}
- // The commit logic itself
- LOG(4) << "groupCommit begin";
+ // Request async I/O to the journal. This may block.
+ journalWriter.writeBuffer(buffer, commitNumber);
+
+ // Data has now been written to the shared view. If remap was requested, we
+ // would still be holding the S flush lock here, so just upgrade it and
+ // perform the remap.
+ if (shouldRemap) {
+ // Need to wait for the previously scheduled journal writes to complete
+ // before any remap is attempted.
+ journalWriter.flush();
+ journalWriter.assertIdle();
+
+ // Upgrading the journal lock to flush stops all activity on the system,
+ // because we will be remapping memory and we don't want readers to be
+ // accessing it. Technically this step could be avoided on systems that
+ // support atomic remap.
+ autoFlushLock.upgradeFlushLockToExclusive();
+ remapPrivateView(remapFraction);
- Timer t;
-
- OperationContextImpl txn;
- AutoAcquireFlushLockForMMAPV1Commit autoFlushLock(txn.lockState());
-
- // We need to snapshot the commitNumber after the flush lock has been obtained,
- // because at this point we know that we have a stable snapshot of the data.
- const NotifyAll::When commitNumber(commitNotify.now());
-
- LOG(4) << "Processing commit number " << commitNumber;
-
- if (!commitJob.hasWritten()) {
- // We do not need the journal lock anymore. Free it here, for the really
- // unlikely possibility that the writeBuffer command below blocks.
autoFlushLock.release();
- // getlasterror request could have came after the data was already committed.
- // No need to call committingReset though, because we have not done any
- // writes (hasWritten == false).
- JournalWriter::Buffer* const buffer = journalWriter.newBuffer();
- buffer->setNoop();
-
- journalWriter.writeBuffer(buffer, commitNumber);
- }
- else {
- // This copies all the in-memory changes into the journal writer's buffer.
- JournalWriter::Buffer* const buffer = journalWriter.newBuffer();
- PREPLOGBUFFER(buffer->getHeader(), buffer->getBuilder());
-
- estimatedPrivateMapSize += commitJob.bytes();
- commitCounter++;
-
- // Now that the write intents have been copied to the buffer, the commit job is
- // free to be reused. We need to reset the commit job's contents while under
- // the S flush lock, because otherwise someone might have done a write and this
- // would wipe out their changes without ever being committed.
- commitJob.committingReset();
-
- double systemMemoryPressurePercentage =
- ProcessInfo::getSystemMemoryPressurePercentage();
-
- // Now that the in-memory modifications have been collected, we can potentially
- // release the flush lock if remap is not necessary.
- // When we remap due to memory pressure, we look at two criteria
- // 1. If the amount of 4k pages touched exceeds 512 MB,
- // a reasonable estimate of memory pressure on Linux.
- // 2. Check if the amount of free memory on the machine is running low,
- // since #1 is underestimates the memory pressure on Windows since
- // commits in 64MB chunks.
- const bool shouldRemap =
- (estimatedPrivateMapSize >= UncommittedBytesLimit) ||
- (systemMemoryPressurePercentage > 0.0) ||
- (commitCounter % NumCommitsBeforeRemap == 0) ||
- (mmapv1GlobalOptions.journalOptions & MMAPV1Options::JournalAlwaysRemap);
-
- double remapFraction = 0.0;
-
- if (shouldRemap) {
- // We want to remap all private views about every 2 seconds. There could be
- // ~1000 views so we do a little each pass. There will be copy on write
- // faults after remapping, so doing a little bit at a time will avoid big
- // load spikes when the pages are touched.
- //
- // TODO: Instead of the time-based logic above, consider using ProcessInfo
- // and watching for getResidentSize to drop, which is more precise.
- remapFraction = (curTimeMicros64() - remapLastTimestamp) / 2000000.0;
-
- if (mmapv1GlobalOptions.journalOptions &
- MMAPV1Options::JournalAlwaysRemap) {
- remapFraction = 1;
- }
- else {
- // We don't want to get close to the UncommittedBytesLimit
- const double remapMemFraction =
- estimatedPrivateMapSize / ((double)UncommittedBytesLimit);
-
- remapFraction = std::max(remapMemFraction, remapFraction);
-
- remapFraction = std::max(systemMemoryPressurePercentage, remapFraction);
- }
- }
- else {
- LOG(4) << "Early release flush lock";
-
- // We will not be doing a remap so drop the flush lock. That way we will be
- // doing the journal I/O outside of lock, so other threads can proceed.
- invariant(!shouldRemap);
- autoFlushLock.release();
- }
+ // Reset the private map estimate outside of the lock
+ estimatedPrivateMapSize = 0;
+ remapLastTimestamp = curTimeMicros64();
- // Request async I/O to the journal. This may block.
- journalWriter.writeBuffer(buffer, commitNumber);
-
- // Data has now been written to the shared view. If remap was requested, we
- // would still be holding the S flush lock here, so just upgrade it and
- // perform the remap.
- if (shouldRemap) {
- // Need to wait for the previously scheduled journal writes to complete
- // before any remap is attempted.
- journalWriter.flush();
- journalWriter.assertIdle();
-
- // Upgrading the journal lock to flush stops all activity on the system,
- // because we will be remapping memory and we don't want readers to be
- // accessing it. Technically this step could be avoided on systems, which
- // support atomic remap.
- autoFlushLock.upgradeFlushLockToExclusive();
- remapPrivateView(remapFraction);
-
- autoFlushLock.release();
-
- // Reset the private map estimate outside of the lock
- estimatedPrivateMapSize = 0;
- remapLastTimestamp = curTimeMicros64();
-
- stats.curr()->_commitsInWriteLock++;
- stats.curr()->_commitsInWriteLockMicros += t.micros();
- }
+ stats.curr()->_commitsInWriteLock++;
+ stats.curr()->_commitsInWriteLockMicros += t.micros();
}
-
- stats.curr()->_commits++;
- stats.curr()->_commitsMicros += t.micros();
-
- LOG(4) << "groupCommit end";
- }
- catch (DBException& e) {
- severe() << "dbexception in durThread causing immediate shutdown: "
- << e.toString();
- invariant(false);
- }
- catch (std::ios_base::failure& e) {
- severe() << "ios_base exception in durThread causing immediate shutdown: "
- << e.what();
- invariant(false);
}
- catch (std::bad_alloc& e) {
- severe() << "bad_alloc exception in durThread causing immediate shutdown: "
- << e.what();
- invariant(false);
- }
- catch (std::exception& e) {
- severe() << "exception in durThread causing immediate shutdown: "
- << e.what();
- invariant(false);
- }
- catch (...) {
- severe() << "unhandled exception in durThread causing immediate shutdown";
- invariant(false);
- }
- }
- // Stops the journal thread and ensures everything was written
- invariant(!commitJob.hasWritten());
+ stats.curr()->_commits++;
+ stats.curr()->_commitsMicros += t.micros();
- journalWriter.flush();
- journalWriter.shutdown();
-
- log() << "Durability thread stopped";
+ LOG(4) << "groupCommit end";
+ } catch (DBException& e) {
+ severe() << "dbexception in durThread causing immediate shutdown: " << e.toString();
+ invariant(false);
+ } catch (std::ios_base::failure& e) {
+ severe() << "ios_base exception in durThread causing immediate shutdown: " << e.what();
+ invariant(false);
+ } catch (std::bad_alloc& e) {
+ severe() << "bad_alloc exception in durThread causing immediate shutdown: " << e.what();
+ invariant(false);
+ } catch (std::exception& e) {
+ severe() << "exception in durThread causing immediate shutdown: " << e.what();
+ invariant(false);
+ } catch (...) {
+ severe() << "unhandled exception in durThread causing immediate shutdown";
+ invariant(false);
+ }
}
+ // Stops the journal thread and ensures everything was written
+ invariant(!commitJob.hasWritten());
- /**
- * Invoked at server startup. Recovers the database by replaying journal files and then
- * starts the durability thread.
- */
- void startup() {
- if (!storageGlobalParams.dur) {
- return;
- }
+ journalWriter.flush();
+ journalWriter.shutdown();
- journalMakeDir();
+ log() << "Durability thread stopped";
+}
- try {
- replayJournalFilesAtStartup();
- }
- catch (DBException& e) {
- severe() << "dbexception during recovery: " << e.toString();
- throw;
- }
- catch (std::exception& e) {
- severe() << "std::exception during recovery: " << e.what();
- throw;
- }
- catch (...) {
- severe() << "exception during recovery";
- throw;
- }
- preallocateFiles();
+/**
+ * Invoked at server startup. Recovers the database by replaying journal files and then
+ * starts the durability thread.
+ */
+void startup() {
+ if (!storageGlobalParams.dur) {
+ return;
+ }
- durableImpl.start();
- DurableInterface::_impl = &durableImpl;
+ journalMakeDir();
+
+ try {
+ replayJournalFilesAtStartup();
+ } catch (DBException& e) {
+ severe() << "dbexception during recovery: " << e.toString();
+ throw;
+ } catch (std::exception& e) {
+ severe() << "std::exception during recovery: " << e.what();
+ throw;
+ } catch (...) {
+ severe() << "exception during recovery";
+ throw;
}
-} // namespace dur
-} // namespace mongo
+ preallocateFiles();
+
+ durableImpl.start();
+ DurableInterface::_impl = &durableImpl;
+}
+
+} // namespace dur
+} // namespace mongo
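
The remap pacing in durThread() above folds three signals into a single fraction of private views to remap per pass: elapsed time against a roughly 2-second full-sweep target, how close the private map is to UncommittedBytesLimit, and OS-reported memory pressure. A minimal sketch of that calculation, with the real counters and options replaced by plain parameters (all names here are illustrative, not from the tree):

    #include <algorithm>
    #include <cstddef>
    #include <cstdint>

    // Illustrative stand-in for the remapFraction computation in durThread();
    // the parameter names are hypothetical.
    double remapFractionFor(uint64_t nowMicros,
                            uint64_t lastRemapMicros,
                            size_t estimatedPrivateMapSize,
                            size_t uncommittedBytesLimit,
                            double systemMemoryPressurePct) {
        // Walk all private views roughly every 2 seconds, a little per pass,
        // to avoid a burst of copy-on-write faults after remapping.
        double fraction = (nowMicros - lastRemapMicros) / 2000000.0;

        // Remap faster as the private map approaches the uncommitted limit.
        const double memFraction =
            static_cast<double>(estimatedPrivateMapSize) / uncommittedBytesLimit;
        fraction = std::max(fraction, memFraction);

        // And faster still under OS-reported memory pressure.
        return std::max(fraction, systemMemoryPressurePct);
    }
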
diff --git a/src/mongo/db/storage/mmap_v1/dur.h b/src/mongo/db/storage/mmap_v1/dur.h
index 2915ece1439..7cfd46fada3 100644
--- a/src/mongo/db/storage/mmap_v1/dur.h
+++ b/src/mongo/db/storage/mmap_v1/dur.h
@@ -35,126 +35,130 @@
namespace mongo {
- class OperationContext;
+class OperationContext;
namespace dur {
- // a smaller limit is likely better on 32 bit
- const unsigned UncommittedBytesLimit = (sizeof(void*) == 4) ? 50 * 1024 * 1024 : 512 * 1024 * 1024;
+// a smaller limit is likely better on 32 bit
+const unsigned UncommittedBytesLimit = (sizeof(void*) == 4) ? 50 * 1024 * 1024 : 512 * 1024 * 1024;
- class DurableInterface {
- MONGO_DISALLOW_COPYING(DurableInterface);
- public:
- virtual ~DurableInterface();
+class DurableInterface {
+ MONGO_DISALLOW_COPYING(DurableInterface);
- /**
- * Declare that a file has been created. Normally writes are applied only after journaling
- * for safety. But here the file is created first, and the journal will just replay the
- * creation if the create didn't happen due to a crash.
- */
- virtual void createdFile(const std::string& filename, unsigned long long len) = 0;
+public:
+ virtual ~DurableInterface();
- // Declare write intents. Use these methods to declare "i'm about to write to x and it
- // should be logged for redo."
- //
- // Failure to call declare write intents is checked in MONGO_CONFIG_DEBUG_BUILD mode by
- // using a read only mapped view (i.e., you'll segfault if the code is covered in that
- // situation). The debug check doesn't verify that your length is correct though.
- virtual void declareWriteIntents(
- const std::vector<std::pair<void*, unsigned> >& intents) = 0;
+ /**
+ * Declare that a file has been created. Normally writes are applied only after journaling
+ * for safety. But here the file is created first, and the journal will just replay the
+ * creation if the create didn't happen due to a crash.
+ */
+ virtual void createdFile(const std::string& filename, unsigned long long len) = 0;
- /** Wait for acknowledgement of the next group commit.
- @return true if --dur is on. There will be delay.
- @return false if --dur is off.
- */
- virtual bool waitUntilDurable() = 0;
+ // Declare write intents. Use these methods to declare "I'm about to write to x and it
+ // should be logged for redo."
+ //
+ // Failure to declare write intents is checked in MONGO_CONFIG_DEBUG_BUILD mode by
+ // using a read-only mapped view (i.e., you'll segfault if the code is covered in that
+ // situation). The debug check doesn't verify that your length is correct, though.
+ virtual void declareWriteIntents(const std::vector<std::pair<void*, unsigned>>& intents) = 0;
- /** Commit immediately.
+ /** Wait for acknowledgement of the next group commit.
+ @return true if --dur is on. There will be delay.
+ @return false if --dur is off.
+ */
+ virtual bool waitUntilDurable() = 0;
- Generally, you do not want to do this often, as highly granular committing may affect
- performance.
+ /** Commit immediately.
- Does not return until the commit is complete.
+ Generally, you do not want to do this often, as highly granular committing may affect
+ performance.
- You must be at least read locked when you call this. Ideally, you are not write locked
- and then read operations can occur concurrently.
+ Does not return until the commit is complete.
- Do not use this. Use commitIfNeeded() instead.
+ You must be at least read locked when you call this. Ideally, you are not write locked,
+ so that read operations can occur concurrently.
- @return true if --dur is on.
- @return false if --dur is off. (in which case there is action)
- */
- virtual bool commitNow(OperationContext* txn) = 0;
+ Do not use this. Use commitIfNeeded() instead.
- /** Commit if enough bytes have been modified. Current threshold is 50MB
+ @return true if --dur is on.
+ @return false if --dur is off (in which case there is no action).
+ */
+ virtual bool commitNow(OperationContext* txn) = 0;
- The idea is that long running write operations that don't yield
- (like creating an index or update with $atomic) can call this
- whenever the db is in a sane state and it will prevent commits
- from growing too large.
- @return true if commited
- */
- virtual bool commitIfNeeded() = 0;
+ /** Commit if enough bytes have been modified. Current threshold is 50MB.
+ The idea is that long-running write operations that don't yield
+ (like creating an index or update with $atomic) can call this
+ whenever the db is in a sane state, and it will prevent commits
+ from growing too large.
+ @return true if committed
+ */
+ virtual bool commitIfNeeded() = 0;
- /**
- * Called when a DurableMappedFile is closing. Asserts that there are no unwritten changes,
- * because that would mean journal replay on recovery would try to write to non-existent
- * files and fail.
- */
- virtual void closingFileNotification() = 0;
- /**
- * Invoked at clean shutdown time. Performs one last commit/flush and terminates the
- * flush thread.
- *
- * Must be called under the global X lock.
- */
- virtual void commitAndStopDurThread() = 0;
+ /**
+ * Called when a DurableMappedFile is closing. Asserts that there are no unwritten changes,
+ * because that would mean journal replay on recovery would try to write to non-existent
+ * files and fail.
+ */
+ virtual void closingFileNotification() = 0;
- /**
- * Commits pending changes, flushes all changes to main data files, then removes the
- * journal.
- *
- * WARNING: Data *must* be in a crash-recoverable state when this is called and must
- * not be inside of a write unit of work.
- *
- * This is useful as a "barrier" to ensure that writes before this call will never go
- * through recovery and be applied to files that have had changes made after this call
- * applied.
- */
- virtual void syncDataAndTruncateJournal(OperationContext* txn) = 0;
+ /**
+ * Invoked at clean shutdown time. Performs one last commit/flush and terminates the
+ * flush thread.
+ *
+ * Must be called under the global X lock.
+ */
+ virtual void commitAndStopDurThread() = 0;
- virtual bool isDurable() const = 0;
+ /**
+ * Commits pending changes, flushes all changes to main data files, then removes the
+ * journal.
+ *
+ * WARNING: Data *must* be in a crash-recoverable state when this is called and must
+ * not be inside of a write unit of work.
+ *
+ * This is useful as a "barrier" to ensure that writes before this call will never go
+ * through recovery and be applied to files that have had changes made after this
+ * call.
+ */
+ virtual void syncDataAndTruncateJournal(OperationContext* txn) = 0;
- static DurableInterface& getDur() { return *_impl; }
+ virtual bool isDurable() const = 0;
- protected:
- DurableInterface();
+ static DurableInterface& getDur() {
+ return *_impl;
+ }
- private:
- friend void startup();
+protected:
+ DurableInterface();
- static DurableInterface* _impl;
- };
+private:
+ friend void startup();
+ static DurableInterface* _impl;
+};
- /**
- * Called during startup to startup the durability module.
- * Does nothing if storageGlobalParams.dur is false
- */
- void startup();
-} // namespace dur
+/**
+ * Called during startup to start up the durability module.
+ * Does nothing if storageGlobalParams.dur is false.
+ */
+void startup();
+} // namespace dur
- /**
- * Provides a reference to the active durability interface.
- *
- * TODO: The only reason this is an inline function is that tests try to link it and fail if
- * the MMAP V1 engine is not included.
- */
- inline dur::DurableInterface& getDur() { return dur::DurableInterface::getDur(); }
-} // namespace mongo
+/**
+ * Provides a reference to the active durability interface.
+ *
+ * TODO: The only reason this is an inline function is that tests try to link it and fail if
+ * the MMAP V1 engine is not included.
+ */
+inline dur::DurableInterface& getDur() {
+ return dur::DurableInterface::getDur();
+}
+
+} // namespace mongo
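
The header above defines the contract long-running write paths are expected to follow: declare intents, let commitIfNeeded() bound the size of any one group commit, and use waitUntilDurable() when a caller needs j:true semantics. A hedged usage sketch; moreWorkToDo() and doSomeDeclaredWrites() are hypothetical placeholders, not code from the tree:

    // Hedged usage sketch of dur::DurableInterface; moreWorkToDo() and
    // doSomeDeclaredWrites() are hypothetical placeholders.
    void longRunningWriteLoop(mongo::OperationContext* txn) {
        mongo::dur::DurableInterface& durability = mongo::getDur();
        while (moreWorkToDo()) {
            doSomeDeclaredWrites(txn);  // writes covered by declareWriteIntents()
            // Commit early once ~50MB of changes accumulate, so no single
            // group commit grows too large.
            durability.commitIfNeeded();
        }
        // Block until the next group commit has journaled everything (j:true).
        durability.waitUntilDurable();
    }
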
diff --git a/src/mongo/db/storage/mmap_v1/dur_commitjob.cpp b/src/mongo/db/storage/mmap_v1/dur_commitjob.cpp
index 27e7681b17c..aff01c1c7bf 100644
--- a/src/mongo/db/storage/mmap_v1/dur_commitjob.cpp
+++ b/src/mongo/db/storage/mmap_v1/dur_commitjob.cpp
@@ -44,83 +44,76 @@
namespace mongo {
- using std::shared_ptr;
- using std::endl;
- using std::max;
- using std::min;
+using std::shared_ptr;
+using std::endl;
+using std::max;
+using std::min;
namespace dur {
- void WriteIntent::absorb(const WriteIntent& other) {
- dassert(overlaps(other));
+void WriteIntent::absorb(const WriteIntent& other) {
+ dassert(overlaps(other));
- void* newStart = min(start(), other.start());
- p = max(p, other.p);
- len = (char*)p - (char*)newStart;
+ void* newStart = min(start(), other.start());
+ p = max(p, other.p);
+ len = (char*)p - (char*)newStart;
- dassert(contains(other));
- }
+ dassert(contains(other));
+}
- CommitJob::CommitJob() :
- _hasWritten(false),
- _lastNotedPos(0),
- _bytes(0) {
+CommitJob::CommitJob() : _hasWritten(false), _lastNotedPos(0), _bytes(0) {}
- }
+CommitJob::~CommitJob() {}
- CommitJob::~CommitJob() {
+void CommitJob::noteOp(shared_ptr<DurOp> p) {
+ stdx::lock_guard<SimpleMutex> lk(groupCommitMutex);
+ _hasWritten = true;
+ _durOps.push_back(p);
+}
- }
-
- void CommitJob::noteOp(shared_ptr<DurOp> p) {
- stdx::lock_guard<SimpleMutex> lk(groupCommitMutex);
- _hasWritten = true;
- _durOps.push_back(p);
- }
+void CommitJob::note(void* p, int len) {
+ _hasWritten = true;
- void CommitJob::note(void* p, int len) {
- _hasWritten = true;
+ if (!_alreadyNoted.checkAndSet(p, len)) {
+ // Remember intent. We will journal it in a bit.
+ _insertWriteIntent(p, len);
- if (!_alreadyNoted.checkAndSet(p, len)) {
- // Remember intent. We will journal it in a bit.
- _insertWriteIntent(p, len);
+ // Round off to page address (4KB).
+ const size_t x = ((size_t)p) & ~0xfff;
- // Round off to page address (4KB).
- const size_t x = ((size_t)p) & ~0xfff;
+ if (x != _lastNotedPos) {
+ _lastNotedPos = x;
- if (x != _lastNotedPos) {
- _lastNotedPos = x;
+ // Add the full page amount
+ _bytes += (len + 4095) & ~0xfff;
- // Add the full page amount
- _bytes += (len + 4095) & ~0xfff;
+ if (_bytes > UncommittedBytesLimit * 3) {
+ _complains++;
- if (_bytes > UncommittedBytesLimit * 3) {
- _complains++;
+ // Throttle logging
+ if (_complains < 100 || (curTimeMillis64() - _lastComplainMs >= 60000)) {
+ _lastComplainMs = curTimeMillis64();
- // Throttle logging
- if (_complains < 100 || (curTimeMillis64() - _lastComplainMs >= 60000)) {
- _lastComplainMs = curTimeMillis64();
+ warning() << "DR102 too much data written uncommitted (" << _bytes / 1000000.0
+ << "MB)";
- warning() << "DR102 too much data written uncommitted ("
- << _bytes / 1000000.0 << "MB)";
-
- if (_complains < 10 || _complains % 10 == 0) {
- printStackTrace();
- }
+ if (_complains < 10 || _complains % 10 == 0) {
+ printStackTrace();
}
}
}
}
}
-
- void CommitJob::committingReset() {
- _hasWritten = false;
- _alreadyNoted.clear();
- _intents.clear();
- _durOps.clear();
- _bytes = 0;
- }
-
-} // namespace "dur"
-} // namespace "mongo"
+}
+
+void CommitJob::committingReset() {
+ _hasWritten = false;
+ _alreadyNoted.clear();
+ _intents.clear();
+ _durOps.clear();
+ _bytes = 0;
+}
+
+} // namespace "dur"
+} // namespace "mongo"
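
CommitJob::note() above accounts for dirty data at 4KB-page granularity using two masks: `((size_t)p) & ~0xfff` drops an address to its page base, and `(len + 4095) & ~0xfff` rounds a length up to whole pages. A runnable illustration of the same arithmetic:

    #include <cassert>
    #include <cstddef>

    int main() {
        // Mask a byte offset down to its containing 4KB page, as note()
        // does with ((size_t)p) & ~0xfff.
        const size_t offset = 0x12345;
        const size_t page = offset & ~static_cast<size_t>(0xfff);
        assert(page == 0x12000);

        // Round a write length up to a whole page, as note() does with
        // (len + 4095) & ~0xfff.
        const size_t len = 10;
        const size_t rounded = (len + 4095) & ~static_cast<size_t>(0xfff);
        assert(rounded == 4096);
        return 0;
    }
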
diff --git a/src/mongo/db/storage/mmap_v1/dur_commitjob.h b/src/mongo/db/storage/mmap_v1/dur_commitjob.h
index b2d07c3b293..8261b613c57 100644
--- a/src/mongo/db/storage/mmap_v1/dur_commitjob.h
+++ b/src/mongo/db/storage/mmap_v1/dur_commitjob.h
@@ -35,179 +35,191 @@
namespace mongo {
namespace dur {
- typedef std::vector<std::shared_ptr<DurOp> > DurOpsVector;
+typedef std::vector<std::shared_ptr<DurOp>> DurOpsVector;
- /**
- * Declaration of an intent to write to a region of a memory mapped view. We store the end
- * rather than the start pointer to make operator < faster since that is heavily used in
- * set lookup.
- */
- struct WriteIntent {
- WriteIntent() : p(0) { }
- WriteIntent(void *a, unsigned b) : p((char*)a + b), len(b) { }
+/**
+ * Declaration of an intent to write to a region of a memory mapped view. We store the end
+ * rather than the start pointer to make operator < faster since that is heavily used in
+ * set lookup.
+ */
+struct WriteIntent {
+ WriteIntent() : p(0) {}
+ WriteIntent(void* a, unsigned b) : p((char*)a + b), len(b) {}
+
+ void* start() const {
+ return (char*)p - len;
+ }
+ void* end() const {
+ return p;
+ }
+ unsigned length() const {
+ return len;
+ }
+ bool operator<(const WriteIntent& rhs) const {
+ return end() < rhs.end();
+ }
+
+ bool overlaps(const WriteIntent& rhs) const {
+ return (start() <= rhs.end() && end() >= rhs.start());
+ }
+
+ bool contains(const WriteIntent& rhs) const {
+ return (start() <= rhs.start() && end() >= rhs.end());
+ }
+
+ // merge into me:
+ void absorb(const WriteIntent& other);
+
+ friend std::ostream& operator<<(std::ostream& out, const WriteIntent& wi) {
+ return (out << "p: " << wi.p << " end: " << wi.end() << " len: " << wi.len);
+ }
+
+private:
+ void* p; // intent to write up to p
+ unsigned len; // up to this len
+};
+
+typedef std::vector<WriteIntent> WriteIntentsVector;
- void* start() const { return (char*)p - len; }
- void* end() const { return p; }
- unsigned length() const { return len; }
- bool operator < (const WriteIntent& rhs) const { return end() < rhs.end(); }
- bool overlaps(const WriteIntent& rhs) const {
- return (start() <= rhs.end() && end() >= rhs.start());
- }
+/**
+ * Bitmap to remember things we have already marked for journaling. False negatives are ok
+ * if infrequent, since they only cost performance, not correctness.
+ */
+template <int Prime>
+class Already {
+ MONGO_DISALLOW_COPYING(Already);
+
+public:
+ Already() {
+ clear();
+ }
+
+ void clear() {
+ memset(this, 0, sizeof(*this));
+ }
- bool contains(const WriteIntent& rhs) const {
- return (start() <= rhs.start() && end() >= rhs.end());
+ /**
+ * Checks if we have Already recorded/indicated our write intent for this region of
+ * memory and automatically upgrades the length if the length was shorter previously.
+ *
+ * @return true if already indicated.
+ */
+ bool checkAndSet(void* p, int len) {
+ const unsigned x = hashPointer(p);
+ std::pair<void*, int>& nd = nodes[x % Prime];
+
+ if (nd.first == p) {
+ if (nd.second < len) {
+ nd.second = len;
+ return false; // haven't indicated this len yet
+ }
+ return true; // already indicated
}
- // merge into me:
- void absorb(const WriteIntent& other);
-
- friend std::ostream& operator << (std::ostream& out, const WriteIntent& wi) {
- return (out << "p: " << wi.p << " end: " << wi.end() << " len: " << wi.len);
+ nd.first = p;
+ nd.second = len;
+ return false; // a new set
+ }
+
+private:
+ static unsigned hashPointer(void* v) {
+ unsigned x = 0;
+ unsigned char* p = (unsigned char*)&v;
+ for (unsigned i = 0; i < sizeof(void*); i++) {
+ x = x * 131 + p[i];
}
+ return x;
+ }
+
+ std::pair<void*, int> nodes[Prime];
+};
- private:
- void *p; // intent to write up to p
- unsigned len; // up to this len
- };
- typedef std::vector<WriteIntent> WriteIntentsVector;
+/**
+ * Tracks all write operations on the private view so they can be journaled.
+ */
+class CommitJob {
+ MONGO_DISALLOW_COPYING(CommitJob);
+public:
+ CommitJob();
+ ~CommitJob();
/**
- * Bitmap to remember things we have already marked for journaling. False negatives are ok
- * if infrequent, since they impact performance.
+ * Note an operation other than a "basic write".
*/
- template<int Prime>
- class Already {
- MONGO_DISALLOW_COPYING(Already);
- public:
- Already() {
- clear();
- }
-
- void clear() {
- memset(this, 0, sizeof(*this));
- }
-
- /**
- * Checks if we have Already recorded/indicated our write intent for this region of
- * memory and automatically upgrades the length if the length was shorter previously.
- *
- * @return true if already indicated.
- */
- bool checkAndSet(void* p, int len) {
- const unsigned x = hashPointer(p);
- std::pair<void*, int>& nd = nodes[x % Prime];
-
- if (nd.first == p) {
- if (nd.second < len) {
- nd.second = len;
- return false; // haven't indicated this len yet
- }
- return true; // already indicated
- }
-
- nd.first = p;
- nd.second = len;
- return false; // a new set
- }
+ void noteOp(std::shared_ptr<DurOp> p);
- private:
-
- static unsigned hashPointer(void *v) {
- unsigned x = 0;
- unsigned char *p = (unsigned char *)&v;
- for (unsigned i = 0; i < sizeof(void*); i++) {
- x = x * 131 + p[i];
- }
- return x;
- }
+ /**
+ * Record/note an intent to write.
+ *
+ * NOTE: Not thread safe. Requires the mutex to be locked.
+ */
+ void note(void* p, int len);
- std::pair<void*, int> nodes[Prime];
- };
+ /**
+ * When this value is false we don't have to do any group commit.
+ */
+ bool hasWritten() const {
+ return _hasWritten;
+ }
+ /**
+ * We use the commitjob object over and over, calling committingReset() rather than
+ * reconstructing.
+ */
+ void committingReset();
/**
- * Tracks all write operations on the private view so they can be journaled.
+ * We check how much has been written, and if it is getting to be a lot, we commit sooner.
*/
- class CommitJob {
- MONGO_DISALLOW_COPYING(CommitJob);
- public:
- CommitJob();
- ~CommitJob();
-
- /**
- * Note an operation other than a "basic write".
- */
- void noteOp(std::shared_ptr<DurOp> p);
-
- /**
- * Record/note an intent to write.
- *
- * NOTE: Not thread safe. Requires the mutex to be locked.
- */
- void note(void* p, int len);
-
- /**
- * When this value is false we don't have to do any group commit.
- */
- bool hasWritten() const { return _hasWritten; }
-
- /**
- * We use the commitjob object over and over, calling committingReset() rather than
- * reconstructing.
- */
- void committingReset();
-
- /**
- * We check how much written and if it is getting to be a lot, we commit sooner.
- */
- size_t bytes() const { return _bytes; }
-
- /**
- * Sorts the internal list of write intents so that overlapping and duplicate items can be
- * merged. We do the sort here so the caller receives something they must keep const from
- * their POV.
- */
- const WriteIntentsVector& getIntentsSorted() {
- sort(_intents.begin(), _intents.end());
- return _intents;
- }
+ size_t bytes() const {
+ return _bytes;
+ }
- const DurOpsVector& ops() const {
- return _durOps;
- }
+ /**
+ * Sorts the internal list of write intents so that overlapping and duplicate items can be
+ * merged. We do the sort here so that the caller receives a list they must treat as
+ * const from their point of view.
+ */
+ const WriteIntentsVector& getIntentsSorted() {
+ sort(_intents.begin(), _intents.end());
+ return _intents;
+ }
- SimpleMutex groupCommitMutex;
+ const DurOpsVector& ops() const {
+ return _durOps;
+ }
- private:
+ SimpleMutex groupCommitMutex;
- void _insertWriteIntent(void* p, int len) {
- _intents.push_back(WriteIntent(p, len));
- wassert(_intents.size() < 2000000);
- }
+private:
+ void _insertWriteIntent(void* p, int len) {
+ _intents.push_back(WriteIntent(p, len));
+ wassert(_intents.size() < 2000000);
+ }
- // Whether we put write intents or durops
- bool _hasWritten;
+ // Whether we put write intents or durops
+ bool _hasWritten;
- // Write intents along with a bitmask for whether we have already noted them
- Already<127> _alreadyNoted;
- WriteIntentsVector _intents;
+ // Write intents along with a bitmask for whether we have already noted them
+ Already<127> _alreadyNoted;
+ WriteIntentsVector _intents;
- // All the ops other than basic writes
- DurOpsVector _durOps;
+ // All the ops other than basic writes
+ DurOpsVector _durOps;
- // Used to count the private map used bytes. Note that _lastNotedPos doesn't reset with
- // each commit, but that is ok we aren't being that precise.
- size_t _lastNotedPos;
- size_t _bytes;
+ // Used to count the private map's used bytes. Note that _lastNotedPos doesn't reset with
+ // each commit, but that is ok; we aren't being that precise.
+ size_t _lastNotedPos;
+ size_t _bytes;
- // Warning logging for large commits
- uint64_t _lastComplainMs;
- unsigned _complains;
- };
+ // Warning logging for large commits
+ uint64_t _lastComplainMs;
+ unsigned _complains;
+};
-} // namespace "dur"
-} // namespace "mongo"
+} // namespace "dur"
+} // namespace "mongo"
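
Already<Prime> above is a fixed-size, lossy dedup table: a byte-mixing hash picks one of Prime slots, collisions silently evict, and a stale miss merely costs one redundant write intent. A small sketch of the observable checkAndSet() behavior, assuming the header compiles as shown:

    #include <cassert>
    #include "mongo/db/storage/mmap_v1/dur_commitjob.h"  // the header above

    int main() {
        int x = 0;
        mongo::dur::Already<127> noted;
        assert(!noted.checkAndSet(&x, 4));   // new region: caller must note it
        assert(noted.checkAndSet(&x, 4));    // same region and length: skip
        assert(!noted.checkAndSet(&x, 16));  // longer write: must note again
        return 0;
    }
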
diff --git a/src/mongo/db/storage/mmap_v1/dur_journal.cpp b/src/mongo/db/storage/mmap_v1/dur_journal.cpp
index a76ade46128..66c88e3e156 100644
--- a/src/mongo/db/storage/mmap_v1/dur_journal.cpp
+++ b/src/mongo/db/storage/mmap_v1/dur_journal.cpp
@@ -58,7 +58,7 @@
#include "mongo/util/hex.h"
#include "mongo/util/log.h"
#include "mongo/util/mongoutils/str.h"
-#include "mongo/util/net/listen.h" // getelapsedtimemillis
+#include "mongo/util/net/listen.h" // getelapsedtimemillis
#include "mongo/util/progress_meter.h"
#include "mongo/util/timer.h"
@@ -66,732 +66,727 @@ using namespace mongoutils;
namespace mongo {
- using std::endl;
- using std::hex;
- using std::string;
+using std::endl;
+using std::hex;
+using std::string;
- class AlignedBuilder;
+class AlignedBuilder;
- namespace dur {
- // Rotate after reaching this data size in a journal (j._<n>) file
- // We use a smaller size for 32 bit as the journal is mmapped during recovery (only)
- // Note if you take a set of datafiles, including journal files, from 32->64 or vice-versa, it must
- // work. (and should as-is)
- // --smallfiles makes the limit small.
+namespace dur {
+// Rotate after reaching this data size in a journal (j._<n>) file.
+// We use a smaller size for 32-bit builds, as the journal is mmapped during recovery (only).
+// Note that if you take a set of datafiles, including journal files, from 32->64 bit or
+// vice versa, it must work (and should, as-is).
+// --smallfiles makes the limit small.
#if defined(MONGO_CONFIG_DEBUG_BUILD)
- unsigned long long DataLimitPerJournalFile = 128 * 1024 * 1024;
+unsigned long long DataLimitPerJournalFile = 128 * 1024 * 1024;
#elif defined(__APPLE__)
- // assuming a developer box if OS X
- unsigned long long DataLimitPerJournalFile = 256 * 1024 * 1024;
+// assuming a developer box if on OS X
+unsigned long long DataLimitPerJournalFile = 256 * 1024 * 1024;
#else
- unsigned long long DataLimitPerJournalFile = (sizeof(void*)==4) ? 256 * 1024 * 1024 : 1 * 1024 * 1024 * 1024;
+unsigned long long DataLimitPerJournalFile =
+ (sizeof(void*) == 4) ? 256 * 1024 * 1024 : 1 * 1024 * 1024 * 1024;
#endif
- MONGO_INITIALIZER(InitializeJournalingParams)(InitializerContext* context) {
- if (mmapv1GlobalOptions.smallfiles == true) {
- verify(dur::DataLimitPerJournalFile >= 128 * 1024 * 1024);
- dur::DataLimitPerJournalFile = 128 * 1024 * 1024;
- }
- return Status::OK();
- }
+MONGO_INITIALIZER(InitializeJournalingParams)(InitializerContext* context) {
+ if (mmapv1GlobalOptions.smallfiles == true) {
+ verify(dur::DataLimitPerJournalFile >= 128 * 1024 * 1024);
+ dur::DataLimitPerJournalFile = 128 * 1024 * 1024;
+ }
+ return Status::OK();
+}
- BOOST_STATIC_ASSERT( sizeof(Checksum) == 16 );
- BOOST_STATIC_ASSERT( sizeof(JHeader) == 8192 );
- BOOST_STATIC_ASSERT( sizeof(JSectHeader) == 20 );
- BOOST_STATIC_ASSERT( sizeof(JSectFooter) == 32 );
- BOOST_STATIC_ASSERT( sizeof(JEntry) == 12 );
- BOOST_STATIC_ASSERT( sizeof(LSNFile) == 88 );
+BOOST_STATIC_ASSERT(sizeof(Checksum) == 16);
+BOOST_STATIC_ASSERT(sizeof(JHeader) == 8192);
+BOOST_STATIC_ASSERT(sizeof(JSectHeader) == 20);
+BOOST_STATIC_ASSERT(sizeof(JSectFooter) == 32);
+BOOST_STATIC_ASSERT(sizeof(JEntry) == 12);
+BOOST_STATIC_ASSERT(sizeof(LSNFile) == 88);
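
These BOOST_STATIC_ASSERTs pin the journal's on-disk structures to exact byte sizes, so a layout change that would break replay fails at compile time. Post-C++11, the same guard needs no Boost; a minimal sketch with a hypothetical packed record (SampleRecord is illustrative, not one of the journal types, which are defined elsewhere in this module):

    #include <cstdint>

    // Hypothetical packed on-disk record, used only to show the pattern.
    #pragma pack(push, 1)
    struct SampleRecord {
        uint32_t opcode;  // 4 bytes
        uint64_t offset;  // 8 bytes, unpadded because of pack(1)
    };
    #pragma pack(pop)
    static_assert(sizeof(SampleRecord) == 12, "on-disk journal layout changed");
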
- bool usingPreallocate = false;
+bool usingPreallocate = false;
- void removeOldJournalFile(boost::filesystem::path p);
+void removeOldJournalFile(boost::filesystem::path p);
- boost::filesystem::path getJournalDir() {
- boost::filesystem::path p(storageGlobalParams.dbpath);
- p /= "journal";
- return p;
- }
+boost::filesystem::path getJournalDir() {
+ boost::filesystem::path p(storageGlobalParams.dbpath);
+ p /= "journal";
+ return p;
+}
- boost::filesystem::path lsnPath() {
- return getJournalDir()/"lsn";
- }
+boost::filesystem::path lsnPath() {
+ return getJournalDir() / "lsn";
+}
- /** this should be called when something really bad happens so that we can flag appropriately
- */
- void journalingFailure(const char *msg) {
- /** todo:
- (1) don't log too much
- (2) make an indicator in the journal dir that something bad happened.
- (2b) refuse to do a recovery startup if that is there without manual override.
- */
- log() << "journaling failure/error: " << msg << endl;
- verify(false);
- }
+/** This should be called when something really bad happens, so that we can flag appropriately.
+*/
+void journalingFailure(const char* msg) {
+ /** todo:
+ (1) don't log too much
+ (2) make an indicator in the journal dir that something bad happened.
+ (2b) refuse to do a recovery startup if that is there without manual override.
+ */
+ log() << "journaling failure/error: " << msg << endl;
+ verify(false);
+}
- JSectFooter::JSectFooter() {
- memset(this, 0, sizeof(*this));
- sentinel = JEntry::OpCode_Footer;
- }
+JSectFooter::JSectFooter() {
+ memset(this, 0, sizeof(*this));
+ sentinel = JEntry::OpCode_Footer;
+}
- JSectFooter::JSectFooter(const void* begin, int len) { // needs buffer to compute hash
- sentinel = JEntry::OpCode_Footer;
- reserved = 0;
- magic[0] = magic[1] = magic[2] = magic[3] = '\n';
+JSectFooter::JSectFooter(const void* begin, int len) { // needs buffer to compute hash
+ sentinel = JEntry::OpCode_Footer;
+ reserved = 0;
+ magic[0] = magic[1] = magic[2] = magic[3] = '\n';
- Checksum c;
- c.gen(begin, (unsigned) len);
- memcpy(hash, c.bytes, sizeof(hash));
- }
+ Checksum c;
+ c.gen(begin, (unsigned)len);
+ memcpy(hash, c.bytes, sizeof(hash));
+}
- bool JSectFooter::checkHash(const void* begin, int len) const {
- if( !magicOk() ) {
- log() << "journal footer not valid" << endl;
- return false;
- }
- Checksum c;
- c.gen(begin, len);
- DEV log() << "checkHash len:" << len << " hash:" << toHex(hash, 16) << " current:" << toHex(c.bytes, 16) << endl;
- if( memcmp(hash, c.bytes, sizeof(hash)) == 0 )
- return true;
- log() << "journal checkHash mismatch, got: " << toHex(c.bytes, 16) << " expected: " << toHex(hash,16) << endl;
- return false;
- }
+bool JSectFooter::checkHash(const void* begin, int len) const {
+ if (!magicOk()) {
+ log() << "journal footer not valid" << endl;
+ return false;
+ }
+ Checksum c;
+ c.gen(begin, len);
+ DEV log() << "checkHash len:" << len << " hash:" << toHex(hash, 16)
+ << " current:" << toHex(c.bytes, 16) << endl;
+ if (memcmp(hash, c.bytes, sizeof(hash)) == 0)
+ return true;
+ log() << "journal checkHash mismatch, got: " << toHex(c.bytes, 16)
+ << " expected: " << toHex(hash, 16) << endl;
+ return false;
+}
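
JSectFooter brackets each journal section with a hash of its body; recovery recomputes the hash and refuses a section that does not match. Using only the constructor and checkHash() shown above, a sketch of the round trip (sectionBuf and sectionLen are hypothetical stand-ins for a serialized section):

    // Round-trip sketch for the types above; sectionBuf/sectionLen are
    // hypothetical stand-ins for a serialized journal section.
    void sealThenVerify(const char* sectionBuf, int sectionLen) {
        mongo::dur::JSectFooter footer(sectionBuf, sectionLen);  // write side
        // ... the footer is appended after the section body on disk ...
        if (!footer.checkHash(sectionBuf, sectionLen)) {         // recovery side
            // A mismatch means torn or corrupt journal data; replay must
            // stop at the previous good section.
        }
    }
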
- namespace {
- SecureRandom* mySecureRandom = NULL;
- stdx::mutex mySecureRandomMutex;
- int64_t getMySecureRandomNumber() {
- stdx::lock_guard<stdx::mutex> lk( mySecureRandomMutex );
- if ( ! mySecureRandom )
- mySecureRandom = SecureRandom::create();
- return mySecureRandom->nextInt64();
- }
- }
+namespace {
+SecureRandom* mySecureRandom = NULL;
+stdx::mutex mySecureRandomMutex;
+int64_t getMySecureRandomNumber() {
+ stdx::lock_guard<stdx::mutex> lk(mySecureRandomMutex);
+ if (!mySecureRandom)
+ mySecureRandom = SecureRandom::create();
+ return mySecureRandom->nextInt64();
+}
+}
- JHeader::JHeader(string fname) {
- magic[0] = 'j'; magic[1] = '\n';
- _version = CurrentVersion;
- memset(ts, 0, sizeof(ts));
- time_t t = time(0);
- strncpy(ts, time_t_to_String_short(t).c_str(), sizeof(ts)-1);
- memset(dbpath, 0, sizeof(dbpath));
- strncpy(dbpath, fname.c_str(), sizeof(dbpath)-1);
- {
- fileId = t&0xffffffff;
- fileId |= static_cast<unsigned long long>( getMySecureRandomNumber() ) << 32;
- }
- memset(reserved3, 0, sizeof(reserved3));
- txt2[0] = txt2[1] = '\n';
- n1 = n2 = n3 = n4 = '\n';
- }
+JHeader::JHeader(string fname) {
+ magic[0] = 'j';
+ magic[1] = '\n';
+ _version = CurrentVersion;
+ memset(ts, 0, sizeof(ts));
+ time_t t = time(0);
+ strncpy(ts, time_t_to_String_short(t).c_str(), sizeof(ts) - 1);
+ memset(dbpath, 0, sizeof(dbpath));
+ strncpy(dbpath, fname.c_str(), sizeof(dbpath) - 1);
+ {
+ fileId = t & 0xffffffff;
+ fileId |= static_cast<unsigned long long>(getMySecureRandomNumber()) << 32;
+ }
+ memset(reserved3, 0, sizeof(reserved3));
+ txt2[0] = txt2[1] = '\n';
+ n1 = n2 = n3 = n4 = '\n';
+}
- Journal j;
+Journal j;
- const unsigned long long LsnShutdownSentinel = ~((unsigned long long)0);
+const unsigned long long LsnShutdownSentinel = ~((unsigned long long)0);
- Journal::Journal() {
- _written = 0;
- _nextFileNumber = 0;
- _curLogFile = 0;
- _curFileId = 0;
- _preFlushTime = 0;
- _lastFlushTime = 0;
- _writeToLSNNeeded = false;
- }
+Journal::Journal() {
+ _written = 0;
+ _nextFileNumber = 0;
+ _curLogFile = 0;
+ _curFileId = 0;
+ _preFlushTime = 0;
+ _lastFlushTime = 0;
+ _writeToLSNNeeded = false;
+}
- boost::filesystem::path Journal::getFilePathFor(int filenumber) const {
- boost::filesystem::path p(dir);
- p /= string(str::stream() << "j._" << filenumber);
- return p;
- }
+boost::filesystem::path Journal::getFilePathFor(int filenumber) const {
+ boost::filesystem::path p(dir);
+ p /= string(str::stream() << "j._" << filenumber);
+ return p;
+}
- /** never throws
- @param anyFiles by default we only look at j._* files. If anyFiles is true, return true
- if there are any files in the journal directory. acquirePathLock() uses this to
- make sure that the journal directory is mounted.
- @return true if journal dir is not empty
- */
- bool haveJournalFiles(bool anyFiles) {
- try {
- boost::filesystem::path jdir = getJournalDir();
- if ( !boost::filesystem::exists( jdir ) )
- return false;
-
- for ( boost::filesystem::directory_iterator i( jdir );
- i != boost::filesystem::directory_iterator();
- ++i ) {
- string fileName = boost::filesystem::path(*i).leaf().string();
- if( anyFiles || str::startsWith(fileName, "j._") )
- return true;
- }
- }
- catch(const std::exception& e) {
- log() << "Unable to check for journal files due to: " << e.what() << endl;
- }
+/** never throws
+ @param anyFiles by default we only look at j._* files. If anyFiles is true, return true
+ if there are any files in the journal directory. acquirePathLock() uses this to
+ make sure that the journal directory is mounted.
+ @return true if journal dir is not empty
+*/
+bool haveJournalFiles(bool anyFiles) {
+ try {
+ boost::filesystem::path jdir = getJournalDir();
+ if (!boost::filesystem::exists(jdir))
return false;
+
+ for (boost::filesystem::directory_iterator i(jdir);
+ i != boost::filesystem::directory_iterator();
+ ++i) {
+ string fileName = boost::filesystem::path(*i).leaf().string();
+ if (anyFiles || str::startsWith(fileName, "j._"))
+ return true;
}
+ } catch (const std::exception& e) {
+ log() << "Unable to check for journal files due to: " << e.what() << endl;
+ }
+ return false;
+}
- /** throws */
- void removeJournalFiles() {
- log() << "removeJournalFiles" << endl;
- try {
- for ( boost::filesystem::directory_iterator i( getJournalDir() );
- i != boost::filesystem::directory_iterator();
- ++i ) {
- string fileName = boost::filesystem::path(*i).leaf().string();
- if( str::startsWith(fileName, "j._") ) {
- try {
- removeOldJournalFile(*i);
- }
- catch(std::exception& e) {
- log() << "couldn't remove " << fileName << ' ' << e.what() << endl;
- throw;
- }
- }
- }
+/** throws */
+void removeJournalFiles() {
+ log() << "removeJournalFiles" << endl;
+ try {
+ for (boost::filesystem::directory_iterator i(getJournalDir());
+ i != boost::filesystem::directory_iterator();
+ ++i) {
+ string fileName = boost::filesystem::path(*i).leaf().string();
+ if (str::startsWith(fileName, "j._")) {
try {
- boost::filesystem::remove(lsnPath());
- }
- catch(...) {
- // std::exception details logged in catch below
- log() << "couldn't remove " << lsnPath().string() << endl;
+ removeOldJournalFile(*i);
+ } catch (std::exception& e) {
+ log() << "couldn't remove " << fileName << ' ' << e.what() << endl;
throw;
}
}
- catch( std::exception& e ) {
- log() << "error removing journal files " << e.what() << endl;
- throw;
- }
- verify(!haveJournalFiles());
-
- flushMyDirectory(getJournalDir() / "file"); // flushes parent of argument (in this case journal dir)
-
- LOG(1) << "removeJournalFiles end" << endl;
}
+ try {
+ boost::filesystem::remove(lsnPath());
+ } catch (...) {
+ // std::exception details logged in catch below
+ log() << "couldn't remove " << lsnPath().string() << endl;
+ throw;
+ }
+ } catch (std::exception& e) {
+ log() << "error removing journal files " << e.what() << endl;
+ throw;
+ }
+ verify(!haveJournalFiles());
- /** at clean shutdown */
- bool okToCleanUp = false; // successful recovery would set this to true
- void Journal::cleanup(bool _log) {
- if( !okToCleanUp )
- return;
+ flushMyDirectory(getJournalDir() /
+ "file"); // flushes parent of argument (in this case journal dir)
- if( _log )
- log() << "journalCleanup..." << endl;
- try {
- stdx::lock_guard<SimpleMutex> lk(_curLogFileMutex);
- closeCurrentJournalFile();
- removeJournalFiles();
- }
- catch(std::exception& e) {
- log() << "error couldn't remove journal file during shutdown " << e.what() << endl;
- throw;
- }
- }
- void journalCleanup(bool log) { j.cleanup(log); }
+ LOG(1) << "removeJournalFiles end" << endl;
+}
- bool _preallocateIsFaster() {
- bool faster = false;
- boost::filesystem::path p = getJournalDir() / "tempLatencyTest";
- if (boost::filesystem::exists(p)) {
- try {
- remove(p);
- }
- catch(const std::exception& e) {
- log() << "Unable to remove temporary file due to: " << e.what() << endl;
- }
- }
- try {
- AlignedBuilder b(8192);
- int millis[2];
- const int N = 50;
- for( int pass = 0; pass < 2; pass++ ) {
- LogFile f(p.string());
- Timer t;
- for( int i = 0 ; i < N; i++ ) {
- f.synchronousAppend(b.buf(), 8192);
- }
- millis[pass] = t.millis();
- // second time through, file exists and is prealloc case
- }
- int diff = millis[0] - millis[1];
- if( diff > 2 * N ) {
- // at least 2ms faster for prealloc case?
- faster = true;
- log() << "preallocateIsFaster=true " << diff / (1.0*N) << endl;
- }
- }
- catch (const std::exception& e) {
- log() << "info preallocateIsFaster couldn't run due to: " << e.what()
- << "; returning false" << endl;
- }
- if (boost::filesystem::exists(p)) {
- try {
- remove(p);
- }
- catch(const std::exception& e) {
- log() << "Unable to remove temporary file due to: " << e.what() << endl;
- }
- }
- return faster;
+/** at clean shutdown */
+bool okToCleanUp = false; // successful recovery would set this to true
+void Journal::cleanup(bool _log) {
+ if (!okToCleanUp)
+ return;
+
+ if (_log)
+ log() << "journalCleanup..." << endl;
+ try {
+ stdx::lock_guard<SimpleMutex> lk(_curLogFileMutex);
+ closeCurrentJournalFile();
+ removeJournalFiles();
+ } catch (std::exception& e) {
+ log() << "error couldn't remove journal file during shutdown " << e.what() << endl;
+ throw;
+ }
+}
+void journalCleanup(bool log) {
+ j.cleanup(log);
+}
+
+bool _preallocateIsFaster() {
+ bool faster = false;
+ boost::filesystem::path p = getJournalDir() / "tempLatencyTest";
+ if (boost::filesystem::exists(p)) {
+ try {
+ remove(p);
+ } catch (const std::exception& e) {
+ log() << "Unable to remove temporary file due to: " << e.what() << endl;
}
- bool preallocateIsFaster() {
+ }
+ try {
+ AlignedBuilder b(8192);
+ int millis[2];
+ const int N = 50;
+ for (int pass = 0; pass < 2; pass++) {
+ LogFile f(p.string());
Timer t;
- bool res = false;
- if( _preallocateIsFaster() && _preallocateIsFaster() ) {
- // maybe system is just super busy at the moment? sleep a second to let it calm down.
- // deciding to to prealloc is a medium big decision:
- sleepsecs(1);
- res = _preallocateIsFaster();
+ for (int i = 0; i < N; i++) {
+ f.synchronousAppend(b.buf(), 8192);
}
- if( t.millis() > 3000 )
- log() << "preallocateIsFaster check took " << t.millis()/1000.0 << " secs" << endl;
- return res;
+ millis[pass] = t.millis();
+ // second time through, the file exists and is the prealloc case
}
-
- // throws
- void preallocateFile(boost::filesystem::path p, unsigned long long len) {
- if( exists(p) )
- return;
-
- log() << "preallocating a journal file " << p.string() << endl;
-
- const unsigned BLKSZ = 1024 * 1024;
- verify( len % BLKSZ == 0 );
-
- AlignedBuilder b(BLKSZ);
- memset((void*)b.buf(), 0, BLKSZ);
-
- ProgressMeter m(len, 3/*secs*/, 10/*hits between time check (once every 6.4MB)*/);
- m.setName("File Preallocator Progress");
-
- File f;
- f.open( p.string().c_str() , /*read-only*/false , /*direct-io*/false );
- verify( f.is_open() );
- fileofs loc = 0;
- while ( loc < len ) {
- f.write( loc , b.buf() , BLKSZ );
- loc += BLKSZ;
- m.hit(BLKSZ);
- }
- verify( loc == len );
- f.fsync();
+ int diff = millis[0] - millis[1];
+ if (diff > 2 * N) {
+ // at least 2ms faster for prealloc case?
+ faster = true;
+ log() << "preallocateIsFaster=true " << diff / (1.0 * N) << endl;
}
-
- const int NUM_PREALLOC_FILES = 3;
- inline boost::filesystem::path preallocPath(int n) {
- verify(n >= 0);
- verify(n < NUM_PREALLOC_FILES);
- string fn = str::stream() << "prealloc." << n;
- return getJournalDir() / fn;
+ } catch (const std::exception& e) {
+ log() << "info preallocateIsFaster couldn't run due to: " << e.what() << "; returning false"
+ << endl;
+ }
+ if (boost::filesystem::exists(p)) {
+ try {
+ remove(p);
+ } catch (const std::exception& e) {
+ log() << "Unable to remove temporary file due to: " << e.what() << endl;
}
+ }
+ return faster;
+}
+bool preallocateIsFaster() {
+ Timer t;
+ bool res = false;
+ if (_preallocateIsFaster() && _preallocateIsFaster()) {
+ // maybe the system is just super busy at the moment? sleep a second to let it calm down.
+ // deciding to prealloc is a medium-big decision:
+ sleepsecs(1);
+ res = _preallocateIsFaster();
+ }
+ if (t.millis() > 3000)
+ log() << "preallocateIsFaster check took " << t.millis() / 1000.0 << " secs" << endl;
+ return res;
+}
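
preallocateIsFaster() distrusts any single timing: it requires two consecutive wins from _preallocateIsFaster(), sleeps a second in case the box was momentarily busy, then lets a third run decide. The same confirm-twice gating in a generic, self-contained form (probe() is a hypothetical stand-in for the synchronous-append timing, which is not reproduced here):

    #include <chrono>
    #include <thread>

    // Generic confirm-twice benchmark gate; probe() stands in for
    // _preallocateIsFaster()'s file-append timing loop.
    template <typename Probe>
    bool confirmedFaster(Probe probe) {
        if (!(probe() && probe()))
            return false;  // a single win could be noise; two misses end it
        // Maybe the system was just busy: cool down, then let a third
        // measurement make the final call.
        std::this_thread::sleep_for(std::chrono::seconds(1));
        return probe();
    }
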
- // throws
- void _preallocateFiles() {
- for( int i = 0; i < NUM_PREALLOC_FILES; i++ ) {
- boost::filesystem::path filepath = preallocPath(i);
+// throws
+void preallocateFile(boost::filesystem::path p, unsigned long long len) {
+ if (exists(p))
+ return;
- unsigned long long limit = DataLimitPerJournalFile;
- if( kDebugBuild && i == 1 ) {
- // moving 32->64, the prealloc files would be short. that is "ok", but we
- // want to exercise that case, so we force exercising here when
- // MONGO_CONFIG_DEBUG_BUILD is set by arbitrarily stopping prealloc at a
- // low limit for a file. also we want to be able to change in the future
- // the constant without a lot of work anyway.
- limit = 16 * 1024 * 1024;
- }
- preallocateFile(filepath, limit);
- }
- }
+ log() << "preallocating a journal file " << p.string() << endl;
- void checkFreeSpace() {
- unsigned long long spaceNeeded = static_cast<unsigned long long>(3 * DataLimitPerJournalFile * 1.1); // add 10% for headroom
- unsigned long long freeSpace = File::freeSpace(getJournalDir().string());
- unsigned long long prealloced = 0;
- for( int i = 0; i < NUM_PREALLOC_FILES; i++ ) {
- boost::filesystem::path filepath = preallocPath(i);
- if (exists(filepath))
- prealloced += file_size(filepath);
- }
+ const unsigned BLKSZ = 1024 * 1024;
+ verify(len % BLKSZ == 0);
- if (freeSpace + prealloced < spaceNeeded) {
- log() << endl;
- error() << "Insufficient free space for journal files" << endl;
- log() << "Please make at least " << spaceNeeded/(1024*1024) << "MB available in " << getJournalDir().string() << " or use --smallfiles" << endl;
- log() << endl;
- throw UserException(15926, "Insufficient free space for journals");
- }
- }
+ AlignedBuilder b(BLKSZ);
+ memset((void*)b.buf(), 0, BLKSZ);
- void preallocateFiles() {
- if (!(mmapv1GlobalOptions.journalOptions & MMAPV1Options::JournalNoCheckSpace))
- checkFreeSpace();
+ ProgressMeter m(len, 3 /*secs*/, 10 /*hits between time check (once every 6.4MB)*/);
+ m.setName("File Preallocator Progress");
- if( exists(preallocPath(0)) || // if enabled previously, keep using
- exists(preallocPath(1)) ||
- (mmapv1GlobalOptions.preallocj && preallocateIsFaster()) ) {
- usingPreallocate = true;
- try {
- _preallocateFiles();
- }
- catch (const std::exception& e) {
- log() << "warning caught exception (" << e.what()
- << ") in preallocateFiles, continuing" << endl;
- }
- }
- j.open();
- }
+ File f;
+ f.open(p.string().c_str(), /*read-only*/ false, /*direct-io*/ false);
+ verify(f.is_open());
+ fileofs loc = 0;
+ while (loc < len) {
+ f.write(loc, b.buf(), BLKSZ);
+ loc += BLKSZ;
+ m.hit(BLKSZ);
+ }
+ verify(loc == len);
+ f.fsync();
+}
- void removeOldJournalFile(boost::filesystem::path p) {
- if( usingPreallocate ) {
- try {
- for( int i = 0; i < NUM_PREALLOC_FILES; i++ ) {
- boost::filesystem::path filepath = preallocPath(i);
- if( !boost::filesystem::exists(filepath) ) {
- // we can recycle this file into this prealloc file location
- boost::filesystem::path temppath = filepath.string() + ".temp";
- boost::filesystem::rename(p, temppath);
- {
- // zero the header
- File f;
- f.open(temppath.string().c_str(), false, false);
- char buf[8192];
- memset(buf, 0, 8192);
- f.write(0, buf, 8192);
- f.truncate(DataLimitPerJournalFile);
- f.fsync();
- }
- boost::filesystem::rename(temppath, filepath);
- return;
- }
- }
- } catch (const std::exception& e) {
- log() << "warning exception in dur::removeOldJournalFile " << p.string()
- << ": " << e.what() << endl;
- // fall through and try to delete the file
- }
- }
+const int NUM_PREALLOC_FILES = 3;
+inline boost::filesystem::path preallocPath(int n) {
+ verify(n >= 0);
+ verify(n < NUM_PREALLOC_FILES);
+ string fn = str::stream() << "prealloc." << n;
+ return getJournalDir() / fn;
+}
- // already have 3 prealloc files, so delete this file
- try {
- boost::filesystem::remove(p);
- }
- catch (const std::exception& e) {
- log() << "warning exception removing " << p.string() << ": " << e.what() << endl;
- }
- }
+// throws
+void _preallocateFiles() {
+ for (int i = 0; i < NUM_PREALLOC_FILES; i++) {
+ boost::filesystem::path filepath = preallocPath(i);
+
+ unsigned long long limit = DataLimitPerJournalFile;
+ if (kDebugBuild && i == 1) {
+ // moving 32->64, the prealloc files would be short. that is "ok", but we
+ // want to exercise that case, so we force exercising it here when
+ // MONGO_CONFIG_DEBUG_BUILD is set, by arbitrarily stopping prealloc at a
+ // low limit for a file. also, we want to be able to change the constant
+ // in the future without a lot of work.
+ limit = 16 * 1024 * 1024;
+ }
+ preallocateFile(filepath, limit);
+ }
+}
- // find a prealloc.<n> file, presumably to take and use
- boost::filesystem::path findPrealloced() {
- try {
- for( int i = 0; i < NUM_PREALLOC_FILES; i++ ) {
- boost::filesystem::path filepath = preallocPath(i);
- if( boost::filesystem::exists(filepath) )
- return filepath;
- }
- } catch (const std::exception& e) {
- log() << "warning exception in dur::findPrealloced(): " << e.what() << endl;
- }
- return boost::filesystem::path();
- }
+void checkFreeSpace() {
+ unsigned long long spaceNeeded =
+ static_cast<unsigned long long>(3 * DataLimitPerJournalFile * 1.1); // add 10% for headroom
+ unsigned long long freeSpace = File::freeSpace(getJournalDir().string());
+ unsigned long long prealloced = 0;
+ for (int i = 0; i < NUM_PREALLOC_FILES; i++) {
+ boost::filesystem::path filepath = preallocPath(i);
+ if (exists(filepath))
+ prealloced += file_size(filepath);
+ }
- /** assure journal/ dir exists. throws. call during startup. */
- void journalMakeDir() {
- j.init();
+ if (freeSpace + prealloced < spaceNeeded) {
+ log() << endl;
+ error() << "Insufficient free space for journal files" << endl;
+ log() << "Please make at least " << spaceNeeded / (1024 * 1024) << "MB available in "
+ << getJournalDir().string() << " or use --smallfiles" << endl;
+ log() << endl;
+ throw UserException(15926, "Insufficient free space for journals");
+ }
+}
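
checkFreeSpace() budgets for three full journal files plus 10% headroom and credits any already-preallocated files against that budget. A worked example, assuming the 64-bit default DataLimitPerJournalFile of 1 GB:

    #include <cstdio>

    int main() {
        // Assumes the 64-bit default of 1 GB per journal file.
        const unsigned long long limit = 1024ULL * 1024 * 1024;
        const unsigned long long spaceNeeded =
            static_cast<unsigned long long>(3 * limit * 1.1);
        // 3 files * 1 GB * 1.1 headroom ~= 3.3 GB; prints 3379MB.
        std::printf("space needed: %lluMB\n", spaceNeeded / (1024 * 1024));
        return 0;
    }
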
- boost::filesystem::path p = getJournalDir();
- j.dir = p.string();
- log() << "journal dir=" << j.dir << endl;
- if( !boost::filesystem::exists(j.dir) ) {
- try {
- boost::filesystem::create_directory(j.dir);
- }
- catch(std::exception& e) {
- log() << "error creating directory " << j.dir << ' ' << e.what() << endl;
- throw;
- }
- }
+void preallocateFiles() {
+ if (!(mmapv1GlobalOptions.journalOptions & MMAPV1Options::JournalNoCheckSpace))
+ checkFreeSpace();
+
+ if (exists(preallocPath(0)) || // if enabled previously, keep using
+ exists(preallocPath(1)) ||
+ (mmapv1GlobalOptions.preallocj && preallocateIsFaster())) {
+ usingPreallocate = true;
+ try {
+ _preallocateFiles();
+ } catch (const std::exception& e) {
+ log() << "warning caught exception (" << e.what() << ") in preallocateFiles, continuing"
+ << endl;
}
+ }
+ j.open();
+}
- void Journal::_open() {
- _curFileId = 0;
- verify( _curLogFile == 0 );
- boost::filesystem::path fname = getFilePathFor(_nextFileNumber);
-
- // if we have a prealloced file, use it
- {
- boost::filesystem::path p = findPrealloced();
- if( !p.empty() ) {
- try {
- {
- // JHeader::fileId must be updated before renaming to be race-safe
- LogFile f(p.string());
- JHeader h(p.string());
- AlignedBuilder b(8192);
- b.appendStruct(h);
- f.synchronousAppend(b.buf(), b.len());
- }
- boost::filesystem::rename(p, fname);
- }
- catch (const std::exception& e) {
- log() << "warning couldn't write to / rename file " << p.string()
- << ": " << e.what() << endl;
+void removeOldJournalFile(boost::filesystem::path p) {
+ if (usingPreallocate) {
+ try {
+ for (int i = 0; i < NUM_PREALLOC_FILES; i++) {
+ boost::filesystem::path filepath = preallocPath(i);
+ if (!boost::filesystem::exists(filepath)) {
+ // we can recycle this file into this prealloc file location
+ boost::filesystem::path temppath = filepath.string() + ".temp";
+ boost::filesystem::rename(p, temppath);
+ {
+ // zero the header
+ File f;
+ f.open(temppath.string().c_str(), false, false);
+ char buf[8192];
+ memset(buf, 0, 8192);
+ f.write(0, buf, 8192);
+ f.truncate(DataLimitPerJournalFile);
+ f.fsync();
}
+ boost::filesystem::rename(temppath, filepath);
+ return;
}
}
-
- _curLogFile = new LogFile(fname.string());
- _nextFileNumber++;
- {
- JHeader h(fname.string());
- _curFileId = h.fileId;
- verify(_curFileId);
- AlignedBuilder b(8192);
- b.appendStruct(h);
- _curLogFile->synchronousAppend(b.buf(), b.len());
- }
+ } catch (const std::exception& e) {
+ log() << "warning exception in dur::removeOldJournalFile " << p.string() << ": "
+ << e.what() << endl;
+ // fall through and try to delete the file
}
+ }
- void Journal::init() {
- verify( _curLogFile == 0 );
- MongoFile::notifyPreFlush = preFlush;
- MongoFile::notifyPostFlush = postFlush;
- }
+ // already have 3 prealloc files, so delete this file
+ try {
+ boost::filesystem::remove(p);
+ } catch (const std::exception& e) {
+ log() << "warning exception removing " << p.string() << ": " << e.what() << endl;
+ }
+}
- void Journal::open() {
- verify( MongoFile::notifyPreFlush == preFlush );
- stdx::lock_guard<SimpleMutex> lk(_curLogFileMutex);
- _open();
+// find a prealloc.<n> file, presumably to take and use
+boost::filesystem::path findPrealloced() {
+ try {
+ for (int i = 0; i < NUM_PREALLOC_FILES; i++) {
+ boost::filesystem::path filepath = preallocPath(i);
+ if (boost::filesystem::exists(filepath))
+ return filepath;
}
+ } catch (const std::exception& e) {
+ log() << "warning exception in dur::findPrealloced(): " << e.what() << endl;
+ }
+ return boost::filesystem::path();
+}
- void LSNFile::set(unsigned long long x) {
- memset(this, 0, sizeof(*this));
- lsn = x;
- checkbytes = ~x;
- }
+/** assure journal/ dir exists. throws. call during startup. */
+void journalMakeDir() {
+ j.init();
- /** logs details of the situation, and returns 0, if anything surprising in the LSNFile
- if something highly surprising, throws to abort
- */
- unsigned long long LSNFile::get() {
- uassert(13614, str::stream() << "unexpected version number of lsn file in journal/ directory got: " << ver , ver == 0);
- if( ~lsn != checkbytes ) {
- log() << "lsnfile not valid. recovery will be from log start. lsn: " << hex << lsn << " checkbytes: " << hex << checkbytes << endl;
- return 0;
- }
- return lsn;
+ boost::filesystem::path p = getJournalDir();
+ j.dir = p.string();
+ log() << "journal dir=" << j.dir << endl;
+ if (!boost::filesystem::exists(j.dir)) {
+ try {
+ boost::filesystem::create_directory(j.dir);
+ } catch (std::exception& e) {
+ log() << "error creating directory " << j.dir << ' ' << e.what() << endl;
+ throw;
}
+ }
+}
- /** called during recovery (the error message text below assumes that)
- */
- unsigned long long journalReadLSN() {
- if( !exists(lsnPath()) ) {
- log() << "info no lsn file in journal/ directory" << endl;
- return 0;
- }
+void Journal::_open() {
+ _curFileId = 0;
+ verify(_curLogFile == 0);
+ boost::filesystem::path fname = getFilePathFor(_nextFileNumber);
+ // if we have a prealloced file, use it
+ {
+ boost::filesystem::path p = findPrealloced();
+ if (!p.empty()) {
try {
- // os can flush as it likes. if it flushes slowly, we will just do extra work on recovery.
- // however, given we actually close the file when writing, that seems unlikely.
- LSNFile L;
- File f;
- f.open(lsnPath().string().c_str());
- verify(f.is_open());
- if( f.len() == 0 ) {
- // this could be 'normal' if we crashed at the right moment
- log() << "info lsn file is zero bytes long" << endl;
- return 0;
+ {
+ // JHeader::fileId must be updated before renaming to be race-safe
+ LogFile f(p.string());
+ JHeader h(p.string());
+ AlignedBuilder b(8192);
+ b.appendStruct(h);
+ f.synchronousAppend(b.buf(), b.len());
}
- f.read(0,(char*)&L, sizeof(L));
- unsigned long long lsn = L.get();
- return lsn;
- }
- catch(std::exception& e) {
- uasserted(13611, str::stream() << "can't read lsn file in journal directory : " << e.what());
+ boost::filesystem::rename(p, fname);
+ } catch (const std::exception& e) {
+ log() << "warning couldn't write to / rename file " << p.string() << ": "
+ << e.what() << endl;
}
- return 0;
}
+ }
- unsigned long long getLastDataFileFlushTime() {
- return j.lastFlushTime();
- }
+ _curLogFile = new LogFile(fname.string());
+ _nextFileNumber++;
+ {
+ JHeader h(fname.string());
+ _curFileId = h.fileId;
+ verify(_curFileId);
+ AlignedBuilder b(8192);
+ b.appendStruct(h);
+ _curLogFile->synchronousAppend(b.buf(), b.len());
+ }
+}
- /** remember "last sequence number" to speed recoveries
- concurrency: called by durThread only.
- */
- void Journal::updateLSNFile() {
- if( !_writeToLSNNeeded )
- return;
- _writeToLSNNeeded = false;
- try {
- // os can flush as it likes. if it flushes slowly, we will just do extra work on recovery.
- // however, given we actually close the file, that seems unlikely.
- File f;
- f.open(lsnPath().string().c_str());
- if( !f.is_open() ) {
- // can get 0 if an i/o error
- log() << "warning: open of lsn file failed" << endl;
- return;
- }
- LOG(1) << "lsn set " << _lastFlushTime << endl;
- LSNFile lsnf;
- lsnf.set(_lastFlushTime);
- f.write(0, (char*)&lsnf, sizeof(lsnf));
- // do we want to fsync here? if we do it probably needs to be async so the durthread
- // is not delayed.
- }
- catch(std::exception& e) {
- log() << "warning: write to lsn file failed " << e.what() << endl;
- // keep running (ignore the error). recovery will be slow.
- }
- }
+void Journal::init() {
+ verify(_curLogFile == 0);
+ MongoFile::notifyPreFlush = preFlush;
+ MongoFile::notifyPostFlush = postFlush;
+}
- void Journal::preFlush() {
- j._preFlushTime = Listener::getElapsedTimeMillis();
- }
+void Journal::open() {
+ verify(MongoFile::notifyPreFlush == preFlush);
+ stdx::lock_guard<SimpleMutex> lk(_curLogFileMutex);
+ _open();
+}
- void Journal::postFlush() {
- j._lastFlushTime = j._preFlushTime;
- j._writeToLSNNeeded = true;
- }
+void LSNFile::set(unsigned long long x) {
+ memset(this, 0, sizeof(*this));
+ lsn = x;
+ checkbytes = ~x;
+}
- // call from within _curLogFileMutex
- void Journal::closeCurrentJournalFile() {
- if (!_curLogFile)
- return;
+/** logs details of the situation and returns 0 if anything is surprising in the LSNFile;
+    if something highly surprising is found, throws to abort
+*/
+unsigned long long LSNFile::get() {
+ uassert(
+ 13614,
+ str::stream() << "unexpected version number of lsn file in journal/ directory got: " << ver,
+ ver == 0);
+ if (~lsn != checkbytes) {
+ log() << "lsnfile not valid. recovery will be from log start. lsn: " << hex << lsn
+ << " checkbytes: " << hex << checkbytes << endl;
+ return 0;
+ }
+ return lsn;
+}
- JFile jf;
- jf.filename = _curLogFile->_name;
- jf.lastEventTimeMs = Listener::getElapsedTimeMillis();
- _oldJournalFiles.push_back(jf);
+/** called during recovery (the error message text below assumes that)
+*/
+unsigned long long journalReadLSN() {
+ if (!exists(lsnPath())) {
+ log() << "info no lsn file in journal/ directory" << endl;
+ return 0;
+ }
- delete _curLogFile; // close
- _curLogFile = 0;
- _written = 0;
+ try {
+ // os can flush as it likes. if it flushes slowly, we will just do extra work on recovery.
+ // however, given we actually close the file when writing, that seems unlikely.
+ LSNFile L;
+ File f;
+ f.open(lsnPath().string().c_str());
+ verify(f.is_open());
+ if (f.len() == 0) {
+ // this could be 'normal' if we crashed at the right moment
+ log() << "info lsn file is zero bytes long" << endl;
+ return 0;
}
+ f.read(0, (char*)&L, sizeof(L));
+ unsigned long long lsn = L.get();
+ return lsn;
+ } catch (std::exception& e) {
+ uasserted(13611,
+ str::stream() << "can't read lsn file in journal directory : " << e.what());
+ }
+ return 0;
+}
- /** remove older journal files.
- be in _curLogFileMutex but not dbMutex when calling
- */
- void Journal::removeUnneededJournalFiles() {
- while( !_oldJournalFiles.empty() ) {
- JFile f = _oldJournalFiles.front();
-
- if( f.lastEventTimeMs < _lastFlushTime + ExtraKeepTimeMs ) {
- // eligible for deletion
- boost::filesystem::path p( f.filename );
- log() << "old journal file will be removed: " << f.filename << endl;
- removeOldJournalFile(p);
- }
- else {
- break;
- }
+unsigned long long getLastDataFileFlushTime() {
+ return j.lastFlushTime();
+}
- _oldJournalFiles.pop_front();
- }
- }
+/** remember "last sequence number" to speed recoveries
+ concurrency: called by durThread only.
+*/
+void Journal::updateLSNFile() {
+ if (!_writeToLSNNeeded)
+ return;
+ _writeToLSNNeeded = false;
+ try {
+ // os can flush as it likes. if it flushes slowly, we will just do extra work on recovery.
+ // however, given we actually close the file, that seems unlikely.
+ File f;
+ f.open(lsnPath().string().c_str());
+ if (!f.is_open()) {
+ // can get 0 if an i/o error
+ log() << "warning: open of lsn file failed" << endl;
+ return;
+ }
+ LOG(1) << "lsn set " << _lastFlushTime << endl;
+ LSNFile lsnf;
+ lsnf.set(_lastFlushTime);
+ f.write(0, (char*)&lsnf, sizeof(lsnf));
+ // do we want to fsync here? if we do it probably needs to be async so the durthread
+ // is not delayed.
+ } catch (std::exception& e) {
+ log() << "warning: write to lsn file failed " << e.what() << endl;
+ // keep running (ignore the error). recovery will be slow.
+ }
+}
- void Journal::_rotate() {
+void Journal::preFlush() {
+ j._preFlushTime = Listener::getElapsedTimeMillis();
+}
- if ( inShutdown() || !_curLogFile )
- return;
+void Journal::postFlush() {
+ j._lastFlushTime = j._preFlushTime;
+ j._writeToLSNNeeded = true;
+}
- j.updateLSNFile();
+// call from within _curLogFileMutex
+void Journal::closeCurrentJournalFile() {
+ if (!_curLogFile)
+ return;
- if( _curLogFile && _written < DataLimitPerJournalFile )
- return;
+ JFile jf;
+ jf.filename = _curLogFile->_name;
+ jf.lastEventTimeMs = Listener::getElapsedTimeMillis();
+ _oldJournalFiles.push_back(jf);
- if( _curLogFile ) {
- _curLogFile->truncate();
- closeCurrentJournalFile();
- removeUnneededJournalFiles();
- }
+ delete _curLogFile; // close
+ _curLogFile = 0;
+ _written = 0;
+}
- try {
- Timer t;
- _open();
- int ms = t.millis();
- if( ms >= 200 ) {
- log() << "DR101 latency warning on journal file open " << ms << "ms" << endl;
- }
- }
- catch(std::exception& e) {
- log() << "warning exception opening journal file " << e.what() << endl;
- throw;
- }
- }
+/** remove older journal files.
+ be in _curLogFileMutex but not dbMutex when calling
+*/
+void Journal::removeUnneededJournalFiles() {
+ while (!_oldJournalFiles.empty()) {
+ JFile f = _oldJournalFiles.front();
- /** write (append) the buffer we have built to the journal and fsync it.
- outside of dbMutex lock as this could be slow.
- @param uncompressed - a buffer that will be written to the journal after compression
- will not return until on disk
- */
- void WRITETOJOURNAL(const JSectHeader& h, const AlignedBuilder& uncompressed) {
- Timer t;
- j.journal(h, uncompressed);
- stats.curr()->_writeToJournalMicros += t.micros();
+ if (f.lastEventTimeMs < _lastFlushTime + ExtraKeepTimeMs) {
+ // eligible for deletion
+ boost::filesystem::path p(f.filename);
+ log() << "old journal file will be removed: " << f.filename << endl;
+ removeOldJournalFile(p);
+ } else {
+ break;
}
- void Journal::journal(const JSectHeader& h, const AlignedBuilder& uncompressed) {
- static AlignedBuilder b(32*1024*1024);
- /* buffer to journal will be
- JSectHeader
- compressed operations
- JSectFooter
- */
- const unsigned headTailSize = sizeof(JSectHeader) + sizeof(JSectFooter);
- const unsigned max = maxCompressedLength(uncompressed.len()) + headTailSize;
- b.reset(max);
-
- {
- dassert( h.sectionLen() == (unsigned) 0xffffffff ); // we will backfill later
- b.appendStruct(h);
- }
+ _oldJournalFiles.pop_front();
+ }
+}
- size_t compressedLength = 0;
- rawCompress(uncompressed.buf(), uncompressed.len(), b.cur(), &compressedLength);
- verify( compressedLength < 0xffffffff );
- verify( compressedLength < max );
- b.skip(compressedLength);
-
- // footer
- unsigned L = 0xffffffff;
- {
- // pad to alignment, and set the total section length in the JSectHeader
- verify( 0xffffe000 == (~(Alignment-1)) );
- unsigned lenUnpadded = b.len() + sizeof(JSectFooter);
- L = (lenUnpadded + Alignment-1) & (~(Alignment-1));
- dassert( L >= lenUnpadded );
-
- ((JSectHeader*)b.atOfs(0))->setSectionLen(lenUnpadded);
-
- JSectFooter f(b.buf(), b.len()); // computes checksum
- b.appendStruct(f);
- dassert( b.len() == lenUnpadded );
-
- b.skip(L - lenUnpadded);
- dassert( b.len() % Alignment == 0 );
- }
+void Journal::_rotate() {
+ if (inShutdown() || !_curLogFile)
+ return;
- try {
- stdx::lock_guard<SimpleMutex> lk(_curLogFileMutex);
-
- // must already be open -- so that _curFileId is correct for previous buffer building
- verify( _curLogFile );
-
- stats.curr()->_uncompressedBytes += uncompressed.len();
- unsigned w = b.len();
- _written += w;
- verify( w <= L );
- stats.curr()->_journaledBytes += L;
- _curLogFile->synchronousAppend((const void *) b.buf(), L);
- _rotate();
- }
- catch(std::exception& e) {
- log() << "error exception in dur::journal " << e.what() << endl;
- throw;
- }
+ j.updateLSNFile();
+
+ if (_curLogFile && _written < DataLimitPerJournalFile)
+ return;
+
+ if (_curLogFile) {
+ _curLogFile->truncate();
+ closeCurrentJournalFile();
+ removeUnneededJournalFiles();
+ }
+
+ try {
+ Timer t;
+ _open();
+ int ms = t.millis();
+ if (ms >= 200) {
+ log() << "DR101 latency warning on journal file open " << ms << "ms" << endl;
}
+ } catch (std::exception& e) {
+ log() << "warning exception opening journal file " << e.what() << endl;
+ throw;
+ }
+}
+
+/** write (append) the buffer we have built to the journal and fsync it.
+ outside of dbMutex lock as this could be slow.
+ @param uncompressed - a buffer that will be written to the journal after compression
+ will not return until on disk
+*/
+void WRITETOJOURNAL(const JSectHeader& h, const AlignedBuilder& uncompressed) {
+ Timer t;
+ j.journal(h, uncompressed);
+ stats.curr()->_writeToJournalMicros += t.micros();
+}
+void Journal::journal(const JSectHeader& h, const AlignedBuilder& uncompressed) {
+ static AlignedBuilder b(32 * 1024 * 1024);
+ /* buffer to journal will be
+ JSectHeader
+ compressed operations
+ JSectFooter
+ */
+ const unsigned headTailSize = sizeof(JSectHeader) + sizeof(JSectFooter);
+ const unsigned max = maxCompressedLength(uncompressed.len()) + headTailSize;
+ b.reset(max);
+
+ {
+ dassert(h.sectionLen() == (unsigned)0xffffffff); // we will backfill later
+ b.appendStruct(h);
}
+
+ size_t compressedLength = 0;
+ rawCompress(uncompressed.buf(), uncompressed.len(), b.cur(), &compressedLength);
+ verify(compressedLength < 0xffffffff);
+ verify(compressedLength < max);
+ b.skip(compressedLength);
+
+ // footer
+ unsigned L = 0xffffffff;
+ {
+ // pad to alignment, and set the total section length in the JSectHeader
+ verify(0xffffe000 == (~(Alignment - 1)));
+ unsigned lenUnpadded = b.len() + sizeof(JSectFooter);
+ L = (lenUnpadded + Alignment - 1) & (~(Alignment - 1));
+ dassert(L >= lenUnpadded);
+
+ ((JSectHeader*)b.atOfs(0))->setSectionLen(lenUnpadded);
+
+ JSectFooter f(b.buf(), b.len()); // computes checksum
+ b.appendStruct(f);
+ dassert(b.len() == lenUnpadded);
+
+ b.skip(L - lenUnpadded);
+ dassert(b.len() % Alignment == 0);
+ }
+
+ try {
+ stdx::lock_guard<SimpleMutex> lk(_curLogFileMutex);
+
+ // must already be open -- so that _curFileId is correct for previous buffer building
+ verify(_curLogFile);
+
+ stats.curr()->_uncompressedBytes += uncompressed.len();
+ unsigned w = b.len();
+ _written += w;
+ verify(w <= L);
+ stats.curr()->_journaledBytes += L;
+ _curLogFile->synchronousAppend((const void*)b.buf(), L);
+ _rotate();
+ } catch (std::exception& e) {
+ log() << "error exception in dur::journal " << e.what() << endl;
+ throw;
+ }
+}
+}
}
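Reviewer note: the section framing in Journal::journal() above pads each section up to the 8KB Alignment with the expression (lenUnpadded + Alignment - 1) & ~(Alignment - 1), which is also what the verify(0xffffe000 == ~(Alignment - 1)) check encodes. A minimal standalone sketch of that arithmetic, under the assumption Alignment = 8192 as in dur_journalformat.h (the free function here is illustrative, not part of the build):

#include <cassert>
#include <cstdio>

const unsigned Alignment = 8192;  // matches dur_journalformat.h

// Round lenUnpadded up to the next multiple of Alignment (a power of two).
unsigned sectionLenWithPadding(unsigned lenUnpadded) {
    unsigned padded = (lenUnpadded + Alignment - 1) & ~(Alignment - 1);
    assert(padded >= lenUnpadded && padded % Alignment == 0);
    return padded;
}

int main() {
    std::printf("%u -> %u\n", 10000u, sectionLenWithPadding(10000u));  // 10000 -> 16384
    std::printf("%u -> %u\n", 8192u, sectionLenWithPadding(8192u));    // 8192 -> 8192
    return 0;
}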
diff --git a/src/mongo/db/storage/mmap_v1/dur_journal.h b/src/mongo/db/storage/mmap_v1/dur_journal.h
index 469732c59a2..07def586090 100644
--- a/src/mongo/db/storage/mmap_v1/dur_journal.h
+++ b/src/mongo/db/storage/mmap_v1/dur_journal.h
@@ -32,59 +32,58 @@
namespace mongo {
- class AlignedBuilder;
- class JSectHeader;
+class AlignedBuilder;
+class JSectHeader;
- namespace dur {
+namespace dur {
- /** true if ok to cleanup journal files at termination. otherwise, files journal will be retained.
- */
- extern bool okToCleanUp;
-
- /** at termination after db files closed & fsynced
- also after recovery
- closes and removes journal files
- @param log report in log that we are cleaning up if we actually do any work
- */
- void journalCleanup(bool log = false);
+/** true if ok to clean up journal files at termination. otherwise, journal files will be retained.
+*/
+extern bool okToCleanUp;
- /** assure journal/ dir exists. throws */
- void journalMakeDir();
+/** at termination after db files closed & fsynced
+ also after recovery
+ closes and removes journal files
+ @param log report in log that we are cleaning up if we actually do any work
+*/
+void journalCleanup(bool log = false);
- /** check if time to rotate files; assure a file is open.
- done separately from the journal() call as we can do this part
- outside of lock.
- only called by durThread.
- */
- void journalRotate();
+/** assure journal/ dir exists. throws */
+void journalMakeDir();
- /** flag that something has gone wrong during writing to the journal
- (not for recovery mode)
- */
- void journalingFailure(const char *msg);
+/** check if time to rotate files; assure a file is open.
+ done separately from the journal() call as we can do this part
+ outside of lock.
+ only called by durThread.
+ */
+void journalRotate();
- /** read lsn from disk from the last run before doing recovery */
- unsigned long long journalReadLSN();
+/** flag that something has gone wrong during writing to the journal
+ (not for recovery mode)
+*/
+void journalingFailure(const char* msg);
- unsigned long long getLastDataFileFlushTime();
+/** read lsn from disk from the last run before doing recovery */
+unsigned long long journalReadLSN();
- /** never throws.
- @param anyFiles by default we only look at j._* files. If anyFiles is true, return true
- if there are any files in the journal directory. acquirePathLock() uses this to
- make sure that the journal directory is mounted.
- @return true if there are any journal files in the journal dir.
- */
- bool haveJournalFiles(bool anyFiles=false);
+unsigned long long getLastDataFileFlushTime();
- /**
- * Writes the specified uncompressed buffer to the journal.
- */
- void WRITETOJOURNAL(const JSectHeader& h, const AlignedBuilder& uncompressed);
+/** never throws.
+ @param anyFiles by default we only look at j._* files. If anyFiles is true, return true
+ if there are any files in the journal directory. acquirePathLock() uses this to
+ make sure that the journal directory is mounted.
+ @return true if there are any journal files in the journal dir.
+*/
+bool haveJournalFiles(bool anyFiles = false);
- // in case disk controller buffers writes
- const long long ExtraKeepTimeMs = 10000;
+/**
+ * Writes the specified uncompressed buffer to the journal.
+ */
+void WRITETOJOURNAL(const JSectHeader& h, const AlignedBuilder& uncompressed);
- const unsigned JournalCommitIntervalDefault = 100;
+// in case disk controller buffers writes
+const long long ExtraKeepTimeMs = 10000;
- }
+const unsigned JournalCommitIntervalDefault = 100;
+}
}
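Reviewer note: the lsn/checkbytes pairing that LSNFile::set() and LSNFile::get() maintain in dur_journal.cpp can be exercised in isolation. A minimal sketch assuming only the two core fields (the real struct also carries ver and reserved words, and get() additionally uasserts on the version):

#include <cstdint>
#include <cstdio>
#include <cstring>

struct LSNFileSketch {
    uint64_t lsn;
    uint64_t checkbytes;  // always the bitwise complement of lsn when valid

    void set(uint64_t x) {
        std::memset(this, 0, sizeof(*this));
        lsn = x;
        checkbytes = ~x;
    }

    // Returns 0 ("recover from log start") when the pair is inconsistent.
    uint64_t get() const {
        return (~lsn == checkbytes) ? lsn : 0;
    }
};

int main() {
    LSNFileSketch f;
    f.set(123456789ULL);
    std::printf("valid lsn: %llu\n", (unsigned long long)f.get());
    f.checkbytes ^= 1;  // simulate a torn or corrupted write
    std::printf("after corruption: %llu\n", (unsigned long long)f.get());  // prints 0
    return 0;
}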
diff --git a/src/mongo/db/storage/mmap_v1/dur_journal_writer.cpp b/src/mongo/db/storage/mmap_v1/dur_journal_writer.cpp
index 4c6eb8ec8cc..971f2aa0e60 100644
--- a/src/mongo/db/storage/mmap_v1/dur_journal_writer.cpp
+++ b/src/mongo/db/storage/mmap_v1/dur_journal_writer.cpp
@@ -47,268 +47,251 @@ namespace dur {
namespace {
- /**
- * Apply the writes back to the non-private MMF after they are for certain in the journal.
- *
- * (1) TODO we don't need to write back everything every group commit. We MUST write back that
- * which is going to be a remapped on its private view - but that might not be all views.
- *
- * (2) TODO should we do this using N threads? Would be quite easy see Hackenberg paper table
- * 5 and 6. 2 threads might be a good balance.
- */
- void WRITETODATAFILES(const JSectHeader& h, const AlignedBuilder& uncompressed) {
- Timer t;
-
- LOG(4) << "WRITETODATAFILES BEGIN";
-
- RecoveryJob::get().processSection(&h, uncompressed.buf(), uncompressed.len(), NULL);
-
- const long long m = t.micros();
- stats.curr()->_writeToDataFilesMicros += m;
-
- LOG(4) << "journal WRITETODATAFILES " << m / 1000.0 << "ms";
- }
-
-} // namespace
-
+/**
+ * Apply the writes back to the non-private MMF after they are for certain in the journal.
+ *
+ * (1) TODO we don't need to write back everything every group commit. We MUST write back that
+ * which is going to be remapped on its private view - but that might not be all views.
+ *
+ * (2) TODO should we do this using N threads? Would be quite easy; see Hackenberg paper,
+ * tables 5 and 6. 2 threads might be a good balance.
+ */
+void WRITETODATAFILES(const JSectHeader& h, const AlignedBuilder& uncompressed) {
+ Timer t;
- /**
- * Used inside the journal writer thread to ensure that used buffers are cleaned up properly.
- */
- class BufferGuard {
- MONGO_DISALLOW_COPYING(BufferGuard);
- public:
- BufferGuard(JournalWriter::Buffer* buffer, JournalWriter::BufferQueue* bufferQueue)
- : _buffer(buffer),
- _bufferQueue(bufferQueue) {
+ LOG(4) << "WRITETODATAFILES BEGIN";
- }
+ RecoveryJob::get().processSection(&h, uncompressed.buf(), uncompressed.len(), NULL);
- ~BufferGuard() {
- // This buffer is done. Reset and remove it from the journal queue and put it on
- // the ready queue.
- _buffer->_reset();
+ const long long m = t.micros();
+ stats.curr()->_writeToDataFilesMicros += m;
- // This should never block. Otherwise we will stall the journaling pipeline
- // permanently and cause deadlock.
- invariant(_bufferQueue->count() < _bufferQueue->maxSize());
- _bufferQueue->push(_buffer);
- }
+ LOG(4) << "journal WRITETODATAFILES " << m / 1000.0 << "ms";
+}
- private:
- // Buffer that this scoped object is managing. Owned until destruction time. Then, the
- // bufferQueue owns it.
- JournalWriter::Buffer* const _buffer;
+} // namespace
- // Queue where the buffer should be returned to at destruction time. Not owned.
- JournalWriter::BufferQueue* const _bufferQueue;
- };
+/**
+ * Used inside the journal writer thread to ensure that used buffers are cleaned up properly.
+ */
+class BufferGuard {
+ MONGO_DISALLOW_COPYING(BufferGuard);
+
+public:
+ BufferGuard(JournalWriter::Buffer* buffer, JournalWriter::BufferQueue* bufferQueue)
+ : _buffer(buffer), _bufferQueue(bufferQueue) {}
+
+ ~BufferGuard() {
+ // This buffer is done. Reset and remove it from the journal queue and put it on
+ // the ready queue.
+ _buffer->_reset();
+
+ // This should never block. Otherwise we will stall the journaling pipeline
+ // permanently and cause deadlock.
+ invariant(_bufferQueue->count() < _bufferQueue->maxSize());
+ _bufferQueue->push(_buffer);
+ }
- //
- // JournalWriter
- //
+private:
+ // Buffer that this scoped object is managing. Owned until destruction time. Then, the
+ // bufferQueue owns it.
+ JournalWriter::Buffer* const _buffer;
+
+ // Queue where the buffer should be returned to at destruction time. Not owned.
+ JournalWriter::BufferQueue* const _bufferQueue;
+};
+
+
+//
+// JournalWriter
+//
+
+JournalWriter::JournalWriter(NotifyAll* commitNotify,
+ NotifyAll* applyToDataFilesNotify,
+ size_t numBuffers)
+ : _commitNotify(commitNotify),
+ _applyToDataFilesNotify(applyToDataFilesNotify),
+ _shutdownRequested(false),
+ _journalQueue(numBuffers),
+ _lastCommitNumber(0),
+ _readyQueue(numBuffers) {
+ invariant(_journalQueue.maxSize() == _readyQueue.maxSize());
+}
+
+JournalWriter::~JournalWriter() {
+ // Never close the journal writer with outstanding or unaccounted writes
+ invariant(_journalQueue.empty());
+ invariant(_readyQueue.empty());
+}
+
+void JournalWriter::start() {
+ // Do not allow reuse
+ invariant(!_shutdownRequested);
+
+ // Pre-allocate the journal buffers and push them on the ready queue
+ for (size_t i = 0; i < _readyQueue.maxSize(); i++) {
+ _readyQueue.push(new Buffer(InitialBufferSizeBytes));
+ }
- JournalWriter::JournalWriter(NotifyAll* commitNotify,
- NotifyAll* applyToDataFilesNotify,
- size_t numBuffers)
- : _commitNotify(commitNotify),
- _applyToDataFilesNotify(applyToDataFilesNotify),
- _shutdownRequested(false),
- _journalQueue(numBuffers),
- _lastCommitNumber(0),
- _readyQueue(numBuffers) {
+ // Start the thread
+ stdx::thread t(stdx::bind(&JournalWriter::_journalWriterThread, this));
+ _journalWriterThreadHandle.swap(t);
+}
- invariant(_journalQueue.maxSize() == _readyQueue.maxSize());
- }
+void JournalWriter::shutdown() {
+ // There is no reason to call shutdown multiple times
+ invariant(!_shutdownRequested);
+ _shutdownRequested = true;
- JournalWriter::~JournalWriter() {
- // Never close the journal writer with outstanding or unaccounted writes
- invariant(_journalQueue.empty());
- invariant(_readyQueue.empty());
- }
+ // Never terminate the journal writer with outstanding or unaccounted writes
+ assertIdle();
- void JournalWriter::start() {
- // Do not allow reuse
- invariant(!_shutdownRequested);
+ Buffer* const shutdownBuffer = newBuffer();
+ shutdownBuffer->_setShutdown();
- // Pre-allocate the journal buffers and push them on the ready queue
- for (size_t i = 0; i < _readyQueue.maxSize(); i++) {
- _readyQueue.push(new Buffer(InitialBufferSizeBytes));
- }
+ // This will terminate the journal thread. No need to specify commit number, since we are
+ // shutting down and nothing will be notified anyways.
+ writeBuffer(shutdownBuffer, 0);
- // Start the thread
- stdx::thread t(stdx::bind(&JournalWriter::_journalWriterThread, this));
- _journalWriterThreadHandle.swap(t);
- }
+ // Ensure the journal thread has stopped and everything accounted for.
+ _journalWriterThreadHandle.join();
+ assertIdle();
- void JournalWriter::shutdown() {
- // There is no reason to call shutdown multiple times
- invariant(!_shutdownRequested);
- _shutdownRequested = true;
-
- // Never terminate the journal writer with outstanding or unaccounted writes
- assertIdle();
-
- Buffer* const shutdownBuffer = newBuffer();
- shutdownBuffer->_setShutdown();
-
- // This will terminate the journal thread. No need to specify commit number, since we are
- // shutting down and nothing will be notified anyways.
- writeBuffer(shutdownBuffer, 0);
-
- // Ensure the journal thread has stopped and everything accounted for.
- _journalWriterThreadHandle.join();
- assertIdle();
-
- // Delete the buffers (this deallocates the journal buffer memory)
- while (!_readyQueue.empty()) {
- Buffer* const buffer = _readyQueue.blockingPop();
- delete buffer;
- }
+ // Delete the buffers (this deallocates the journal buffer memory)
+ while (!_readyQueue.empty()) {
+ Buffer* const buffer = _readyQueue.blockingPop();
+ delete buffer;
}
+}
- void JournalWriter::assertIdle() {
- // All buffers are in the ready queue means there is nothing pending.
- invariant(_journalQueue.empty());
- invariant(_readyQueue.count() == _readyQueue.maxSize());
- }
+void JournalWriter::assertIdle() {
+ // All buffers are in the ready queue means there is nothing pending.
+ invariant(_journalQueue.empty());
+ invariant(_readyQueue.count() == _readyQueue.maxSize());
+}
- JournalWriter::Buffer* JournalWriter::newBuffer() {
- Buffer* const buffer = _readyQueue.blockingPop();
- buffer->_assertEmpty();
+JournalWriter::Buffer* JournalWriter::newBuffer() {
+ Buffer* const buffer = _readyQueue.blockingPop();
+ buffer->_assertEmpty();
- return buffer;
- }
+ return buffer;
+}
- void JournalWriter::writeBuffer(Buffer* buffer, NotifyAll::When commitNumber) {
- invariant(buffer->_commitNumber == 0);
- invariant((commitNumber > _lastCommitNumber) ||
- (buffer->_isShutdown && (commitNumber == 0)));
+void JournalWriter::writeBuffer(Buffer* buffer, NotifyAll::When commitNumber) {
+ invariant(buffer->_commitNumber == 0);
+ invariant((commitNumber > _lastCommitNumber) || (buffer->_isShutdown && (commitNumber == 0)));
- buffer->_commitNumber = commitNumber;
+ buffer->_commitNumber = commitNumber;
- _journalQueue.push(buffer);
- }
+ _journalQueue.push(buffer);
+}
- void JournalWriter::flush() {
- std::vector<Buffer*> buffers;
+void JournalWriter::flush() {
+ std::vector<Buffer*> buffers;
- // Pop the expected number of buffers from the ready queue. This will block until all
- // in-progress buffers have completed.
- for (size_t i = 0; i < _readyQueue.maxSize(); i++) {
- buffers.push_back(_readyQueue.blockingPop());
- }
+ // Pop the expected number of buffers from the ready queue. This will block until all
+ // in-progress buffers have completed.
+ for (size_t i = 0; i < _readyQueue.maxSize(); i++) {
+ buffers.push_back(_readyQueue.blockingPop());
+ }
- // Put them back in to restore the original state.
- for (size_t i = 0; i < buffers.size(); i++) {
- _readyQueue.push(buffers[i]);
- }
+ // Put them back in to restore the original state.
+ for (size_t i = 0; i < buffers.size(); i++) {
+ _readyQueue.push(buffers[i]);
}
+}
- void JournalWriter::_journalWriterThread() {
- Client::initThread("journal writer");
+void JournalWriter::_journalWriterThread() {
+ Client::initThread("journal writer");
- log() << "Journal writer thread started";
+ log() << "Journal writer thread started";
- try {
- while (true) {
- Buffer* const buffer = _journalQueue.blockingPop();
- BufferGuard bufferGuard(buffer, &_readyQueue);
+ try {
+ while (true) {
+ Buffer* const buffer = _journalQueue.blockingPop();
+ BufferGuard bufferGuard(buffer, &_readyQueue);
- if (buffer->_isShutdown) {
- invariant(buffer->_builder.len() == 0);
+ if (buffer->_isShutdown) {
+ invariant(buffer->_builder.len() == 0);
- // The journal writer thread is terminating. Nothing to notify or write.
- break;
- }
+ // The journal writer thread is terminating. Nothing to notify or write.
+ break;
+ }
- if (buffer->_isNoop) {
- invariant(buffer->_builder.len() == 0);
+ if (buffer->_isNoop) {
+ invariant(buffer->_builder.len() == 0);
- // There's nothing to be writen, but we still need to notify this commit number
- _commitNotify->notifyAll(buffer->_commitNumber);
- _applyToDataFilesNotify->notifyAll(buffer->_commitNumber);
- continue;
- }
+                // There's nothing to be written, but we still need to notify this commit number
+ _commitNotify->notifyAll(buffer->_commitNumber);
+ _applyToDataFilesNotify->notifyAll(buffer->_commitNumber);
+ continue;
+ }
- LOG(4) << "Journaling commit number " << buffer->_commitNumber
- << " (journal file " << buffer->_header.fileId
- << ", sequence " << buffer->_header.seqNumber
- << ", size " << buffer->_builder.len() << " bytes)";
+ LOG(4) << "Journaling commit number " << buffer->_commitNumber << " (journal file "
+ << buffer->_header.fileId << ", sequence " << buffer->_header.seqNumber
+ << ", size " << buffer->_builder.len() << " bytes)";
- // This performs synchronous I/O to the journal file and will block.
- WRITETOJOURNAL(buffer->_header, buffer->_builder);
+ // This performs synchronous I/O to the journal file and will block.
+ WRITETOJOURNAL(buffer->_header, buffer->_builder);
- // Data is now persisted in the journal, which is sufficient for acknowledging
- // getLastError
- _commitNotify->notifyAll(buffer->_commitNumber);
+ // Data is now persisted in the journal, which is sufficient for acknowledging
+ // getLastError
+ _commitNotify->notifyAll(buffer->_commitNumber);
- // Apply the journal entries on top of the shared view so that when flush is
- // requested it would write the latest.
- WRITETODATAFILES(buffer->_header, buffer->_builder);
+ // Apply the journal entries on top of the shared view so that when flush is
+ // requested it would write the latest.
+ WRITETODATAFILES(buffer->_header, buffer->_builder);
- // Data is now persisted on the shared view, so notify any potential journal file
- // cleanup waiters.
- _applyToDataFilesNotify->notifyAll(buffer->_commitNumber);
- }
+ // Data is now persisted on the shared view, so notify any potential journal file
+ // cleanup waiters.
+ _applyToDataFilesNotify->notifyAll(buffer->_commitNumber);
}
- catch (const DBException& e) {
- severe() << "dbexception in journalWriterThread causing immediate shutdown: "
- << e.toString();
- invariant(false);
- }
- catch (const std::ios_base::failure& e) {
- severe() << "ios_base exception in journalWriterThread causing immediate shutdown: "
- << e.what();
- invariant(false);
- }
- catch (const std::bad_alloc& e) {
- severe() << "bad_alloc exception in journalWriterThread causing immediate shutdown: "
- << e.what();
- invariant(false);
- }
- catch (const std::exception& e) {
- severe() << "exception in journalWriterThread causing immediate shutdown: "
- << e.what();
- invariant(false);
- }
- catch (...) {
- severe() << "unhandled exception in journalWriterThread causing immediate shutdown";
- invariant(false);
- }
-
- log() << "Journal writer thread stopped";
+ } catch (const DBException& e) {
+ severe() << "dbexception in journalWriterThread causing immediate shutdown: "
+ << e.toString();
+ invariant(false);
+ } catch (const std::ios_base::failure& e) {
+ severe() << "ios_base exception in journalWriterThread causing immediate shutdown: "
+ << e.what();
+ invariant(false);
+ } catch (const std::bad_alloc& e) {
+ severe() << "bad_alloc exception in journalWriterThread causing immediate shutdown: "
+ << e.what();
+ invariant(false);
+ } catch (const std::exception& e) {
+ severe() << "exception in journalWriterThread causing immediate shutdown: " << e.what();
+ invariant(false);
+ } catch (...) {
+ severe() << "unhandled exception in journalWriterThread causing immediate shutdown";
+ invariant(false);
}
+ log() << "Journal writer thread stopped";
+}
- //
- // Buffer
- //
-
- JournalWriter::Buffer::Buffer(size_t initialSize)
- : _commitNumber(0),
- _isNoop(false),
- _isShutdown(false),
- _header(),
- _builder(initialSize) {
- }
+//
+// Buffer
+//
- JournalWriter::Buffer::~Buffer() {
- _assertEmpty();
- }
+JournalWriter::Buffer::Buffer(size_t initialSize)
+ : _commitNumber(0), _isNoop(false), _isShutdown(false), _header(), _builder(initialSize) {}
- void JournalWriter::Buffer::_assertEmpty() {
- invariant(_commitNumber == 0);
- invariant(_builder.len() == 0);
- }
+JournalWriter::Buffer::~Buffer() {
+ _assertEmpty();
+}
- void JournalWriter::Buffer::_reset() {
- _commitNumber = 0;
- _isNoop = false;
- _builder.reset();
- }
+void JournalWriter::Buffer::_assertEmpty() {
+ invariant(_commitNumber == 0);
+ invariant(_builder.len() == 0);
+}
+
+void JournalWriter::Buffer::_reset() {
+ _commitNumber = 0;
+ _isNoop = false;
+ _builder.reset();
+}
-} // namespace dur
-} // namespace mongo
+} // namespace dur
+} // namespace mongo
diff --git a/src/mongo/db/storage/mmap_v1/dur_journal_writer.h b/src/mongo/db/storage/mmap_v1/dur_journal_writer.h
index 6ac91de6532..2f738cbb380 100644
--- a/src/mongo/db/storage/mmap_v1/dur_journal_writer.h
+++ b/src/mongo/db/storage/mmap_v1/dur_journal_writer.h
@@ -38,150 +38,158 @@
namespace mongo {
namespace dur {
+/**
+ * Manages the thread and queues used for writing the journal to disk and notifying parties
+ * which are waiting on the write concern.
+ *
+ * NOTE: Not thread-safe and must not be used from more than one thread.
+ */
+class JournalWriter {
+ MONGO_DISALLOW_COPYING(JournalWriter);
+
+public:
/**
- * Manages the thread and queues used for writing the journal to disk and notify parties with
- * are waiting on the write concern.
- *
- * NOTE: Not thread-safe and must not be used from more than one thread.
+ * Stores the memory and the header for a complete journal buffer which is pending to be
+ * written by the journal writer thread.
*/
- class JournalWriter {
- MONGO_DISALLOW_COPYING(JournalWriter);
+ class Buffer {
public:
+ Buffer(size_t initialSize);
+ ~Buffer();
+
+ JSectHeader& getHeader() {
+ return _header;
+ }
+ AlignedBuilder& getBuilder() {
+ return _builder;
+ }
- /**
- * Stores the memory and the header for a complete journal buffer which is pending to be
- * written by the journal writer thread.
- */
- class Buffer {
- public:
- Buffer(size_t initialSize);
- ~Buffer();
-
- JSectHeader& getHeader() { return _header; }
- AlignedBuilder& getBuilder() { return _builder; }
-
- void setNoop() { _isNoop = true; }
-
- private:
- friend class BufferGuard;
- friend class JournalWriter;
-
-
- void _assertEmpty();
- void _reset();
- void _setShutdown() { _isShutdown = true; }
-
- // Specifies the commit number which flushing this buffer would notify. This value is
- // zero, if there is no data to be flushed or if the buffer is noop/shutdown.
- NotifyAll::When _commitNumber;
-
- // Special buffer that's posted when there is nothing to be written to the journal,
- // but we want to order a notification so it happens after all other writes have
- // completed.
- bool _isNoop;
-
- // Special buffer that's posted when the receiving thread must terminate. This should
- // be the last entry posted to the queue and the commit number should be zero.
- bool _isShutdown;
-
- JSectHeader _header;
- AlignedBuilder _builder;
- };
-
-
- /**
- * Initializes the journal writer.
- *
- * @param commitNotify Notification object to be called after journal entries have been
- * written to disk. The caller retains ownership and the notify object must outlive
- * the journal writer object.
- * @param applyToDataFilesNotify Notification object to be called after journal entries
- * have been applied to the shared view. This means that if the shared view were to be
- * flushed at this point, the journal files before this point are not necessary. The
- * caller retains ownership and the notify object must outlive the journal writer
- * object.
- * @param numBuffers How many buffers to create to hold outstanding writes. If there are
- * more than this number of journal writes that have not completed, the write calls
- * will block.
- */
- JournalWriter(NotifyAll* commitNotify, NotifyAll* applyToDataFilesNotify, size_t numBuffers);
- ~JournalWriter();
-
- /**
- * Allocates buffer memory and starts the journal writer thread.
- */
- void start();
-
- /**
- * Terminates the journal writer thread and frees memory for the buffers. Must not be
- * called if there are any pending journal writes.
- */
- void shutdown();
-
- /**
- * Asserts that there are no pending journal writes.
- */
- void assertIdle();
-
- /**
- * Obtains a new empty buffer into which a journal entry should be written.
- *
- * This method may block if there are no free buffers.
- *
- * The caller does not own the buffer and needs to "return" it to the writer by calling
- * writeBuffer. Buffers with data on them should never be discarded until they are written.
- */
- Buffer* newBuffer();
-
- /**
- * Requests that the specified buffer be written asynchronously.
- *
- * This method may block if there are too many outstanding unwritten buffers.
- *
- * @param buffer Buffer entry to be written. The buffer object must not be used anymore
- * after it has been given to this function.
- * @param commitNumber What commit number to be notified once the buffer has been written
- * to disk.
- */
- void writeBuffer(Buffer* buffer, NotifyAll::When commitNumber);
-
- /**
- * Ensures that all previously submitted write requests complete. This call is blocking.
- */
- void flush();
+ void setNoop() {
+ _isNoop = true;
+ }
private:
friend class BufferGuard;
+ friend class JournalWriter;
- typedef BlockingQueue<Buffer*> BufferQueue;
- // Start all buffers with 4MB of size
- enum { InitialBufferSizeBytes = 4 * 1024 * 1024 };
+ void _assertEmpty();
+ void _reset();
+ void _setShutdown() {
+ _isShutdown = true;
+ }
+ // Specifies the commit number which flushing this buffer would notify. This value is
+    // zero if there is no data to be flushed or if the buffer is noop/shutdown.
+ NotifyAll::When _commitNumber;
- void _journalWriterThread();
+ // Special buffer that's posted when there is nothing to be written to the journal,
+ // but we want to order a notification so it happens after all other writes have
+ // completed.
+ bool _isNoop;
+ // Special buffer that's posted when the receiving thread must terminate. This should
+ // be the last entry posted to the queue and the commit number should be zero.
+ bool _isShutdown;
- // This gets notified as journal buffers are written. It is not owned and needs to outlive
- // the journal writer object.
- NotifyAll* const _commitNotify;
+ JSectHeader _header;
+ AlignedBuilder _builder;
+ };
- // This gets notified as journal buffers are done being applied to the shared view
- NotifyAll* const _applyToDataFilesNotify;
- // Wraps and controls the journal writer thread
- stdx::thread _journalWriterThreadHandle;
+ /**
+ * Initializes the journal writer.
+ *
+ * @param commitNotify Notification object to be called after journal entries have been
+ * written to disk. The caller retains ownership and the notify object must outlive
+ * the journal writer object.
+ * @param applyToDataFilesNotify Notification object to be called after journal entries
+ * have been applied to the shared view. This means that if the shared view were to be
+ * flushed at this point, the journal files before this point are not necessary. The
+ * caller retains ownership and the notify object must outlive the journal writer
+ * object.
+ * @param numBuffers How many buffers to create to hold outstanding writes. If there are
+ * more than this number of journal writes that have not completed, the write calls
+ * will block.
+ */
+ JournalWriter(NotifyAll* commitNotify, NotifyAll* applyToDataFilesNotify, size_t numBuffers);
+ ~JournalWriter();
- // Indicates that shutdown has been requested. Used for idempotency of the shutdown call.
- bool _shutdownRequested;
+ /**
+ * Allocates buffer memory and starts the journal writer thread.
+ */
+ void start();
- // Queue of buffers, which need to be written by the journal writer thread
- BufferQueue _journalQueue;
- NotifyAll::When _lastCommitNumber;
+ /**
+ * Terminates the journal writer thread and frees memory for the buffers. Must not be
+ * called if there are any pending journal writes.
+ */
+ void shutdown();
- // Queue of buffers, whose write has been completed by the journal writer thread.
- BufferQueue _readyQueue;
- };
+ /**
+ * Asserts that there are no pending journal writes.
+ */
+ void assertIdle();
+
+ /**
+ * Obtains a new empty buffer into which a journal entry should be written.
+ *
+ * This method may block if there are no free buffers.
+ *
+ * The caller does not own the buffer and needs to "return" it to the writer by calling
+ * writeBuffer. Buffers with data on them should never be discarded until they are written.
+ */
+ Buffer* newBuffer();
+
+ /**
+ * Requests that the specified buffer be written asynchronously.
+ *
+ * This method may block if there are too many outstanding unwritten buffers.
+ *
+ * @param buffer Buffer entry to be written. The buffer object must not be used anymore
+ * after it has been given to this function.
+ * @param commitNumber What commit number to be notified once the buffer has been written
+ * to disk.
+ */
+ void writeBuffer(Buffer* buffer, NotifyAll::When commitNumber);
+
+ /**
+ * Ensures that all previously submitted write requests complete. This call is blocking.
+ */
+ void flush();
+
+private:
+ friend class BufferGuard;
+
+ typedef BlockingQueue<Buffer*> BufferQueue;
+
+ // Start all buffers with 4MB of size
+ enum { InitialBufferSizeBytes = 4 * 1024 * 1024 };
+
+
+ void _journalWriterThread();
+
+
+ // This gets notified as journal buffers are written. It is not owned and needs to outlive
+ // the journal writer object.
+ NotifyAll* const _commitNotify;
+
+ // This gets notified as journal buffers are done being applied to the shared view
+ NotifyAll* const _applyToDataFilesNotify;
+
+ // Wraps and controls the journal writer thread
+ stdx::thread _journalWriterThreadHandle;
+
+ // Indicates that shutdown has been requested. Used for idempotency of the shutdown call.
+ bool _shutdownRequested;
+
+ // Queue of buffers, which need to be written by the journal writer thread
+ BufferQueue _journalQueue;
+ NotifyAll::When _lastCommitNumber;
+
+ // Queue of buffers, whose write has been completed by the journal writer thread.
+ BufferQueue _readyQueue;
+};
-} // namespace dur
-} // namespace mongo
+} // namespace dur
+} // namespace mongo
diff --git a/src/mongo/db/storage/mmap_v1/dur_journalformat.h b/src/mongo/db/storage/mmap_v1/dur_journalformat.h
index 80ea90bd78a..3c31c2686dd 100644
--- a/src/mongo/db/storage/mmap_v1/dur_journalformat.h
+++ b/src/mongo/db/storage/mmap_v1/dur_journalformat.h
@@ -37,155 +37,181 @@
namespace mongo {
- namespace dur {
+namespace dur {
- const unsigned Alignment = 8192;
+const unsigned Alignment = 8192;
#pragma pack(1)
- /** beginning header for a journal/j._<n> file
- there is nothing important int this header at this time. except perhaps version #.
- */
- struct JHeader {
- JHeader() { }
- JHeader(std::string fname);
+/** beginning header for a journal/j._<n> file
+ there is nothing important in this header at this time, except perhaps the version #.
+*/
+struct JHeader {
+ JHeader() {}
+ JHeader(std::string fname);
- char magic[2]; // "j\n". j means journal, then a linefeed, fwiw if you were to run "less" on the file or something...
+ char magic
+ [2]; // "j\n". j means journal, then a linefeed, fwiw if you were to run "less" on the file or something...
- // x4142 is asci--readable if you look at the file with head/less -- thus the starting values were near
- // that. simply incrementing the version # is safe on a fwd basis.
+// x4142 is ascii-readable if you look at the file with head/less -- thus the starting values were near
+// that. simply incrementing the version # is safe on a fwd basis.
#if defined(_NOCOMPRESS)
- enum { CurrentVersion = 0x4148 };
+ enum { CurrentVersion = 0x4148 };
#else
- enum { CurrentVersion = 0x4149 };
+ enum { CurrentVersion = 0x4149 };
#endif
- unsigned short _version;
-
- // these are just for diagnostic ease (make header more useful as plain text)
- char n1; // '\n'
- char ts[20]; // ascii timestamp of file generation. for user reading, not used by code.
- char n2; // '\n'
- char dbpath[128]; // path/filename of this file for human reading and diagnostics. not used by code.
- char n3, n4; // '\n', '\n'
-
- unsigned long long fileId; // unique identifier that will be in each JSectHeader. important as we recycle prealloced files
-
- char reserved3[8026]; // 8KB total for the file header
- char txt2[2]; // "\n\n" at the end
-
- bool versionOk() const { return _version == CurrentVersion; }
- bool valid() const { return magic[0] == 'j' && txt2[1] == '\n' && fileId; }
- };
-
- /** "Section" header. A section corresponds to a group commit.
- len is length of the entire section including header and footer.
- header and footer are not compressed, just the stuff in between.
- */
- struct JSectHeader {
- private:
- unsigned _sectionLen; // unpadded length in bytes of the whole section
- public:
- unsigned long long seqNumber; // sequence number that can be used on recovery to not do too much work
- unsigned long long fileId; // matches JHeader::fileId
- unsigned sectionLen() const { return _sectionLen; }
-
- // we store the unpadded length so we can use that when we uncompress. to
- // get the true total size this must be rounded up to the Alignment.
- void setSectionLen(unsigned lenUnpadded) { _sectionLen = lenUnpadded; }
-
- unsigned sectionLenWithPadding() const {
- unsigned x = (sectionLen() + (Alignment-1)) & (~(Alignment-1));
- dassert( x % Alignment == 0 );
- return x;
- }
- };
-
- /** an individual write operation within a group commit section. Either the entire section should
- be applied, or nothing. (We check the md5 for the whole section before doing anything on recovery.)
- */
- struct JEntry {
- enum OpCodes {
- OpCode_Footer = 0xffffffff,
- OpCode_DbContext = 0xfffffffe,
- OpCode_FileCreated = 0xfffffffd,
- OpCode_DropDb = 0xfffffffc,
- OpCode_Min = 0xfffff000
- };
- union {
- unsigned len; // length in bytes of the data of the JEntry. does not include the JEntry header
- OpCodes opcode;
- };
-
- unsigned ofs; // offset in file
-
- // sentinel and masks for _fileNo
- enum {
- DotNsSuffix = 0x7fffffff, // ".ns" file
- LocalDbBit = 0x80000000 // assuming "local" db instead of using the JDbContext
- };
- int _fileNo; // high bit is set to indicate it should be the <dbpath>/local database
- // char data[len] follows
-
- const char * srcData() const {
- const int *i = &_fileNo;
- return (const char *) (i+1);
- }
-
- int getFileNo() const { return _fileNo & (~LocalDbBit); }
- void setFileNo(int f) { _fileNo = f; }
- bool isNsSuffix() const { return getFileNo() == DotNsSuffix; }
-
- void setLocalDbContextBit() { _fileNo |= LocalDbBit; }
- bool isLocalDbContext() const { return _fileNo & LocalDbBit; }
- void clearLocalDbContextBit() { _fileNo = getFileNo(); }
-
- static std::string suffix(int fileno) {
- if( fileno == DotNsSuffix ) return "ns";
- std::stringstream ss;
- ss << fileno;
- return ss.str();
- }
- };
-
- /** group commit section footer. md5 is a key field. */
- struct JSectFooter {
- JSectFooter();
- JSectFooter(const void* begin, int len); // needs buffer to compute hash
- unsigned sentinel;
- unsigned char hash[16];
- unsigned long long reserved;
- char magic[4]; // "\n\n\n\n"
-
- /** used by recovery to see if buffer is valid
- @param begin the buffer
- @param len buffer len
- @return true if buffer looks valid
- */
- bool checkHash(const void* begin, int len) const;
-
- bool magicOk() const { return *((unsigned*)magic) == 0x0a0a0a0a; }
- };
-
- /** declares "the next entry(s) are for this database / file path prefix" */
- struct JDbContext {
- JDbContext() : sentinel(JEntry::OpCode_DbContext) { }
- const unsigned sentinel; // compare to JEntry::len -- zero is our sentinel
- //char dbname[];
- };
-
- /** "last sequence number" */
- struct LSNFile {
- unsigned ver;
- unsigned reserved2;
- unsigned long long lsn;
- unsigned long long checkbytes;
- unsigned long long reserved[8];
-
- void set(unsigned long long lsn);
- unsigned long long get();
- };
+ unsigned short _version;
-#pragma pack()
+ // these are just for diagnostic ease (make header more useful as plain text)
+ char n1; // '\n'
+ char ts[20]; // ascii timestamp of file generation. for user reading, not used by code.
+ char n2; // '\n'
+ char dbpath
+ [128]; // path/filename of this file for human reading and diagnostics. not used by code.
+ char n3, n4; // '\n', '\n'
+
+ unsigned long long
+ fileId; // unique identifier that will be in each JSectHeader. important as we recycle prealloced files
+
+ char reserved3[8026]; // 8KB total for the file header
+ char txt2[2]; // "\n\n" at the end
+
+ bool versionOk() const {
+ return _version == CurrentVersion;
+ }
+ bool valid() const {
+ return magic[0] == 'j' && txt2[1] == '\n' && fileId;
+ }
+};
+
+/** "Section" header. A section corresponds to a group commit.
+ len is length of the entire section including header and footer.
+ header and footer are not compressed, just the stuff in between.
+*/
+struct JSectHeader {
+private:
+ unsigned _sectionLen; // unpadded length in bytes of the whole section
+public:
+ unsigned long long
+ seqNumber; // sequence number that can be used on recovery to not do too much work
+ unsigned long long fileId; // matches JHeader::fileId
+ unsigned sectionLen() const {
+ return _sectionLen;
+ }
+
+ // we store the unpadded length so we can use that when we uncompress. to
+ // get the true total size this must be rounded up to the Alignment.
+ void setSectionLen(unsigned lenUnpadded) {
+ _sectionLen = lenUnpadded;
+ }
+
+ unsigned sectionLenWithPadding() const {
+ unsigned x = (sectionLen() + (Alignment - 1)) & (~(Alignment - 1));
+ dassert(x % Alignment == 0);
+ return x;
+ }
+};
+/** an individual write operation within a group commit section. Either the entire section should
+ be applied, or nothing. (We check the md5 for the whole section before doing anything on recovery.)
+*/
+struct JEntry {
+ enum OpCodes {
+ OpCode_Footer = 0xffffffff,
+ OpCode_DbContext = 0xfffffffe,
+ OpCode_FileCreated = 0xfffffffd,
+ OpCode_DropDb = 0xfffffffc,
+ OpCode_Min = 0xfffff000
+ };
+ union {
+ unsigned
+ len; // length in bytes of the data of the JEntry. does not include the JEntry header
+ OpCodes opcode;
+ };
+
+ unsigned ofs; // offset in file
+
+ // sentinel and masks for _fileNo
+ enum {
+ DotNsSuffix = 0x7fffffff, // ".ns" file
+ LocalDbBit = 0x80000000 // assuming "local" db instead of using the JDbContext
+ };
+ int _fileNo; // high bit is set to indicate it should be the <dbpath>/local database
+ // char data[len] follows
+
+ const char* srcData() const {
+ const int* i = &_fileNo;
+ return (const char*)(i + 1);
+ }
+
+ int getFileNo() const {
+ return _fileNo & (~LocalDbBit);
+ }
+ void setFileNo(int f) {
+ _fileNo = f;
+ }
+ bool isNsSuffix() const {
+ return getFileNo() == DotNsSuffix;
+ }
+
+ void setLocalDbContextBit() {
+ _fileNo |= LocalDbBit;
+ }
+ bool isLocalDbContext() const {
+ return _fileNo & LocalDbBit;
+ }
+ void clearLocalDbContextBit() {
+ _fileNo = getFileNo();
}
+ static std::string suffix(int fileno) {
+ if (fileno == DotNsSuffix)
+ return "ns";
+ std::stringstream ss;
+ ss << fileno;
+ return ss.str();
+ }
+};
+
+/** group commit section footer. md5 is a key field. */
+struct JSectFooter {
+ JSectFooter();
+ JSectFooter(const void* begin, int len); // needs buffer to compute hash
+ unsigned sentinel;
+ unsigned char hash[16];
+ unsigned long long reserved;
+ char magic[4]; // "\n\n\n\n"
+
+ /** used by recovery to see if buffer is valid
+ @param begin the buffer
+ @param len buffer len
+ @return true if buffer looks valid
+ */
+ bool checkHash(const void* begin, int len) const;
+
+ bool magicOk() const {
+ return *((unsigned*)magic) == 0x0a0a0a0a;
+ }
+};
+
+/** declares "the next entry(s) are for this database / file path prefix" */
+struct JDbContext {
+ JDbContext() : sentinel(JEntry::OpCode_DbContext) {}
+ const unsigned sentinel; // compare to JEntry::len -- zero is our sentinel
+ // char dbname[];
+};
+
+/** "last sequence number" */
+struct LSNFile {
+ unsigned ver;
+ unsigned reserved2;
+ unsigned long long lsn;
+ unsigned long long checkbytes;
+ unsigned long long reserved[8];
+
+ void set(unsigned long long lsn);
+ unsigned long long get();
+};
+
+#pragma pack()
+}
}
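Reviewer note: JEntry::_fileNo above packs a local-database flag into the high bit (LocalDbBit) and reserves 0x7fffffff (DotNsSuffix) for the ".ns" file. A small sketch of the masking, using unsigned arithmetic for clarity even though the struct itself stores an int:

#include <cassert>
#include <cstdio>

const unsigned DotNsSuffix = 0x7fffffff;  // sentinel for the ".ns" file
const unsigned LocalDbBit = 0x80000000;   // high bit: "local" database context

unsigned getFileNo(unsigned fileNo) {
    return fileNo & ~LocalDbBit;  // mask off the local-db flag
}

int main() {
    unsigned fileNo = 5;
    fileNo |= LocalDbBit;  // mirrors JEntry::setLocalDbContextBit()
    assert(getFileNo(fileNo) == 5u);
    assert(getFileNo(LocalDbBit | DotNsSuffix) == DotNsSuffix);  // local ".ns" entry
    std::printf("isLocalDbContext: %d, fileNo: %u\n",
                (fileNo & LocalDbBit) != 0, getFileNo(fileNo));
    return 0;
}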
diff --git a/src/mongo/db/storage/mmap_v1/dur_journalimpl.h b/src/mongo/db/storage/mmap_v1/dur_journalimpl.h
index 365f38aec71..86a2d19de97 100644
--- a/src/mongo/db/storage/mmap_v1/dur_journalimpl.h
+++ b/src/mongo/db/storage/mmap_v1/dur_journalimpl.h
@@ -34,80 +34,84 @@
#include "mongo/db/storage/mmap_v1/logfile.h"
namespace mongo {
- namespace dur {
+namespace dur {
- /** the writeahead journal for durability */
- class Journal {
- public:
- std::string dir; // set by journalMakeDir() during initialization
+/** the writeahead journal for durability */
+class Journal {
+public:
+ std::string dir; // set by journalMakeDir() during initialization
- Journal();
+ Journal();
- /** call during startup by journalMakeDir() */
- void init();
+ /** call during startup by journalMakeDir() */
+ void init();
- /** check if time to rotate files. assure a file is open.
- done separately from the journal() call as we can do this part
- outside of lock.
- thread: durThread()
- */
- void rotate();
+ /** check if time to rotate files. assure a file is open.
+ done separately from the journal() call as we can do this part
+ outside of lock.
+ thread: durThread()
+ */
+ void rotate();
- /** append to the journal file
- */
- void journal(const JSectHeader& h, const AlignedBuilder& b);
+ /** append to the journal file
+ */
+ void journal(const JSectHeader& h, const AlignedBuilder& b);
- boost::filesystem::path getFilePathFor(int filenumber) const;
+ boost::filesystem::path getFilePathFor(int filenumber) const;
- unsigned long long lastFlushTime() const { return _lastFlushTime; }
- void cleanup(bool log); // closes and removes journal files
-
- unsigned long long curFileId() const { return _curFileId; }
-
- void assureLogFileOpen() {
- stdx::lock_guard<SimpleMutex> lk(_curLogFileMutex);
- if( _curLogFile == 0 )
- _open();
- }
-
- /** open a journal file to journal operations to. */
- void open();
-
- private:
- /** check if time to rotate files. assure a file is open.
- * internally called with every commit
- */
- void _rotate();
-
- void _open();
- void closeCurrentJournalFile();
- void removeUnneededJournalFiles();
-
- unsigned long long _written; // bytes written so far to the current journal (log) file
- unsigned _nextFileNumber;
-
- SimpleMutex _curLogFileMutex;
-
- LogFile *_curLogFile; // use _curLogFileMutex
- unsigned long long _curFileId; // current file id see JHeader::fileId
-
- struct JFile {
- std::string filename;
- unsigned long long lastEventTimeMs;
- };
-
- // files which have been closed but not unlinked (rotated out) yet
- // ordered oldest to newest
- std::list<JFile> _oldJournalFiles; // use _curLogFileMutex
+ unsigned long long lastFlushTime() const {
+ return _lastFlushTime;
+ }
+ void cleanup(bool log); // closes and removes journal files
- // lsn related
- static void preFlush();
- static void postFlush();
- unsigned long long _preFlushTime;
- unsigned long long _lastFlushTime; // data < this time is fsynced in the datafiles (unless hard drive controller is caching)
- bool _writeToLSNNeeded;
- void updateLSNFile();
- };
+ unsigned long long curFileId() const {
+ return _curFileId;
+ }
+ void assureLogFileOpen() {
+ stdx::lock_guard<SimpleMutex> lk(_curLogFileMutex);
+ if (_curLogFile == 0)
+ _open();
}
+
+ /** open a journal file to journal operations to. */
+ void open();
+
+private:
+ /** check if time to rotate files. assure a file is open.
+ * internally called with every commit
+ */
+ void _rotate();
+
+ void _open();
+ void closeCurrentJournalFile();
+ void removeUnneededJournalFiles();
+
+ unsigned long long _written; // bytes written so far to the current journal (log) file
+ unsigned _nextFileNumber;
+
+ SimpleMutex _curLogFileMutex;
+
+ LogFile* _curLogFile; // use _curLogFileMutex
+ unsigned long long _curFileId; // current file id see JHeader::fileId
+
+ struct JFile {
+ std::string filename;
+ unsigned long long lastEventTimeMs;
+ };
+
+ // files which have been closed but not unlinked (rotated out) yet
+ // ordered oldest to newest
+ std::list<JFile> _oldJournalFiles; // use _curLogFileMutex
+
+ // lsn related
+ static void preFlush();
+ static void postFlush();
+ unsigned long long _preFlushTime;
+ unsigned long long
+ _lastFlushTime; // data < this time is fsynced in the datafiles (unless hard drive controller is caching)
+ bool _writeToLSNNeeded;
+ void updateLSNFile();
+};
+}
}
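
Aside: assureLogFileOpen() above is a lock-then-check lazy open; the null test happens under _curLogFileMutex, so at most one thread performs the open. A self-contained sketch of the same idiom (LazyLog and its fopen call are illustrative stand-ins, not the real LogFile API):

    #include <cstdio>
    #include <mutex>

    class LazyLog {
    public:
        void assureOpen(const char* path) {
            std::lock_guard<std::mutex> lk(_mutex);
            if (_file == nullptr)  // tested under the lock, so at most one
                _file = std::fopen(path, "ab");  // thread ever opens the file
        }

    private:
        std::mutex _mutex;
        std::FILE* _file = nullptr;
    };

    int main() {
        LazyLog log;
        log.assureOpen("/tmp/example.log");  // hypothetical path
        log.assureOpen("/tmp/example.log");  // second call is a no-op
        return 0;
    }
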
diff --git a/src/mongo/db/storage/mmap_v1/dur_preplogbuffer.cpp b/src/mongo/db/storage/mmap_v1/dur_preplogbuffer.cpp
index 171254eb946..dc9d7fb2b7a 100644
--- a/src/mongo/db/storage/mmap_v1/dur_preplogbuffer.cpp
+++ b/src/mongo/db/storage/mmap_v1/dur_preplogbuffer.cpp
@@ -53,152 +53,147 @@
namespace mongo {
- using std::endl;
- using std::min;
- using std::stringstream;
+using std::endl;
+using std::min;
+using std::stringstream;
- namespace dur {
+namespace dur {
- extern Journal j;
- extern CommitJob commitJob;
+extern Journal j;
+extern CommitJob commitJob;
- const RelativePath local = RelativePath::fromRelativePath("local");
+const RelativePath local = RelativePath::fromRelativePath("local");
- static DurableMappedFile* findMMF_inlock(void *ptr, size_t &ofs) {
- DurableMappedFile *f = privateViews.find_inlock(ptr, ofs);
- if( f == 0 ) {
- error() << "findMMF_inlock failed " << privateViews.numberOfViews_inlock() << endl;
- printStackTrace(); // we want a stack trace and the assert below didn't print a trace once in the real world - not sure why
- stringstream ss;
- ss << "view pointer cannot be resolved " << std::hex << (size_t) ptr;
- journalingFailure(ss.str().c_str()); // asserts, which then abends
- }
- return f;
- }
-
- /** put the basic write operation into the buffer (bb) to be journaled */
- static void prepBasicWrite_inlock(AlignedBuilder&bb, const WriteIntent *i, RelativePath& lastDbPath) {
- size_t ofs = 1;
- DurableMappedFile *mmf = findMMF_inlock(i->start(), /*out*/ofs);
+static DurableMappedFile* findMMF_inlock(void* ptr, size_t& ofs) {
+ DurableMappedFile* f = privateViews.find_inlock(ptr, ofs);
+ if (f == 0) {
+ error() << "findMMF_inlock failed " << privateViews.numberOfViews_inlock() << endl;
+ printStackTrace(); // we want a stack trace; the assert below once failed to print one in the real world - not sure why
+ stringstream ss;
+ ss << "view pointer cannot be resolved " << std::hex << (size_t)ptr;
+ journalingFailure(ss.str().c_str()); // asserts, which then abends
+ }
+ return f;
+}
- if( MONGO_unlikely(!mmf->willNeedRemap()) ) {
- // tag this mmf as needed a remap of its private view later.
- // usually it will already be dirty/already set, so we do the if above first
- // to avoid possibility of cpu cache line contention
- mmf->setWillNeedRemap();
- }
+/** put the basic write operation into the buffer (bb) to be journaled */
+static void prepBasicWrite_inlock(AlignedBuilder& bb,
+ const WriteIntent* i,
+ RelativePath& lastDbPath) {
+ size_t ofs = 1;
+ DurableMappedFile* mmf = findMMF_inlock(i->start(), /*out*/ ofs);
+
+ if (MONGO_unlikely(!mmf->willNeedRemap())) {
+ // tag this mmf as needing a remap of its private view later.
+ // usually it will already be dirty/already set, so we do the if above first
+ // to avoid the possibility of cpu cache line contention
+ mmf->setWillNeedRemap();
+ }
- // since we have already looked up the mmf, we go ahead and remember the write view location
- // so we don't have to find the DurableMappedFile again later in WRITETODATAFILES()
- //
- // this was for WRITETODATAFILES_Impl2 so commented out now
- //
- /*
- dassert( i->w_ptr == 0 );
- i->w_ptr = ((char*)mmf->view_write()) + ofs;
- */
-
- JEntry e;
- e.len = min(i->length(), (unsigned)(mmf->length() - ofs)); //don't write past end of file
- verify( ofs <= 0x80000000 );
- e.ofs = (unsigned) ofs;
- e.setFileNo( mmf->fileSuffixNo() );
-
- if( mmf->relativePath() == local ) {
- e.setLocalDbContextBit();
- }
- else if( mmf->relativePath() != lastDbPath ) {
- lastDbPath = mmf->relativePath();
- JDbContext c;
- bb.appendStruct(c);
- bb.appendStr(lastDbPath.toString());
- }
+ // since we have already looked up the mmf, we go ahead and remember the write view location
+ // so we don't have to find the DurableMappedFile again later in WRITETODATAFILES()
+ //
+ // this was for WRITETODATAFILES_Impl2, so it is commented out now
+ //
+ /*
+ dassert( i->w_ptr == 0 );
+ i->w_ptr = ((char*)mmf->view_write()) + ofs;
+ */
+
+ JEntry e;
+ e.len = min(i->length(), (unsigned)(mmf->length() - ofs)); // don't write past end of file
+ verify(ofs <= 0x80000000);
+ e.ofs = (unsigned)ofs;
+ e.setFileNo(mmf->fileSuffixNo());
+
+ if (mmf->relativePath() == local) {
+ e.setLocalDbContextBit();
+ } else if (mmf->relativePath() != lastDbPath) {
+ lastDbPath = mmf->relativePath();
+ JDbContext c;
+ bb.appendStruct(c);
+ bb.appendStr(lastDbPath.toString());
+ }
- bb.appendStruct(e);
- bb.appendBuf(i->start(), e.len);
+ bb.appendStruct(e);
+ bb.appendBuf(i->start(), e.len);
- if (MONGO_unlikely(e.len != (unsigned)i->length())) {
- log() << "journal info splitting prepBasicWrite at boundary" << endl;
+ if (MONGO_unlikely(e.len != (unsigned)i->length())) {
+ log() << "journal info splitting prepBasicWrite at boundary" << endl;
- // This only happens if we write to the last byte in a file and
- // the fist byte in another file that is mapped adjacently. I
- // think most OSs leave at least a one page gap between
- // mappings, but better to be safe.
+ // This only happens if we write to the last byte in a file and
+ // the first byte in another file that is mapped adjacently. I
+ // think most OSs leave at least a one-page gap between
+ // mappings, but better to be safe.
- WriteIntent next ((char*)i->start() + e.len, i->length() - e.len);
- prepBasicWrite_inlock(bb, &next, lastDbPath);
- }
- }
+ WriteIntent next((char*)i->start() + e.len, i->length() - e.len);
+ prepBasicWrite_inlock(bb, &next, lastDbPath);
+ }
+}
- /** basic write ops / write intents. note there is no particular order to these : if we have
- two writes to the same location during the group commit interval, it is likely
- (although not assured) that it is journaled here once.
- */
- static void prepBasicWrites(AlignedBuilder& bb, const std::vector<WriteIntent>& intents) {
- stdx::lock_guard<stdx::mutex> lk(privateViews._mutex());
-
- // Each time write intents switch to a different database we journal a JDbContext.
- // Switches will be rare as we sort by memory location first and we batch commit.
- RelativePath lastDbPath;
-
- invariant(!intents.empty());
-
- WriteIntent last;
- for (std::vector<WriteIntent>::const_iterator i = intents.begin();
- i != intents.end();
- i++) {
-
- if( i->start() < last.end() ) {
- // overlaps
- last.absorb(*i);
- }
- else {
- // discontinuous
- if (i != intents.begin()) {
- prepBasicWrite_inlock(bb, &last, lastDbPath);
- }
-
- last = *i;
- }
+/** basic write ops / write intents. note there is no particular order to these: if we have
+ two writes to the same location during the group commit interval, it is likely
+ (although not assured) that it is journaled here only once.
+*/
+static void prepBasicWrites(AlignedBuilder& bb, const std::vector<WriteIntent>& intents) {
+ stdx::lock_guard<stdx::mutex> lk(privateViews._mutex());
+
+ // Each time write intents switch to a different database we journal a JDbContext.
+ // Switches will be rare as we sort by memory location first and we batch commit.
+ RelativePath lastDbPath;
+
+ invariant(!intents.empty());
+
+ WriteIntent last;
+ for (std::vector<WriteIntent>::const_iterator i = intents.begin(); i != intents.end(); i++) {
+ if (i->start() < last.end()) {
+ // overlaps
+ last.absorb(*i);
+ } else {
+ // discontinuous
+ if (i != intents.begin()) {
+ prepBasicWrite_inlock(bb, &last, lastDbPath);
}
- prepBasicWrite_inlock(bb, &last, lastDbPath);
+ last = *i;
}
+ }
- /** we will build an output buffer ourself and then use O_DIRECT
- we could be in read lock for this
- caller handles locking
- @return partially populated sectheader and _ab set
- */
- static void _PREPLOGBUFFER(JSectHeader& h, AlignedBuilder& bb) {
- // Add the JSectHeader
-
- // Invalidate the total length, we will fill it in later.
- h.setSectionLen(0xffffffff);
- h.seqNumber = getLastDataFileFlushTime();
- h.fileId = j.curFileId();
-
- // Ops other than basic writes (DurOp's) go first
- const std::vector<std::shared_ptr<DurOp> >& durOps = commitJob.ops();
- for (std::vector<std::shared_ptr<DurOp> >::const_iterator i = durOps.begin();
- i != durOps.end();
- i++) {
-
- (*i)->serialize(bb);
- }
+ prepBasicWrite_inlock(bb, &last, lastDbPath);
+}
- // Write intents
- const std::vector<WriteIntent>& intents = commitJob.getIntentsSorted();
- if (!intents.empty()) {
- prepBasicWrites(bb, intents);
- }
- }
+/** we will build an output buffer ourselves and then use O_DIRECT
+ we could be in a read lock for this
+ caller handles locking
+ @return partially populated sectheader and _ab set
+*/
+static void _PREPLOGBUFFER(JSectHeader& h, AlignedBuilder& bb) {
+ // Add the JSectHeader
+
+ // Invalidate the total length, we will fill it in later.
+ h.setSectionLen(0xffffffff);
+ h.seqNumber = getLastDataFileFlushTime();
+ h.fileId = j.curFileId();
+
+ // Ops other than basic writes (DurOp's) go first
+ const std::vector<std::shared_ptr<DurOp>>& durOps = commitJob.ops();
+ for (std::vector<std::shared_ptr<DurOp>>::const_iterator i = durOps.begin(); i != durOps.end();
+ i++) {
+ (*i)->serialize(bb);
+ }
- void PREPLOGBUFFER(/*out*/ JSectHeader& outHeader, AlignedBuilder& outBuffer) {
- Timer t;
- j.assureLogFileOpen(); // so fileId is set
- _PREPLOGBUFFER(outHeader, outBuffer);
- stats.curr()->_prepLogBufferMicros += t.micros();
- }
+ // Write intents
+ const std::vector<WriteIntent>& intents = commitJob.getIntentsSorted();
+ if (!intents.empty()) {
+ prepBasicWrites(bb, intents);
}
}
+
+void PREPLOGBUFFER(/*out*/ JSectHeader& outHeader, AlignedBuilder& outBuffer) {
+ Timer t;
+ j.assureLogFileOpen(); // so fileId is set
+ _PREPLOGBUFFER(outHeader, outBuffer);
+ stats.curr()->_prepLogBufferMicros += t.micros();
+}
+}
+}
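
Aside: prepBasicWrites() above relies on commitJob.getIntentsSorted() returning intents ordered by start address, so one forward pass can absorb overlapping runs and emit discontinuous ones as separate journal entries. A standalone sketch of that pass, with Intent and coalesce() as simplified stand-ins for WriteIntent and the loop above:

    #include <algorithm>
    #include <vector>

    struct Intent {
        char* start;
        char* end;
        // Grow this intent to cover another that overlaps or touches it.
        void absorb(const Intent& o) {
            if (o.start < start)
                start = o.start;
            if (o.end > end)
                end = o.end;
        }
    };

    // One pass over intents sorted by start address: overlapping or touching
    // runs are merged; discontinuous runs become separate entries.
    std::vector<Intent> coalesce(std::vector<Intent> intents) {
        std::sort(intents.begin(), intents.end(),
                  [](const Intent& a, const Intent& b) { return a.start < b.start; });
        std::vector<Intent> out;
        for (const Intent& i : intents) {
            if (!out.empty() && i.start <= out.back().end)
                out.back().absorb(i);  // overlaps the previous run: extend it
            else
                out.push_back(i);  // discontinuous: start a new run
        }
        return out;
    }

    int main() {
        char buf[64];
        std::vector<Intent> v = {{buf + 10, buf + 20}, {buf, buf + 12}, {buf + 40, buf + 50}};
        std::vector<Intent> merged = coalesce(v);  // yields [buf, buf+20) and [buf+40, buf+50)
        return merged.size() == 2 ? 0 : 1;
    }
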
diff --git a/src/mongo/db/storage/mmap_v1/dur_recover.cpp b/src/mongo/db/storage/mmap_v1/dur_recover.cpp
index bfd023affab..a6958ad1aec 100644
--- a/src/mongo/db/storage/mmap_v1/dur_recover.cpp
+++ b/src/mongo/db/storage/mmap_v1/dur_recover.cpp
@@ -58,571 +58,564 @@
namespace mongo {
- using std::shared_ptr;
- using std::unique_ptr;
- using std::endl;
- using std::hex;
- using std::map;
- using std::pair;
- using std::setw;
- using std::string;
- using std::stringstream;
- using std::vector;
-
- /**
- * Thrown when a journal section is corrupt. This is considered OK as long as it occurs while
- * processing the last file. Processing stops at the first corrupt section.
- *
- * Any logging about the nature of the corruption should happen before throwing as this class
- * contains no data.
- */
- class JournalSectionCorruptException {};
+using std::shared_ptr;
+using std::unique_ptr;
+using std::endl;
+using std::hex;
+using std::map;
+using std::pair;
+using std::setw;
+using std::string;
+using std::stringstream;
+using std::vector;
- namespace dur {
+/**
+ * Thrown when a journal section is corrupt. This is considered OK as long as it occurs while
+ * processing the last file. Processing stops at the first corrupt section.
+ *
+ * Any logging about the nature of the corruption should happen before throwing as this class
+ * contains no data.
+ */
+class JournalSectionCorruptException {};
- // The singleton recovery job object
- RecoveryJob& RecoveryJob::_instance = *(new RecoveryJob());
+namespace dur {
+// The singleton recovery job object
+RecoveryJob& RecoveryJob::_instance = *(new RecoveryJob());
- void removeJournalFiles();
- boost::filesystem::path getJournalDir();
+void removeJournalFiles();
+boost::filesystem::path getJournalDir();
- struct ParsedJournalEntry { /*copyable*/
- ParsedJournalEntry() : e(0) { }
- // relative path of database for the operation.
- // might be a pointer into mmaped Journal file
- const char *dbName;
+struct ParsedJournalEntry { /*copyable*/
+ ParsedJournalEntry() : e(0) {}
- // those are pointers into the memory mapped journal file
- const JEntry *e; // local db sentinel is already parsed out here into dbName
+ // relative path of database for the operation.
+ // might be a pointer into the mmapped journal file
+ const char* dbName;
- // if not one of the two simple JEntry's above, this is the operation:
- std::shared_ptr<DurOp> op;
- };
+ // those are pointers into the memory mapped journal file
+ const JEntry* e; // local db sentinel is already parsed out here into dbName
+ // if not one of the two simple JEntry fields above, this is the operation:
+ std::shared_ptr<DurOp> op;
+};
- /**
- * Get journal filenames, in order. Throws if unexpected content found.
- */
- static void getFiles(boost::filesystem::path dir, vector<boost::filesystem::path>& files) {
- map<unsigned,boost::filesystem::path> m;
- for ( boost::filesystem::directory_iterator i( dir );
- i != boost::filesystem::directory_iterator();
- ++i ) {
- boost::filesystem::path filepath = *i;
- string fileName = boost::filesystem::path(*i).leaf().string();
- if( str::startsWith(fileName, "j._") ) {
- unsigned u = str::toUnsigned( str::after(fileName, '_') );
- if( m.count(u) ) {
- uasserted(13531, str::stream() << "unexpected files in journal directory " << dir.string() << " : " << fileName);
- }
- m.insert( pair<unsigned,boost::filesystem::path>(u,filepath) );
- }
- }
- for( map<unsigned,boost::filesystem::path>::iterator i = m.begin(); i != m.end(); ++i ) {
- if( i != m.begin() && m.count(i->first - 1) == 0 ) {
- uasserted(13532,
- str::stream() << "unexpected file in journal directory " << dir.string()
- << " : " << boost::filesystem::path(i->second).leaf().string() << " : can't find its preceding file");
- }
- files.push_back(i->second);
+
+/**
+ * Get journal filenames, in order. Throws if unexpected content found.
+ */
+static void getFiles(boost::filesystem::path dir, vector<boost::filesystem::path>& files) {
+ map<unsigned, boost::filesystem::path> m;
+ for (boost::filesystem::directory_iterator i(dir); i != boost::filesystem::directory_iterator();
+ ++i) {
+ boost::filesystem::path filepath = *i;
+ string fileName = boost::filesystem::path(*i).leaf().string();
+ if (str::startsWith(fileName, "j._")) {
+ unsigned u = str::toUnsigned(str::after(fileName, '_'));
+ if (m.count(u)) {
+ uasserted(13531,
+ str::stream() << "unexpected files in journal directory " << dir.string()
+ << " : " << fileName);
}
+ m.insert(pair<unsigned, boost::filesystem::path>(u, filepath));
}
+ }
+ for (map<unsigned, boost::filesystem::path>::iterator i = m.begin(); i != m.end(); ++i) {
+ if (i != m.begin() && m.count(i->first - 1) == 0) {
+ uasserted(13532,
+ str::stream() << "unexpected file in journal directory " << dir.string()
+ << " : " << boost::filesystem::path(i->second).leaf().string()
+ << " : can't find its preceding file");
+ }
+ files.push_back(i->second);
+ }
+}
- /** read through the memory mapped data of a journal file (journal/j._<n> file)
- throws
- */
- class JournalSectionIterator {
- MONGO_DISALLOW_COPYING(JournalSectionIterator);
- public:
- JournalSectionIterator(const JSectHeader& h,
- const void *compressed,
- unsigned compressedLen,
- bool doDurOpsRecovering)
- : _h(h),
- _lastDbName(0),
- _doDurOps(doDurOpsRecovering) {
-
- verify(doDurOpsRecovering);
-
- if (!uncompress((const char *)compressed, compressedLen, &_uncompressed)) {
- // We check the checksum before we uncompress, but this may still fail as the
- // checksum isn't foolproof.
- log() << "couldn't uncompress journal section" << endl;
- throw JournalSectionCorruptException();
- }
-
- const char *p = _uncompressed.c_str();
- verify(compressedLen == _h.sectionLen() - sizeof(JSectFooter) - sizeof(JSectHeader));
-
- _entries = unique_ptr<BufReader>(new BufReader(p, _uncompressed.size()));
- }
+/** read through the memory mapped data of a journal file (journal/j._<n> file)
+ throws
+*/
+class JournalSectionIterator {
+ MONGO_DISALLOW_COPYING(JournalSectionIterator);
+
+public:
+ JournalSectionIterator(const JSectHeader& h,
+ const void* compressed,
+ unsigned compressedLen,
+ bool doDurOpsRecovering)
+ : _h(h), _lastDbName(0), _doDurOps(doDurOpsRecovering) {
+ verify(doDurOpsRecovering);
+
+ if (!uncompress((const char*)compressed, compressedLen, &_uncompressed)) {
+ // We check the checksum before we uncompress, but this may still fail as the
+ // checksum isn't foolproof.
+ log() << "couldn't uncompress journal section" << endl;
+ throw JournalSectionCorruptException();
+ }
- // We work with the uncompressed buffer when doing a WRITETODATAFILES (for speed)
- JournalSectionIterator(const JSectHeader &h, const void *p, unsigned len)
- : _entries(new BufReader((const char *)p, len)),
- _h(h),
- _lastDbName(0),
- _doDurOps(false) {
+ const char* p = _uncompressed.c_str();
+ verify(compressedLen == _h.sectionLen() - sizeof(JSectFooter) - sizeof(JSectHeader));
- }
+ _entries = unique_ptr<BufReader>(new BufReader(p, _uncompressed.size()));
+ }
- bool atEof() const { return _entries->atEof(); }
+ // We work with the uncompressed buffer when doing a WRITETODATAFILES (for speed)
+ JournalSectionIterator(const JSectHeader& h, const void* p, unsigned len)
+ : _entries(new BufReader((const char*)p, len)), _h(h), _lastDbName(0), _doDurOps(false) {}
- unsigned long long seqNumber() const { return _h.seqNumber; }
+ bool atEof() const {
+ return _entries->atEof();
+ }
- /** get the next entry from the log. this function parses and combines JDbContext and JEntry's.
- * throws on premature end of section.
- */
- void next(ParsedJournalEntry& e) {
- unsigned lenOrOpCode;
- _entries->read(lenOrOpCode);
+ unsigned long long seqNumber() const {
+ return _h.seqNumber;
+ }
- if (lenOrOpCode > JEntry::OpCode_Min) {
- switch( lenOrOpCode ) {
+ /** get the next entry from the log. this function parses and combines JDbContext and JEntry records.
+ * throws on premature end of section.
+ */
+ void next(ParsedJournalEntry& e) {
+ unsigned lenOrOpCode;
+ _entries->read(lenOrOpCode);
- case JEntry::OpCode_Footer: {
- verify( false );
- }
+ if (lenOrOpCode > JEntry::OpCode_Min) {
+ switch (lenOrOpCode) {
+ case JEntry::OpCode_Footer: {
+ verify(false);
+ }
- case JEntry::OpCode_FileCreated:
- case JEntry::OpCode_DropDb: {
- e.dbName = 0;
- std::shared_ptr<DurOp> op = DurOp::read(lenOrOpCode, *_entries);
- if (_doDurOps) {
- e.op = op;
- }
- return;
+ case JEntry::OpCode_FileCreated:
+ case JEntry::OpCode_DropDb: {
+ e.dbName = 0;
+ std::shared_ptr<DurOp> op = DurOp::read(lenOrOpCode, *_entries);
+ if (_doDurOps) {
+ e.op = op;
}
+ return;
+ }
- case JEntry::OpCode_DbContext: {
- _lastDbName = (const char*) _entries->pos();
- const unsigned limit = _entries->remaining();
- const unsigned len = strnlen(_lastDbName, limit);
- if (_lastDbName[len] != '\0') {
- log() << "problem processing journal file during recovery";
- throw JournalSectionCorruptException();
- }
-
- _entries->skip(len+1); // skip '\0' too
- _entries->read(lenOrOpCode); // read this for the fall through
+ case JEntry::OpCode_DbContext: {
+ _lastDbName = (const char*)_entries->pos();
+ const unsigned limit = _entries->remaining();
+ const unsigned len = strnlen(_lastDbName, limit);
+ if (_lastDbName[len] != '\0') {
+ log() << "problem processing journal file during recovery";
+ throw JournalSectionCorruptException();
}
- // fall through as a basic operation always follows jdbcontext, and we don't have anything to return yet
- default:
- // fall through
- ;
- }
+ _entries->skip(len + 1); // skip '\0' too
+ _entries->read(lenOrOpCode); // read this for the fall through
}
+ // fall through, as a basic operation always follows a JDbContext and we don't have anything to return yet
- // JEntry - a basic write
- verify( lenOrOpCode && lenOrOpCode < JEntry::OpCode_Min );
- _entries->rewind(4);
- e.e = (JEntry *) _entries->skip(sizeof(JEntry));
- e.dbName = e.e->isLocalDbContext() ? "local" : _lastDbName;
- verify( e.e->len == lenOrOpCode );
- _entries->skip(e.e->len);
+ default:
+ // fall through
+ ;
}
+ }
-
- private:
- unique_ptr<BufReader> _entries;
- const JSectHeader _h;
- const char *_lastDbName; // pointer into mmaped journal file
- const bool _doDurOps;
- string _uncompressed;
- };
-
-
- static string fileName(const char* dbName, int fileNo) {
+ // JEntry - a basic write
+ verify(lenOrOpCode && lenOrOpCode < JEntry::OpCode_Min);
+ _entries->rewind(4);
+ e.e = (JEntry*)_entries->skip(sizeof(JEntry));
+ e.dbName = e.e->isLocalDbContext() ? "local" : _lastDbName;
+ verify(e.e->len == lenOrOpCode);
+ _entries->skip(e.e->len);
+ }
+
+
+private:
+ unique_ptr<BufReader> _entries;
+ const JSectHeader _h;
+ const char* _lastDbName; // pointer into the mmapped journal file
+ const bool _doDurOps;
+ string _uncompressed;
+};
+
+
+static string fileName(const char* dbName, int fileNo) {
+ stringstream ss;
+ ss << dbName << '.';
+ verify(fileNo >= 0);
+ if (fileNo == JEntry::DotNsSuffix)
+ ss << "ns";
+ else
+ ss << fileNo;
+
+ // relative name -> full path name
+ boost::filesystem::path full(storageGlobalParams.dbpath);
+ full /= ss.str();
+ return full.string();
+}
+
+
+RecoveryJob::RecoveryJob()
+ : _recovering(false), _lastDataSyncedFromLastRun(0), _lastSeqMentionedInConsoleLog(1) {}
+
+RecoveryJob::~RecoveryJob() {
+ DESTRUCTOR_GUARD(if (!_mmfs.empty()) {} close();)
+}
+
+void RecoveryJob::close() {
+ stdx::lock_guard<stdx::mutex> lk(_mx);
+ _close();
+}
+
+void RecoveryJob::_close() {
+ MongoFile::flushAll(true);
+ _mmfs.clear();
+}
+
+RecoveryJob::Last::Last() : mmf(NULL), fileNo(-1) {
+ // Make sure the files list does not change from underneath
+ LockMongoFilesShared::assertAtLeastReadLocked();
+}
+
+DurableMappedFile* RecoveryJob::Last::newEntry(const dur::ParsedJournalEntry& entry,
+ RecoveryJob& rj) {
+ int num = entry.e->getFileNo();
+ if (num == fileNo && entry.dbName == dbName)
+ return mmf;
+
+ string fn = fileName(entry.dbName, num);
+ MongoFile* file;
+ {
+ MongoFileFinder finder; // must release lock before creating new DurableMappedFile
+ file = finder.findByPath(fn);
+ }
+
+ if (file) {
+ verify(file->isDurableMappedFile());
+ mmf = (DurableMappedFile*)file;
+ } else {
+ if (!rj._recovering) {
+ log() << "journal error applying writes, file " << fn << " is not open" << endl;
+ verify(false);
+ }
+ std::shared_ptr<DurableMappedFile> sp(new DurableMappedFile);
+ verify(sp->open(fn, false));
+ rj._mmfs.push_back(sp);
+ mmf = sp.get();
+ }
+
+ // we do this last so that if an exception were thrown, the cached entry isn't left inconsistent
+ dbName = entry.dbName;
+ fileNo = num;
+ return mmf;
+}
+
+void RecoveryJob::write(Last& last, const ParsedJournalEntry& entry) {
+ // TODO(mathias): look into making some of these dasserts
+ verify(entry.e);
+ verify(entry.dbName);
+
+ DurableMappedFile* mmf = last.newEntry(entry, *this);
+
+ if ((entry.e->ofs + entry.e->len) <= mmf->length()) {
+ verify(mmf->view_write());
+ verify(entry.e->srcData());
+
+ void* dest = (char*)mmf->view_write() + entry.e->ofs;
+ memcpy(dest, entry.e->srcData(), entry.e->len);
+ stats.curr()->_writeToDataFilesBytes += entry.e->len;
+ } else {
+ massert(13622, "Trying to write past end of file in WRITETODATAFILES", _recovering);
+ }
+}
+
+void RecoveryJob::applyEntry(Last& last, const ParsedJournalEntry& entry, bool apply, bool dump) {
+ if (entry.e) {
+ if (dump) {
stringstream ss;
- ss << dbName << '.';
- verify( fileNo >= 0 );
- if( fileNo == JEntry::DotNsSuffix )
+ ss << " BASICWRITE " << setw(20) << entry.dbName << '.';
+ if (entry.e->isNsSuffix())
ss << "ns";
else
- ss << fileNo;
-
- // relative name -> full path name
- boost::filesystem::path full(storageGlobalParams.dbpath);
- full /= ss.str();
- return full.string();
- }
-
-
- RecoveryJob::RecoveryJob()
- : _recovering(false),
- _lastDataSyncedFromLastRun(0),
- _lastSeqMentionedInConsoleLog(1) {
-
- }
-
- RecoveryJob::~RecoveryJob() {
- DESTRUCTOR_GUARD(
- if (!_mmfs.empty()) {}
- close();
- )
+ ss << setw(2) << entry.e->getFileNo();
+ ss << ' ' << setw(6) << entry.e->len << ' '
+ << /*hex << setw(8) << (size_t) fqe.srcData << dec <<*/
+ " " << hexdump(entry.e->srcData(), entry.e->len);
+ log() << ss.str() << endl;
}
-
- void RecoveryJob::close() {
- stdx::lock_guard<stdx::mutex> lk(_mx);
- _close();
+ if (apply) {
+ write(last, entry);
}
-
- void RecoveryJob::_close() {
- MongoFile::flushAll(true);
- _mmfs.clear();
+ } else if (entry.op) {
+ // a DurOp subclass operation
+ if (dump) {
+ log() << " OP " << entry.op->toString() << endl;
}
-
- RecoveryJob::Last::Last() : mmf(NULL), fileNo(-1) {
- // Make sure the files list does not change from underneath
- LockMongoFilesShared::assertAtLeastReadLocked();
- }
-
- DurableMappedFile* RecoveryJob::Last::newEntry(const dur::ParsedJournalEntry& entry, RecoveryJob& rj) {
- int num = entry.e->getFileNo();
- if( num == fileNo && entry.dbName == dbName )
- return mmf;
-
- string fn = fileName(entry.dbName, num);
- MongoFile *file;
- {
- MongoFileFinder finder; // must release lock before creating new DurableMappedFile
- file = finder.findByPath(fn);
- }
-
- if (file) {
- verify(file->isDurableMappedFile());
- mmf = (DurableMappedFile*)file;
- }
- else {
- if( !rj._recovering ) {
- log() << "journal error applying writes, file " << fn << " is not open" << endl;
- verify(false);
- }
- std::shared_ptr<DurableMappedFile> sp (new DurableMappedFile);
- verify(sp->open(fn, false));
- rj._mmfs.push_back(sp);
- mmf = sp.get();
+ if (apply) {
+ if (entry.op->needFilesClosed()) {
+ _close(); // locked in processSection
}
-
- // we do this last so that if an exception were thrown, there isn't any wrong memory
- dbName = entry.dbName;
- fileNo = num;
- return mmf;
+ entry.op->replay();
}
-
- void RecoveryJob::write(Last& last, const ParsedJournalEntry& entry) {
- //TODO(mathias): look into making some of these dasserts
- verify(entry.e);
- verify(entry.dbName);
-
- DurableMappedFile *mmf = last.newEntry(entry, *this);
-
- if ((entry.e->ofs + entry.e->len) <= mmf->length()) {
- verify(mmf->view_write());
- verify(entry.e->srcData());
-
- void* dest = (char*)mmf->view_write() + entry.e->ofs;
- memcpy(dest, entry.e->srcData(), entry.e->len);
- stats.curr()->_writeToDataFilesBytes += entry.e->len;
- }
- else {
- massert(13622, "Trying to write past end of file in WRITETODATAFILES", _recovering);
- }
+ }
+}
+
+void RecoveryJob::applyEntries(const vector<ParsedJournalEntry>& entries) {
+ const bool apply = (mmapv1GlobalOptions.journalOptions & MMAPV1Options::JournalScanOnly) == 0;
+ const bool dump = (mmapv1GlobalOptions.journalOptions & MMAPV1Options::JournalDumpJournal);
+
+ if (dump) {
+ log() << "BEGIN section" << endl;
+ }
+
+ Last last;
+ for (vector<ParsedJournalEntry>::const_iterator i = entries.begin(); i != entries.end(); ++i) {
+ applyEntry(last, *i, apply, dump);
+ }
+
+ if (dump) {
+ log() << "END section" << endl;
+ }
+}
+
+void RecoveryJob::processSection(const JSectHeader* h,
+ const void* p,
+ unsigned len,
+ const JSectFooter* f) {
+ LockMongoFilesShared lkFiles; // for RecoveryJob::Last
+ stdx::lock_guard<stdx::mutex> lk(_mx);
+
+ // Check the footer checksum before doing anything else.
+ if (_recovering) {
+ verify(((const char*)h) + sizeof(JSectHeader) == p);
+ if (!f->checkHash(h, len + sizeof(JSectHeader))) {
+ log() << "journal section checksum doesn't match";
+ throw JournalSectionCorruptException();
}
-
- void RecoveryJob::applyEntry(Last& last, const ParsedJournalEntry& entry, bool apply, bool dump) {
- if( entry.e ) {
- if( dump ) {
- stringstream ss;
- ss << " BASICWRITE " << setw(20) << entry.dbName << '.';
- if( entry.e->isNsSuffix() )
- ss << "ns";
- else
- ss << setw(2) << entry.e->getFileNo();
- ss << ' ' << setw(6) << entry.e->len << ' ' << /*hex << setw(8) << (size_t) fqe.srcData << dec <<*/
- " " << hexdump(entry.e->srcData(), entry.e->len);
- log() << ss.str() << endl;
- }
- if( apply ) {
- write(last, entry);
- }
- }
- else if(entry.op) {
- // a DurOp subclass operation
- if( dump ) {
- log() << " OP " << entry.op->toString() << endl;
- }
- if( apply ) {
- if( entry.op->needFilesClosed() ) {
- _close(); // locked in processSection
- }
- entry.op->replay();
- }
+ }
+
+ if (_recovering && _lastDataSyncedFromLastRun > h->seqNumber + ExtraKeepTimeMs) {
+ if (h->seqNumber != _lastSeqMentionedInConsoleLog) {
+ static int n;
+ if (++n < 10) {
+ log() << "recover skipping application of section seq:" << h->seqNumber
+ << " < lsn:" << _lastDataSyncedFromLastRun << endl;
+ } else if (n == 10) {
+ log() << "recover skipping application of section more..." << endl;
}
+ _lastSeqMentionedInConsoleLog = h->seqNumber;
}
-
- void RecoveryJob::applyEntries(const vector<ParsedJournalEntry> &entries) {
- const bool apply =
- (mmapv1GlobalOptions.journalOptions & MMAPV1Options::JournalScanOnly) == 0;
- const bool dump =
- (mmapv1GlobalOptions.journalOptions & MMAPV1Options::JournalDumpJournal);
-
- if (dump) {
- log() << "BEGIN section" << endl;
- }
-
- Last last;
- for (vector<ParsedJournalEntry>::const_iterator i = entries.begin(); i != entries.end(); ++i) {
- applyEntry(last, *i, apply, dump);
- }
-
- if (dump) {
- log() << "END section" << endl;
- }
- }
-
- void RecoveryJob::processSection(const JSectHeader *h, const void *p, unsigned len, const JSectFooter *f) {
- LockMongoFilesShared lkFiles; // for RecoveryJob::Last
- stdx::lock_guard<stdx::mutex> lk(_mx);
-
- // Check the footer checksum before doing anything else.
- if (_recovering) {
- verify( ((const char *)h) + sizeof(JSectHeader) == p );
- if (!f->checkHash(h, len + sizeof(JSectHeader))) {
- log() << "journal section checksum doesn't match";
- throw JournalSectionCorruptException();
- }
- }
-
- if( _recovering && _lastDataSyncedFromLastRun > h->seqNumber + ExtraKeepTimeMs ) {
- if( h->seqNumber != _lastSeqMentionedInConsoleLog ) {
- static int n;
- if( ++n < 10 ) {
- log() << "recover skipping application of section seq:" << h->seqNumber << " < lsn:" << _lastDataSyncedFromLastRun << endl;
+ return;
+ }
+
+ unique_ptr<JournalSectionIterator> i;
+ if (_recovering) {
+ i = unique_ptr<JournalSectionIterator>(new JournalSectionIterator(*h, p, len, _recovering));
+ } else {
+ i = unique_ptr<JournalSectionIterator>(
+ new JournalSectionIterator(*h, /*after header*/ p, /*w/out header*/ len));
+ }
+
+ // we use a static so that we don't have to reallocate every time through. occasionally we
+ // go back to a small allocation so that a spike in growth won't stick around forever.
+ static vector<ParsedJournalEntry> entries;
+ entries.clear();
+ /** TEMP uncomment
+ RARELY OCCASIONALLY {
+ if( entries.capacity() > 2048 ) {
+ entries.shrink_to_fit();
+ entries.reserve(2048);
}
- else if( n == 10 ) {
- log() << "recover skipping application of section more..." << endl;
- }
- _lastSeqMentionedInConsoleLog = h->seqNumber;
- }
- return;
- }
-
- unique_ptr<JournalSectionIterator> i;
- if( _recovering ) {
- i = unique_ptr<JournalSectionIterator>(new JournalSectionIterator(*h, p, len, _recovering));
- }
- else {
- i = unique_ptr<JournalSectionIterator>(new JournalSectionIterator(*h, /*after header*/p, /*w/out header*/len));
- }
-
- // we use a static so that we don't have to reallocate every time through. occasionally we
- // go back to a small allocation so that if there were a spiky growth it won't stick forever.
- static vector<ParsedJournalEntry> entries;
- entries.clear();
-/** TEMP uncomment
- RARELY OCCASIONALLY {
- if( entries.capacity() > 2048 ) {
- entries.shrink_to_fit();
- entries.reserve(2048);
}
- }
+ */
+
+ // first read all entries to make sure this section is valid
+ ParsedJournalEntry e;
+ while (!i->atEof()) {
+ i->next(e);
+ entries.push_back(e);
+ }
+
+ // got all the entries for one group commit. apply them:
+ applyEntries(entries);
+}
+
+/** apply a specific journal file that is already mmap'd
+ @param p start of the memory mapped file
+ @return true if this is detected to be the last file (ends abruptly)
*/
-
- // first read all entries to make sure this section is valid
- ParsedJournalEntry e;
- while( !i->atEof() ) {
- i->next(e);
- entries.push_back(e);
+bool RecoveryJob::processFileBuffer(const void* p, unsigned len) {
+ try {
+ unsigned long long fileId;
+ BufReader br(p, len);
+
+ {
+ // read file header
+ JHeader h;
+ br.read(h);
+
+ if (!h.valid()) {
+ log() << "Journal file header invalid. This could indicate corruption, or "
+ << "an unclean shutdown while writing the first section in a journal "
+ << "file.";
+ throw JournalSectionCorruptException();
}
- // got all the entries for one group commit. apply them:
- applyEntries(entries);
- }
-
- /** apply a specific journal file, that is already mmap'd
- @param p start of the memory mapped file
- @return true if this is detected to be the last file (ends abruptly)
- */
- bool RecoveryJob::processFileBuffer(const void *p, unsigned len) {
- try {
- unsigned long long fileId;
- BufReader br(p,len);
-
- {
- // read file header
- JHeader h;
- br.read(h);
-
- if (!h.valid()) {
- log() << "Journal file header invalid. This could indicate corruption, or "
- << "an unclean shutdown while writing the first section in a journal "
- << "file.";
- throw JournalSectionCorruptException();
- }
-
- if( !h.versionOk() ) {
- log() << "journal file version number mismatch got:" << hex << h._version
- << " expected:" << hex << (unsigned) JHeader::CurrentVersion
- << ". if you have just upgraded, recover with old version of mongod, terminate cleanly, then upgrade."
- << endl;
- // Not using JournalSectionCurruptException as we don't want to ignore
- // journal files on upgrade.
- uasserted(13536, str::stream() << "journal version number mismatch " << h._version);
- }
- fileId = h.fileId;
- if (mmapv1GlobalOptions.journalOptions &
- MMAPV1Options::JournalDumpJournal) {
- log() << "JHeader::fileId=" << fileId << endl;
- }
- }
-
- // read sections
- while ( !br.atEof() ) {
- JSectHeader h;
- br.peek(h);
- if( h.fileId != fileId ) {
- if (kDebugBuild || (mmapv1GlobalOptions.journalOptions &
- MMAPV1Options::JournalDumpJournal)) {
- log() << "Ending processFileBuffer at differing fileId want:" << fileId << " got:" << h.fileId << endl;
- log() << " sect len:" << h.sectionLen() << " seqnum:" << h.seqNumber << endl;
- }
- return true;
- }
- unsigned slen = h.sectionLen();
- unsigned dataLen = slen - sizeof(JSectHeader) - sizeof(JSectFooter);
- const char *hdr = (const char *) br.skip(h.sectionLenWithPadding());
- const char *data = hdr + sizeof(JSectHeader);
- const char *footer = data + dataLen;
- processSection((const JSectHeader*) hdr, data, dataLen, (const JSectFooter*) footer);
-
- // ctrl c check
- uassert(ErrorCodes::Interrupted, "interrupted during journal recovery", !inShutdown());
- }
+ if (!h.versionOk()) {
+ log() << "journal file version number mismatch got:" << hex << h._version
+ << " expected:" << hex << (unsigned)JHeader::CurrentVersion
+ << ". if you have just upgraded, recover with old version of mongod, "
+ "terminate cleanly, then upgrade." << endl;
+ // Not using JournalSectionCorruptException as we don't want to ignore
+ // journal files on upgrade.
+ uasserted(13536, str::stream() << "journal version number mismatch " << h._version);
}
- catch (const BufReader::eof&) {
- if (mmapv1GlobalOptions.journalOptions & MMAPV1Options::JournalDumpJournal)
- log() << "ABRUPT END" << endl;
- return true; // abrupt end
+ fileId = h.fileId;
+ if (mmapv1GlobalOptions.journalOptions & MMAPV1Options::JournalDumpJournal) {
+ log() << "JHeader::fileId=" << fileId << endl;
}
- catch (const JournalSectionCorruptException&) {
- if (mmapv1GlobalOptions.journalOptions & MMAPV1Options::JournalDumpJournal)
- log() << "ABRUPT END" << endl;
- return true; // abrupt end
- }
-
- return false; // non-abrupt end
}
- /** apply a specific journal file */
- bool RecoveryJob::processFile(boost::filesystem::path journalfile) {
- log() << "recover " << journalfile.string() << endl;
-
- try {
- if( boost::filesystem::file_size( journalfile.string() ) == 0 ) {
- log() << "recover info " << journalfile.string() << " has zero length" << endl;
- return true;
+ // read sections
+ while (!br.atEof()) {
+ JSectHeader h;
+ br.peek(h);
+ if (h.fileId != fileId) {
+ if (kDebugBuild ||
+ (mmapv1GlobalOptions.journalOptions & MMAPV1Options::JournalDumpJournal)) {
+ log() << "Ending processFileBuffer at differing fileId want:" << fileId
+ << " got:" << h.fileId << endl;
+ log() << " sect len:" << h.sectionLen() << " seqnum:" << h.seqNumber << endl;
}
- } catch(...) {
- // if something weird like a permissions problem keep going so the massert down below can happen (presumably)
- log() << "recover exception checking filesize" << endl;
+ return true;
}
-
- MemoryMappedFile f;
- void *p = f.mapWithOptions(journalfile.string().c_str(), MongoFile::READONLY | MongoFile::SEQUENTIAL);
- massert(13544, str::stream() << "recover error couldn't open " << journalfile.string(), p);
- return processFileBuffer(p, (unsigned) f.length());
+ unsigned slen = h.sectionLen();
+ unsigned dataLen = slen - sizeof(JSectHeader) - sizeof(JSectFooter);
+ const char* hdr = (const char*)br.skip(h.sectionLenWithPadding());
+ const char* data = hdr + sizeof(JSectHeader);
+ const char* footer = data + dataLen;
+ processSection((const JSectHeader*)hdr, data, dataLen, (const JSectFooter*)footer);
+
+ // ctrl c check
+ uassert(ErrorCodes::Interrupted, "interrupted during journal recovery", !inShutdown());
}
-
- /** @param files all the j._0 style files we need to apply for recovery */
- void RecoveryJob::go(vector<boost::filesystem::path>& files) {
- log() << "recover begin" << endl;
- LockMongoFilesExclusive lkFiles; // for RecoveryJob::Last
- _recovering = true;
-
- // load the last sequence number synced to the datafiles on disk before the last crash
- _lastDataSyncedFromLastRun = journalReadLSN();
- log() << "recover lsn: " << _lastDataSyncedFromLastRun << endl;
-
- for( unsigned i = 0; i != files.size(); ++i ) {
- bool abruptEnd = processFile(files[i]);
- if( abruptEnd && i+1 < files.size() ) {
- log() << "recover error: abrupt end to file " << files[i].string() << ", yet it isn't the last journal file" << endl;
- close();
- uasserted(13535, "recover abrupt journal file end");
- }
- }
-
- close();
-
- if (mmapv1GlobalOptions.journalOptions & MMAPV1Options::JournalScanOnly) {
- uasserted(13545, str::stream() << "--durOptions "
- << (int) MMAPV1Options::JournalScanOnly
- << " (scan only) specified");
- }
-
- log() << "recover cleaning up" << endl;
- removeJournalFiles();
- log() << "recover done" << endl;
- okToCleanUp = true;
- _recovering = false;
+ } catch (const BufReader::eof&) {
+ if (mmapv1GlobalOptions.journalOptions & MMAPV1Options::JournalDumpJournal)
+ log() << "ABRUPT END" << endl;
+ return true; // abrupt end
+ } catch (const JournalSectionCorruptException&) {
+ if (mmapv1GlobalOptions.journalOptions & MMAPV1Options::JournalDumpJournal)
+ log() << "ABRUPT END" << endl;
+ return true; // abrupt end
+ }
+
+ return false; // non-abrupt end
+}
+
+/** apply a specific journal file */
+bool RecoveryJob::processFile(boost::filesystem::path journalfile) {
+ log() << "recover " << journalfile.string() << endl;
+
+ try {
+ if (boost::filesystem::file_size(journalfile.string()) == 0) {
+ log() << "recover info " << journalfile.string() << " has zero length" << endl;
+ return true;
}
-
- void _recover() {
- verify(storageGlobalParams.dur);
-
- boost::filesystem::path p = getJournalDir();
- if( !exists(p) ) {
- log() << "directory " << p.string() << " does not exist, there will be no recovery startup step" << endl;
- okToCleanUp = true;
- return;
- }
-
- vector<boost::filesystem::path> journalFiles;
- getFiles(p, journalFiles);
-
- if( journalFiles.empty() ) {
- log() << "recover : no journal files present, no recovery needed" << endl;
- okToCleanUp = true;
- return;
- }
-
- RecoveryJob::get().go(journalFiles);
- }
-
- /** recover from a crash
- called during startup
- throws on error
- */
- void replayJournalFilesAtStartup() {
- // we use a lock so that exitCleanly will wait for us
- // to finish (or at least to notice what is up and stop)
- OperationContextImpl txn;
- ScopedTransaction transaction(&txn, MODE_X);
- Lock::GlobalWrite lk(txn.lockState());
-
- _recover(); // throws on interruption
+ } catch (...) {
+ // if something weird happens, like a permissions problem, keep going so the massert down below can fire (presumably)
+ log() << "recover exception checking filesize" << endl;
+ }
+
+ MemoryMappedFile f;
+ void* p =
+ f.mapWithOptions(journalfile.string().c_str(), MongoFile::READONLY | MongoFile::SEQUENTIAL);
+ massert(13544, str::stream() << "recover error couldn't open " << journalfile.string(), p);
+ return processFileBuffer(p, (unsigned)f.length());
+}
+
+/** @param files all the j._0 style files we need to apply for recovery */
+void RecoveryJob::go(vector<boost::filesystem::path>& files) {
+ log() << "recover begin" << endl;
+ LockMongoFilesExclusive lkFiles; // for RecoveryJob::Last
+ _recovering = true;
+
+ // load the last sequence number synced to the datafiles on disk before the last crash
+ _lastDataSyncedFromLastRun = journalReadLSN();
+ log() << "recover lsn: " << _lastDataSyncedFromLastRun << endl;
+
+ for (unsigned i = 0; i != files.size(); ++i) {
+ bool abruptEnd = processFile(files[i]);
+ if (abruptEnd && i + 1 < files.size()) {
+ log() << "recover error: abrupt end to file " << files[i].string()
+ << ", yet it isn't the last journal file" << endl;
+ close();
+ uasserted(13535, "recover abrupt journal file end");
}
-
- struct BufReaderY { int a,b; };
- class BufReaderUnitTest : public StartupTest {
- public:
- void run() {
- BufReader r((void*) "abcdabcdabcd", 12);
- char x;
- BufReaderY y;
- r.read(x); //cout << x; // a
- verify( x == 'a' );
- r.read(y);
- r.read(x);
- verify( x == 'b' );
- }
- } brunittest;
-
- } // namespace dur
-} // namespace mongo
-
+ }
+
+ close();
+
+ if (mmapv1GlobalOptions.journalOptions & MMAPV1Options::JournalScanOnly) {
+ uasserted(13545,
+ str::stream() << "--durOptions " << (int)MMAPV1Options::JournalScanOnly
+ << " (scan only) specified");
+ }
+
+ log() << "recover cleaning up" << endl;
+ removeJournalFiles();
+ log() << "recover done" << endl;
+ okToCleanUp = true;
+ _recovering = false;
+}
+
+void _recover() {
+ verify(storageGlobalParams.dur);
+
+ boost::filesystem::path p = getJournalDir();
+ if (!exists(p)) {
+ log() << "directory " << p.string()
+ << " does not exist, there will be no recovery startup step" << endl;
+ okToCleanUp = true;
+ return;
+ }
+
+ vector<boost::filesystem::path> journalFiles;
+ getFiles(p, journalFiles);
+
+ if (journalFiles.empty()) {
+ log() << "recover : no journal files present, no recovery needed" << endl;
+ okToCleanUp = true;
+ return;
+ }
+
+ RecoveryJob::get().go(journalFiles);
+}
+
+/** recover from a crash
+ called during startup
+ throws on error
+*/
+void replayJournalFilesAtStartup() {
+ // we use a lock so that exitCleanly will wait for us
+ // to finish (or at least to notice what is up and stop)
+ OperationContextImpl txn;
+ ScopedTransaction transaction(&txn, MODE_X);
+ Lock::GlobalWrite lk(txn.lockState());
+
+ _recover(); // throws on interruption
+}
+
+struct BufReaderY {
+ int a, b;
+};
+class BufReaderUnitTest : public StartupTest {
+public:
+ void run() {
+ BufReader r((void*)"abcdabcdabcd", 12);
+ char x;
+ BufReaderY y;
+ r.read(x); // cout << x; // a
+ verify(x == 'a');
+ r.read(y);
+ r.read(x);
+ verify(x == 'b');
+ }
+} brunittest;
+
+} // namespace dur
+} // namespace mongo
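
Aside: getFiles() above orders j._<n> files by numeric suffix and aborts if a file in the middle of the sequence is missing, since replaying with a gap would apply journal sections out of order. A standalone sketch of that contiguity check; parseSuffix() is a hypothetical stand-in for the str:: helpers used above:

    #include <map>
    #include <stdexcept>
    #include <string>
    #include <vector>

    // Hypothetical helper: extracts <n> from a "j._<n>" file name.
    unsigned parseSuffix(const std::string& name) {
        return static_cast<unsigned>(std::stoul(name.substr(name.find('_') + 1)));
    }

    // Returns names sorted by suffix; throws if the sequence has a gap,
    // mirroring the uasserts in getFiles().
    std::vector<std::string> orderJournalFiles(const std::vector<std::string>& names) {
        std::map<unsigned, std::string> bySuffix;
        for (const std::string& n : names) {
            if (!bySuffix.insert(std::make_pair(parseSuffix(n), n)).second)
                throw std::runtime_error("duplicate journal file number: " + n);
        }
        std::vector<std::string> ordered;
        for (std::map<unsigned, std::string>::iterator it = bySuffix.begin();
             it != bySuffix.end();
             ++it) {
            if (it != bySuffix.begin() && bySuffix.count(it->first - 1) == 0)
                throw std::runtime_error("missing preceding journal file for: " + it->second);
            ordered.push_back(it->second);
        }
        return ordered;
    }

    int main() {
        std::vector<std::string> names = {"j._1", "j._0", "j._2"};
        std::vector<std::string> ordered = orderJournalFiles(names);  // j._0, j._1, j._2
        return ordered.size() == 3 ? 0 : 1;
    }
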
diff --git a/src/mongo/db/storage/mmap_v1/dur_recover.h b/src/mongo/db/storage/mmap_v1/dur_recover.h
index 886f278a66a..e05e7926215 100644
--- a/src/mongo/db/storage/mmap_v1/dur_recover.h
+++ b/src/mongo/db/storage/mmap_v1/dur_recover.h
@@ -38,67 +38,69 @@
namespace mongo {
- class DurableMappedFile;
+class DurableMappedFile;
- namespace dur {
+namespace dur {
- struct ParsedJournalEntry;
+struct ParsedJournalEntry;
- /** call go() to execute a recovery from existing journal files.
- */
- class RecoveryJob {
- MONGO_DISALLOW_COPYING(RecoveryJob);
- public:
- RecoveryJob();
- ~RecoveryJob();
+/** call go() to execute a recovery from existing journal files.
+ */
+class RecoveryJob {
+ MONGO_DISALLOW_COPYING(RecoveryJob);
- void go(std::vector<boost::filesystem::path>& files);
+public:
+ RecoveryJob();
+ ~RecoveryJob();
- /** @param data data between header and footer. compressed if recovering. */
- void processSection(const JSectHeader *h, const void *data, unsigned len, const JSectFooter *f);
+ void go(std::vector<boost::filesystem::path>& files);
- // locks and calls _close()
- void close();
+ /** @param data data between header and footer. compressed if recovering. */
+ void processSection(const JSectHeader* h, const void* data, unsigned len, const JSectFooter* f);
- static RecoveryJob& get() { return _instance; }
+ // locks and calls _close()
+ void close();
- private:
+ static RecoveryJob& get() {
+ return _instance;
+ }
- class Last {
- public:
- Last();
- DurableMappedFile* newEntry(const ParsedJournalEntry&, RecoveryJob&);
+private:
+ class Last {
+ public:
+ Last();
+ DurableMappedFile* newEntry(const ParsedJournalEntry&, RecoveryJob&);
- private:
- DurableMappedFile* mmf;
- std::string dbName;
- int fileNo;
- };
+ private:
+ DurableMappedFile* mmf;
+ std::string dbName;
+ int fileNo;
+ };
- void write(Last& last, const ParsedJournalEntry& entry); // actually writes to the file
- void applyEntry(Last& last, const ParsedJournalEntry& entry, bool apply, bool dump);
- void applyEntries(const std::vector<ParsedJournalEntry> &entries);
- bool processFileBuffer(const void *, unsigned len);
- bool processFile(boost::filesystem::path journalfile);
- void _close(); // doesn't lock
+ void write(Last& last, const ParsedJournalEntry& entry); // actually writes to the file
+ void applyEntry(Last& last, const ParsedJournalEntry& entry, bool apply, bool dump);
+ void applyEntries(const std::vector<ParsedJournalEntry>& entries);
+ bool processFileBuffer(const void*, unsigned len);
+ bool processFile(boost::filesystem::path journalfile);
+ void _close(); // doesn't lock
- // Set of memory mapped files and a mutex to protect them
- stdx::mutex _mx;
- std::list<std::shared_ptr<DurableMappedFile> > _mmfs;
+ // Set of memory mapped files and a mutex to protect them
+ stdx::mutex _mx;
+ std::list<std::shared_ptr<DurableMappedFile>> _mmfs;
- // Are we in recovery or WRITETODATAFILES
- bool _recovering;
+ // Are we in recovery or WRITETODATAFILES
+ bool _recovering;
- unsigned long long _lastDataSyncedFromLastRun;
- unsigned long long _lastSeqMentionedInConsoleLog;
+ unsigned long long _lastDataSyncedFromLastRun;
+ unsigned long long _lastSeqMentionedInConsoleLog;
- static RecoveryJob& _instance;
- };
+ static RecoveryJob& _instance;
+};
- void replayJournalFilesAtStartup();
- }
+void replayJournalFilesAtStartup();
+}
}
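
Aside: RecoveryJob::_instance above is initialized as *(new RecoveryJob()) and never deleted, a deliberate leak that keeps the singleton valid through shutdown and sidesteps static destruction-order problems. The same pattern in miniature, with a hypothetical Service class:

    class Service {
    public:
        static Service& get() {
            return _instance;
        }

    private:
        Service() {}
        // Heap-allocated and intentionally never freed: the object remains
        // valid for the whole process lifetime, so shutdown code never
        // races a static destructor.
        static Service& _instance;
    };

    Service& Service::_instance = *(new Service());

    int main() {
        Service& s = Service::get();
        (void)s;
        return 0;
    }
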
diff --git a/src/mongo/db/storage/mmap_v1/dur_recovery_unit.cpp b/src/mongo/db/storage/mmap_v1/dur_recovery_unit.cpp
index e826277e7ff..0c9f58988e2 100644
--- a/src/mongo/db/storage/mmap_v1/dur_recovery_unit.cpp
+++ b/src/mongo/db/storage/mmap_v1/dur_recovery_unit.cpp
@@ -45,284 +45,272 @@
namespace mongo {
- DurRecoveryUnit::DurRecoveryUnit()
- : _writeCount(0), _writeBytes(0), _inUnitOfWork(false), _rollbackWritesDisabled(false) {
- }
+DurRecoveryUnit::DurRecoveryUnit()
+ : _writeCount(0), _writeBytes(0), _inUnitOfWork(false), _rollbackWritesDisabled(false) {}
- void DurRecoveryUnit::beginUnitOfWork(OperationContext* opCtx) {
- invariant(!_inUnitOfWork);
- _inUnitOfWork = true;
- }
+void DurRecoveryUnit::beginUnitOfWork(OperationContext* opCtx) {
+ invariant(!_inUnitOfWork);
+ _inUnitOfWork = true;
+}
- void DurRecoveryUnit::commitUnitOfWork() {
- invariant(_inUnitOfWork);
+void DurRecoveryUnit::commitUnitOfWork() {
+ invariant(_inUnitOfWork);
- commitChanges();
+ commitChanges();
- // global journal flush opportunity
- getDur().commitIfNeeded();
+ // global journal flush opportunity
+ getDur().commitIfNeeded();
- resetChanges();
- }
+ resetChanges();
+}
- void DurRecoveryUnit::abortUnitOfWork() {
- invariant(_inUnitOfWork);
+void DurRecoveryUnit::abortUnitOfWork() {
+ invariant(_inUnitOfWork);
- rollbackChanges();
- resetChanges();
- }
+ rollbackChanges();
+ resetChanges();
+}
- void DurRecoveryUnit::abandonSnapshot() {
- invariant(!_inUnitOfWork);
- // no-op since we have no transaction
- }
+void DurRecoveryUnit::abandonSnapshot() {
+ invariant(!_inUnitOfWork);
+ // no-op since we have no transaction
+}
- void DurRecoveryUnit::commitChanges() {
- if (getDur().isDurable())
- markWritesForJournaling();
+void DurRecoveryUnit::commitChanges() {
+ if (getDur().isDurable())
+ markWritesForJournaling();
- try {
- for (Changes::const_iterator it = _changes.begin(), end = _changes.end();
- it != end; ++it) {
- (*it)->commit();
- }
- }
- catch (...) {
- std::terminate();
+ try {
+ for (Changes::const_iterator it = _changes.begin(), end = _changes.end(); it != end; ++it) {
+ (*it)->commit();
}
+ } catch (...) {
+ std::terminate();
}
-
- void DurRecoveryUnit::markWritesForJournaling() {
- if (!_writeCount)
- return;
-
- typedef std::pair<void*, unsigned> Intent;
- std::vector<Intent> intents;
- const size_t numStoredWrites = _initialWrites.size() + _mergedWrites.size();
- intents.reserve(numStoredWrites);
-
- // Show very large units of work at LOG(1) level as they may hint at performance issues
- const int logLevel = (_writeCount > 100*1000 || _writeBytes > 50*1024*1024) ? 1 : 3;
-
- LOG(logLevel) << _writeCount << " writes (" << _writeBytes / 1024 << " kB) covered by "
- << numStoredWrites << " pre-images ("
- << _preimageBuffer.size() / 1024 << " kB) ";
-
- // orders the initial, unmerged writes, by address so we can coalesce overlapping and
- // adjacent writes
- std::sort(_initialWrites.begin(), _initialWrites.end());
-
- if (!_initialWrites.empty()) {
- intents.push_back(std::make_pair(_initialWrites.front().addr,
- _initialWrites.front().len));
- for (InitialWrites::iterator it = (_initialWrites.begin() + 1),
- end = _initialWrites.end();
- it != end;
- ++it) {
- Intent& lastIntent = intents.back();
- char* lastEnd = static_cast<char*>(lastIntent.first) + lastIntent.second;
- if (it->addr <= lastEnd) {
- // overlapping or adjacent, so extend.
- ptrdiff_t extendedLen = (it->end()) - static_cast<char*>(lastIntent.first);
- lastIntent.second = std::max(lastIntent.second, unsigned(extendedLen));
- }
- else {
- // not overlapping, so create a new intent
- intents.push_back(std::make_pair(it->addr, it->len));
- }
+}
+
+void DurRecoveryUnit::markWritesForJournaling() {
+ if (!_writeCount)
+ return;
+
+ typedef std::pair<void*, unsigned> Intent;
+ std::vector<Intent> intents;
+ const size_t numStoredWrites = _initialWrites.size() + _mergedWrites.size();
+ intents.reserve(numStoredWrites);
+
+ // Show very large units of work at LOG(1) level as they may hint at performance issues
+ const int logLevel = (_writeCount > 100 * 1000 || _writeBytes > 50 * 1024 * 1024) ? 1 : 3;
+
+ LOG(logLevel) << _writeCount << " writes (" << _writeBytes / 1024 << " kB) covered by "
+ << numStoredWrites << " pre-images (" << _preimageBuffer.size() / 1024 << " kB) ";
+
+ // order the initial, unmerged writes by address so we can coalesce overlapping and
+ // adjacent writes
+ std::sort(_initialWrites.begin(), _initialWrites.end());
+
+ if (!_initialWrites.empty()) {
+ intents.push_back(std::make_pair(_initialWrites.front().addr, _initialWrites.front().len));
+ for (InitialWrites::iterator it = (_initialWrites.begin() + 1), end = _initialWrites.end();
+ it != end;
+ ++it) {
+ Intent& lastIntent = intents.back();
+ char* lastEnd = static_cast<char*>(lastIntent.first) + lastIntent.second;
+ if (it->addr <= lastEnd) {
+ // overlapping or adjacent, so extend.
+ ptrdiff_t extendedLen = (it->end()) - static_cast<char*>(lastIntent.first);
+ lastIntent.second = std::max(lastIntent.second, unsigned(extendedLen));
+ } else {
+ // not overlapping, so create a new intent
+ intents.push_back(std::make_pair(it->addr, it->len));
}
}
+ }
- MergedWrites::iterator it = _mergedWrites.begin();
- if (it != _mergedWrites.end()) {
- intents.push_back(std::make_pair(it->addr, it->len));
- while (++it != _mergedWrites.end()) {
- // Check the property that write intents are sorted and don't overlap.
- invariant(it->addr >= intents.back().first);
- Intent& lastIntent = intents.back();
- char* lastEnd = static_cast<char*>(lastIntent.first) + lastIntent.second;
- if (it->addr == lastEnd) {
- // adjacent, so extend.
- lastIntent.second += it->len;
- }
- else {
- // not overlapping, so create a new intent
- invariant(it->addr > lastEnd);
- intents.push_back(std::make_pair(it->addr, it->len));
- }
+ MergedWrites::iterator it = _mergedWrites.begin();
+ if (it != _mergedWrites.end()) {
+ intents.push_back(std::make_pair(it->addr, it->len));
+ while (++it != _mergedWrites.end()) {
+ // Check the property that write intents are sorted and don't overlap.
+ invariant(it->addr >= intents.back().first);
+ Intent& lastIntent = intents.back();
+ char* lastEnd = static_cast<char*>(lastIntent.first) + lastIntent.second;
+ if (it->addr == lastEnd) {
+ // adjacent, so extend.
+ lastIntent.second += it->len;
+ } else {
+ // not overlapping, so create a new intent
+ invariant(it->addr > lastEnd);
+ intents.push_back(std::make_pair(it->addr, it->len));
}
}
- LOG(logLevel) << _mergedWrites.size() << " pre-images " << "coalesced into "
- << intents.size() << " write intents";
-
- getDur().declareWriteIntents(intents);
- }
-
- void DurRecoveryUnit::resetChanges() {
- _writeCount = 0;
- _writeBytes = 0;
- _initialWrites.clear();
- _mergedWrites.clear();
- _changes.clear();
- _preimageBuffer.clear();
- _rollbackWritesDisabled = false;
- _inUnitOfWork = false;
}
-
- void DurRecoveryUnit::rollbackChanges() {
- // First rollback disk writes, then Changes. This matches behavior in other storage engines
- // that either rollback a transaction or don't write a writebatch.
-
- if (_rollbackWritesDisabled) {
- LOG(2) << " ***** NOT ROLLING BACK " << _writeCount << " disk writes";
+ LOG(logLevel) << _mergedWrites.size() << " pre-images "
+ << "coalesced into " << intents.size() << " write intents";
+
+ getDur().declareWriteIntents(intents);
+}
+
+void DurRecoveryUnit::resetChanges() {
+ _writeCount = 0;
+ _writeBytes = 0;
+ _initialWrites.clear();
+ _mergedWrites.clear();
+ _changes.clear();
+ _preimageBuffer.clear();
+ _rollbackWritesDisabled = false;
+ _inUnitOfWork = false;
+}
+
+void DurRecoveryUnit::rollbackChanges() {
+ // First roll back disk writes, then Changes. This matches behavior in other storage engines
+ // that either roll back a transaction or don't write a writebatch.
+
+ if (_rollbackWritesDisabled) {
+ LOG(2) << " ***** NOT ROLLING BACK " << _writeCount << " disk writes";
+ } else {
+ LOG(2) << " ***** ROLLING BACK " << _writeCount << " disk writes";
+
+ // First roll back the merged writes. These have no overlap or ordering requirement
+ // other than needing to be rolled back before all _initialWrites.
+ for (MergedWrites::iterator it = _mergedWrites.begin(); it != _mergedWrites.end(); ++it) {
+ _preimageBuffer.copy(it->addr, it->len, it->offset);
}
- else {
- LOG(2) << " ***** ROLLING BACK " << _writeCount << " disk writes";
-
- // First roll back the merged writes. These have no overlap or ordering requirement
- // other than needing to be rolled back before all _initialWrites.
- for (MergedWrites::iterator it = _mergedWrites.begin();
- it != _mergedWrites.end();
- ++it) {
- _preimageBuffer.copy(it->addr, it->len, it->offset);
- }
- // Then roll back the initial writes in LIFO order, as these might have overlaps.
- for (InitialWrites::reverse_iterator rit = _initialWrites.rbegin();
- rit != _initialWrites.rend();
- ++rit) {
- _preimageBuffer.copy(rit->addr, rit->len, rit->offset);
- }
+ // Then roll back the initial writes in LIFO order, as these might have overlaps.
+ for (InitialWrites::reverse_iterator rit = _initialWrites.rbegin();
+ rit != _initialWrites.rend();
+ ++rit) {
+ _preimageBuffer.copy(rit->addr, rit->len, rit->offset);
}
+ }
- LOG(2) << " ***** ROLLING BACK " << (_changes.size()) << " custom changes";
+ LOG(2) << " ***** ROLLING BACK " << (_changes.size()) << " custom changes";
- try {
- for (int i = _changes.size() - 1; i >= 0; i--) {
- LOG(2) << "CUSTOM ROLLBACK " << demangleName(typeid(*_changes[i]));
- _changes[i]->rollback();
- }
- }
- catch (...) {
- std::terminate();
+ try {
+ for (int i = _changes.size() - 1; i >= 0; i--) {
+ LOG(2) << "CUSTOM ROLLBACK " << demangleName(typeid(*_changes[i]));
+ _changes[i]->rollback();
}
+ } catch (...) {
+ std::terminate();
}
+}
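
The two rollback passes differ for a reason: merged writes are disjoint, so their copy order is irrelevant, while initial writes may overlap, so the oldest pre-image must win by being restored last. A small standalone demonstration of why the LIFO pass is required (illustrative, not part of this patch):

    #include <cstddef>
    #include <cstring>
    #include <iostream>
    #include <string>
    #include <vector>

    struct W {
        size_t off, len;
        std::string preimage;
    };

    int main() {
        char data[] = "AAAA";
        std::vector<W> writes;

        writes.push_back(W{0, 2, std::string(data, 2)});      // pre-image "AA"
        std::memcpy(data, "BB", 2);                           // data: "BBAA"
        writes.push_back(W{1, 2, std::string(data + 1, 2)});  // pre-image "BA"
        std::memcpy(data + 1, "CC", 2);                       // data: "BCCA"

        // LIFO restore: "BCCA" -> "BBAA" -> "AAAA". FIFO would leave "ABAA".
        for (std::vector<W>::reverse_iterator rit = writes.rbegin(); rit != writes.rend(); ++rit)
            std::memcpy(data + rit->off, rit->preimage.data(), rit->len);

        std::cout << data << std::endl;  // prints AAAA
    }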
- bool DurRecoveryUnit::waitUntilDurable() {
- invariant(!_inUnitOfWork);
- return getDur().waitUntilDurable();
- }
+bool DurRecoveryUnit::waitUntilDurable() {
+ invariant(!_inUnitOfWork);
+ return getDur().waitUntilDurable();
+}
- void DurRecoveryUnit::mergingWritingPtr(char* addr, size_t len) {
- // The invariant is that all writes are non-overlapping and non-empty. So, a single
- // writingPtr call may result in a number of new segments added. At this point, we cannot
- // in general merge adjacent writes, as that would require inefficient operations on the
- // preimage buffer.
+void DurRecoveryUnit::mergingWritingPtr(char* addr, size_t len) {
+ // The invariant is that all writes are non-overlapping and non-empty. So, a single
+ // writingPtr call may result in a number of new segments added. At this point, we cannot
+ // in general merge adjacent writes, as that would require inefficient operations on the
+ // preimage buffer.
- MergedWrites::iterator coveringWrite = _mergedWrites.upper_bound(Write(addr, 0, 0));
+ MergedWrites::iterator coveringWrite = _mergedWrites.upper_bound(Write(addr, 0, 0));
- char* const end = addr + len;
- while (addr < end) {
- dassert(coveringWrite == _mergedWrites.end() || coveringWrite->end() > addr);
+ char* const end = addr + len;
+ while (addr < end) {
+ dassert(coveringWrite == _mergedWrites.end() || coveringWrite->end() > addr);
- // Determine whether addr[0] is already covered by a write or not.
- // If covered, adjust addr and len to exclude the covered run from addr[0] onwards.
+ // Determine whether addr[0] is already covered by a write or not.
+ // If covered, adjust addr and len to exclude the covered run from addr[0] onwards.
- if (coveringWrite != _mergedWrites.end()) {
- char* const cwEnd = coveringWrite->end();
+ if (coveringWrite != _mergedWrites.end()) {
+ char* const cwEnd = coveringWrite->end();
- if (coveringWrite->addr <= addr) {
- // If the begin of the covering write at or before addr[0], addr[0] is covered.
- // While the existing pre-image will not generally be the same as the data
- // being written now, during rollback only the oldest pre-image matters.
+ if (coveringWrite->addr <= addr) {
+                // If the covering write begins at or before addr[0], addr[0] is covered.
+ // While the existing pre-image will not generally be the same as the data
+ // being written now, during rollback only the oldest pre-image matters.
- if (end <= cwEnd) {
- break; // fully covered
- }
-
- addr = cwEnd;
- coveringWrite++;
- dassert(coveringWrite == _mergedWrites.end() || coveringWrite->addr >= cwEnd);
+ if (end <= cwEnd) {
+ break; // fully covered
}
- }
- dassert(coveringWrite == _mergedWrites.end() || coveringWrite->end() > addr);
- // If the next coveringWrite overlaps, adjust the end of the uncovered region.
- char* uncoveredEnd = end;
- if (coveringWrite != _mergedWrites.end() && coveringWrite->addr < end) {
- uncoveredEnd = coveringWrite->addr;
+ addr = cwEnd;
+ coveringWrite++;
+ dassert(coveringWrite == _mergedWrites.end() || coveringWrite->addr >= cwEnd);
}
+ }
+ dassert(coveringWrite == _mergedWrites.end() || coveringWrite->end() > addr);
- const size_t uncoveredLen = uncoveredEnd - addr;
- if (uncoveredLen) {
- // We are writing to a region that hasn't been declared previously.
- _mergedWrites.insert(Write(addr, uncoveredLen, _preimageBuffer.size()));
+ // If the next coveringWrite overlaps, adjust the end of the uncovered region.
+ char* uncoveredEnd = end;
+ if (coveringWrite != _mergedWrites.end() && coveringWrite->addr < end) {
+ uncoveredEnd = coveringWrite->addr;
+ }
- // Windows requires us to adjust the address space *before* we write to anything.
- privateViews.makeWritable(addr, uncoveredLen);
+ const size_t uncoveredLen = uncoveredEnd - addr;
+ if (uncoveredLen) {
+ // We are writing to a region that hasn't been declared previously.
+ _mergedWrites.insert(Write(addr, uncoveredLen, _preimageBuffer.size()));
- if (!_rollbackWritesDisabled) {
- _preimageBuffer.append(addr, uncoveredLen);
- }
- addr = uncoveredEnd;
+ // Windows requires us to adjust the address space *before* we write to anything.
+ privateViews.makeWritable(addr, uncoveredLen);
+
+ if (!_rollbackWritesDisabled) {
+ _preimageBuffer.append(addr, uncoveredLen);
}
+ addr = uncoveredEnd;
}
}
+}
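
The lookup above works because _mergedWrites is ordered by ending address: a zero-length probe at addr has end() == addr, so upper_bound() returns the first write whose end lies beyond addr, and that is the only element that can cover it. A reduced sketch of the comparator and probe, with illustrative types (not part of this patch):

    #include <set>

    struct Write {
        char* addr;
        int len;
        char* end() const {
            return addr + len;
        }
    };

    struct CompareEnd {
        bool operator()(const Write& lhs, const Write& rhs) const {
            return lhs.addr + lhs.len < rhs.addr + rhs.len;
        }
    };

    typedef std::set<Write, CompareEnd> MergedWrites;

    // First write with end() > p, i.e. the only candidate that can cover p.
    MergedWrites::iterator firstCandidate(MergedWrites& mw, char* p) {
        Write probe = {p, 0};
        return mw.upper_bound(probe);
    }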
- void* DurRecoveryUnit::writingPtr(void* addr, size_t len) {
- invariant(_inUnitOfWork);
-
- if (len == 0) {
- return addr; // Don't need to do anything for empty ranges.
- }
+void* DurRecoveryUnit::writingPtr(void* addr, size_t len) {
+ invariant(_inUnitOfWork);
- invariant(len < size_t(std::numeric_limits<int>::max()));
+ if (len == 0) {
+ return addr; // Don't need to do anything for empty ranges.
+ }
- _writeCount++;
- _writeBytes += len;
- char* const data = static_cast<char*>(addr);
+ invariant(len < size_t(std::numeric_limits<int>::max()));
- // The initial writes are stored in a faster, but less memory-efficient way. This will
- // typically be enough for simple operations, where the extra cost of incremental
- // coalescing and merging would be too much. For larger writes, more redundancy is
- // is expected, so the cost of checking for duplicates is offset by savings in copying
- // and allocating preimage buffers. Total memory use of the preimage buffer may be up to
- // kMaxUnmergedPreimageBytes larger than the amount memory covered by the write intents.
+ _writeCount++;
+ _writeBytes += len;
+ char* const data = static_cast<char*>(addr);
- const size_t kMaxUnmergedPreimageBytes = kDebugBuild ? 16*1024 : 10*1024*1024;
+ // The initial writes are stored in a faster, but less memory-efficient way. This will
+ // typically be enough for simple operations, where the extra cost of incremental
+    // coalescing and merging would be too much. For larger writes, more redundancy
+    // is expected, so the cost of checking for duplicates is offset by savings in copying
+    // and allocating preimage buffers. Total memory use of the preimage buffer may be up to
+    // kMaxUnmergedPreimageBytes larger than the amount of memory covered by the write intents.
- if (_preimageBuffer.size() + len > kMaxUnmergedPreimageBytes) {
- mergingWritingPtr(data, len);
+ const size_t kMaxUnmergedPreimageBytes = kDebugBuild ? 16 * 1024 : 10 * 1024 * 1024;
- // After a merged write, no more initial writes can occur or there would be an
- // ordering violation during rollback. So, ensure that the if-condition will be true
- // for any future write regardless of length. This is true now because
- // mergingWritingPtr also will store its first write in _preimageBuffer as well.
- invariant(_preimageBuffer.size() >= kMaxUnmergedPreimageBytes);
+ if (_preimageBuffer.size() + len > kMaxUnmergedPreimageBytes) {
+ mergingWritingPtr(data, len);
- return addr;
- }
+ // After a merged write, no more initial writes can occur or there would be an
+ // ordering violation during rollback. So, ensure that the if-condition will be true
+ // for any future write regardless of length. This is true now because
+    // mergingWritingPtr stores its first write in _preimageBuffer as well.
+ invariant(_preimageBuffer.size() >= kMaxUnmergedPreimageBytes);
- // Windows requires us to adjust the address space *before* we write to anything.
- privateViews.makeWritable(data, len);
+ return addr;
+ }
- _initialWrites.push_back(Write(data, len, _preimageBuffer.size()));
+ // Windows requires us to adjust the address space *before* we write to anything.
+ privateViews.makeWritable(data, len);
- if (!_rollbackWritesDisabled) {
- _preimageBuffer.append(data, len);
- }
+ _initialWrites.push_back(Write(data, len, _preimageBuffer.size()));
- return addr;
+ if (!_rollbackWritesDisabled) {
+ _preimageBuffer.append(data, len);
}
- void DurRecoveryUnit::setRollbackWritesDisabled() {
- invariant(_inUnitOfWork);
- _rollbackWritesDisabled = true;
- }
+ return addr;
+}
- void DurRecoveryUnit::registerChange(Change* change) {
- invariant(_inUnitOfWork);
- _changes.push_back(change);
- }
+void DurRecoveryUnit::setRollbackWritesDisabled() {
+ invariant(_inUnitOfWork);
+ _rollbackWritesDisabled = true;
+}
+
+void DurRecoveryUnit::registerChange(Change* change) {
+ invariant(_inUnitOfWork);
+ _changes.push_back(change);
+}
} // namespace mongo
diff --git a/src/mongo/db/storage/mmap_v1/dur_recovery_unit.h b/src/mongo/db/storage/mmap_v1/dur_recovery_unit.h
index d26032e8f26..52f717d29b2 100644
--- a/src/mongo/db/storage/mmap_v1/dur_recovery_unit.h
+++ b/src/mongo/db/storage/mmap_v1/dur_recovery_unit.h
@@ -39,127 +39,131 @@
namespace mongo {
+/**
+ * Just pass through to getDur().
+ */
+class DurRecoveryUnit : public RecoveryUnit {
+public:
+ DurRecoveryUnit();
+
+ void beginUnitOfWork(OperationContext* opCtx) final;
+ void commitUnitOfWork() final;
+ void abortUnitOfWork() final;
+
+ virtual bool waitUntilDurable();
+
+ virtual void abandonSnapshot();
+
+ // The recovery unit takes ownership of change.
+ virtual void registerChange(Change* change);
+
+ virtual void* writingPtr(void* addr, size_t len);
+
+ virtual void setRollbackWritesDisabled();
+
+ virtual SnapshotId getSnapshotId() const {
+ return SnapshotId();
+ }
+
+private:
/**
- * Just pass through to getDur().
+ * Marks writes for journaling, if enabled, and then commits all other Changes in order.
+ * Returns with empty _initialWrites, _mergedWrites, _changes and _preimageBuffer, but
+ * does not reset the _rollbackWritesDisabled or _mustRollback flags. This leaves the
+ * RecoveryUnit ready for more changes that may be committed or rolled back.
*/
- class DurRecoveryUnit : public RecoveryUnit {
- public:
- DurRecoveryUnit();
-
- void beginUnitOfWork(OperationContext* opCtx) final;
- void commitUnitOfWork() final;
- void abortUnitOfWork() final;
-
- virtual bool waitUntilDurable();
-
- virtual void abandonSnapshot();
-
- // The recovery unit takes ownership of change.
- virtual void registerChange(Change* change);
-
- virtual void* writingPtr(void* addr, size_t len);
-
- virtual void setRollbackWritesDisabled();
-
- virtual SnapshotId getSnapshotId() const { return SnapshotId(); }
-
- private:
- /**
- * Marks writes for journaling, if enabled, and then commits all other Changes in order.
- * Returns with empty _initialWrites, _mergedWrites, _changes and _preimageBuffer, but
- * does not reset the _rollbackWritesDisabled or _mustRollback flags. This leaves the
- * RecoveryUnit ready for more changes that may be committed or rolled back.
- */
- void commitChanges();
-
- /**
- * Creates a list of write intents to be journaled, and hands it of to the active
- * DurabilityInterface.
- */
- void markWritesForJournaling();
-
- /**
- * Restores state by rolling back all writes using the saved pre-images, and then
- * rolling back all other Changes in LIFO order. Resets internal state.
- */
- void rollbackChanges();
-
-
- /**
- * Version of writingPtr that checks existing writes for overlap and only stores those
- * changes not yet covered by an existing write intent and pre-image.
- */
- void mergingWritingPtr(char* data, size_t len);
-
- /**
- * Reset to a clean state without any uncommitted changes or write.
- */
- void resetChanges();
-
- // Changes are ordered from oldest to newest.
- typedef OwnedPointerVector<Change> Changes;
- Changes _changes;
-
-
- // Number of pending uncommitted writes. Incremented even if new write is fully covered by
- // existing writes.
- size_t _writeCount;
- // Total size of the pending uncommitted writes.
- size_t _writeBytes;
-
- /**
- * These are memory writes inside the mmapv1 mmap-ed files. A pointer past the end is just
- * instead of a pointer to the beginning for the benefit of MergedWrites.
- */
- struct Write {
- Write(char* addr, int len, int offset) : addr(addr), len(len), offset(offset) { }
- Write(const Write& rhs) : addr(rhs.addr), len(rhs.len), offset(rhs.offset) { }
- Write() : addr(0), len(0), offset(0) { }
- bool operator< (const Write& rhs) const { return addr < rhs.addr; }
-
- struct compareEnd {
- bool operator() (const Write& lhs, const Write& rhs) const {
- return lhs.addr + lhs.len < rhs.addr + rhs.len;
- }
- };
-
- char* end() const {
- return addr + len;
- }
+ void commitChanges();
+
+ /**
+     * Creates a list of write intents to be journaled, and hands it off to the active
+ * DurabilityInterface.
+ */
+ void markWritesForJournaling();
+
+ /**
+ * Restores state by rolling back all writes using the saved pre-images, and then
+ * rolling back all other Changes in LIFO order. Resets internal state.
+ */
+ void rollbackChanges();
+
- char* addr;
- int len;
- int offset; // index into _preimageBuffer
+ /**
+ * Version of writingPtr that checks existing writes for overlap and only stores those
+ * changes not yet covered by an existing write intent and pre-image.
+ */
+ void mergingWritingPtr(char* data, size_t len);
+
+ /**
+     * Resets to a clean state without any uncommitted changes or writes.
+ */
+ void resetChanges();
+
+ // Changes are ordered from oldest to newest.
+ typedef OwnedPointerVector<Change> Changes;
+ Changes _changes;
+
+
+    // Number of pending uncommitted writes. Incremented even if a new write is fully covered by
+ // existing writes.
+ size_t _writeCount;
+ // Total size of the pending uncommitted writes.
+ size_t _writeBytes;
+
+ /**
+     * These are memory writes inside the mmapv1 mmap-ed files. A pointer just past the end is
+     * used instead of a pointer to the beginning for the benefit of MergedWrites.
+ */
+ struct Write {
+ Write(char* addr, int len, int offset) : addr(addr), len(len), offset(offset) {}
+ Write(const Write& rhs) : addr(rhs.addr), len(rhs.len), offset(rhs.offset) {}
+ Write() : addr(0), len(0), offset(0) {}
+ bool operator<(const Write& rhs) const {
+ return addr < rhs.addr;
+ }
+
+ struct compareEnd {
+ bool operator()(const Write& lhs, const Write& rhs) const {
+ return lhs.addr + lhs.len < rhs.addr + rhs.len;
+ }
};
- /**
- * Writes are ordered by ending address, so MergedWrites::upper_bound() can find the first
- * overlapping write, if any. Overlapping and duplicate regions are forbidden, as rollback
- * of MergedChanges undoes changes by address rather than LIFO order. In addition, empty
- * regions are not allowed. Storing writes by age does not work well for large indexed
- * arrays, as coalescing is needed to bound the size of the preimage buffer.
- */
- typedef std::set<Write, Write::compareEnd> MergedWrites;
- MergedWrites _mergedWrites;
-
- // Generally it's more efficient to just store pre-images unconditionally and then
- // sort/eliminate duplicates at commit time. However, this can lead to excessive memory
- // use in cases involving large indexes arrays, where the same memory is written many
- // times. To keep the speed for the general case and bound memory use, the first few MB of
- // pre-images are stored unconditionally, but once the threshold has been exceeded, the
- // remainder is stored in a more space-efficient datastructure.
- typedef std::vector<Write> InitialWrites;
- InitialWrites _initialWrites;
-
- std::string _preimageBuffer;
-
- bool _inUnitOfWork;
-
-
- // Default is false.
- // If true, no preimages are tracked. If rollback is subsequently attempted, the process
- // will abort.
- bool _rollbackWritesDisabled;
+ char* end() const {
+ return addr + len;
+ }
+
+ char* addr;
+ int len;
+ int offset; // index into _preimageBuffer
};
+ /**
+ * Writes are ordered by ending address, so MergedWrites::upper_bound() can find the first
+ * overlapping write, if any. Overlapping and duplicate regions are forbidden, as rollback
+     * of MergedWrites undoes changes by address rather than in LIFO order. In addition, empty
+ * regions are not allowed. Storing writes by age does not work well for large indexed
+ * arrays, as coalescing is needed to bound the size of the preimage buffer.
+ */
+ typedef std::set<Write, Write::compareEnd> MergedWrites;
+ MergedWrites _mergedWrites;
+
+ // Generally it's more efficient to just store pre-images unconditionally and then
+ // sort/eliminate duplicates at commit time. However, this can lead to excessive memory
+    // use in cases involving large index arrays, where the same memory is written many
+    // times. To keep the speed for the general case and bound memory use, the first few MB of
+    // pre-images are stored unconditionally, but once the threshold has been exceeded, the
+    // remainder is stored in a more space-efficient data structure.
+ typedef std::vector<Write> InitialWrites;
+ InitialWrites _initialWrites;
+
+ std::string _preimageBuffer;
+
+ bool _inUnitOfWork;
+
+
+ // Default is false.
+ // If true, no preimages are tracked. If rollback is subsequently attempted, the process
+ // will abort.
+ bool _rollbackWritesDisabled;
+};
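
The split between _initialWrites and _mergedWrites is thus a speed/space trade: append pre-images unconditionally until the buffer crosses the threshold, then divert permanently to the de-duplicating set (permanently, because the buffer never shrinks during a unit of work). A compressed sketch of just that dispatch, with illustrative names and a deliberately simplified slow path (not part of this patch):

    #include <cstddef>
    #include <string>

    const size_t kThreshold = 16 * 1024;  // stands in for kMaxUnmergedPreimageBytes

    struct PreimageRecorder {
        std::string buffer;  // stands in for _preimageBuffer

        void recordFast(char* addr, size_t len) {
            buffer.append(addr, len);  // unconditional: may duplicate bytes
        }

        void recordMerged(char* addr, size_t len) {
            // Overlap-checking slow path; see mergingWritingPtr above. Simplified
            // here to an unconditional append so the sketch is self-contained.
            buffer.append(addr, len);
        }

        void record(char* addr, size_t len) {
            if (buffer.size() + len > kThreshold) {
                recordMerged(addr, len);  // the buffer only grows, so every later call lands here
                return;
            }
            recordFast(addr, len);
        }
    };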
+
} // namespace mongo
diff --git a/src/mongo/db/storage/mmap_v1/dur_stats.h b/src/mongo/db/storage/mmap_v1/dur_stats.h
index 27532e9ee59..8ec6f8c024f 100644
--- a/src/mongo/db/storage/mmap_v1/dur_stats.h
+++ b/src/mongo/db/storage/mmap_v1/dur_stats.h
@@ -31,61 +31,64 @@
#include "mongo/db/jsobj.h"
namespace mongo {
- namespace dur {
+namespace dur {
- /** journaling stats. the model here is that the commit thread is the only writer, and that reads are
- uncommon (from a serverStatus command and such). Thus, there should not be multicore chatter overhead.
- */
- struct Stats {
-
- struct S {
- std::string _CSVHeader() const;
- std::string _asCSV() const;
+/** journaling stats. the model here is that the commit thread is the only writer, and that reads are
+ uncommon (from a serverStatus command and such). Thus, there should not be multicore chatter overhead.
+*/
+struct Stats {
+ struct S {
+ std::string _CSVHeader() const;
+ std::string _asCSV() const;
- void _asObj(BSONObjBuilder* builder) const;
+ void _asObj(BSONObjBuilder* builder) const;
- void reset();
+ void reset();
- uint64_t getCurrentDurationMillis() const {
- return ((curTimeMicros64() - _startTimeMicros) / 1000);
- }
+ uint64_t getCurrentDurationMillis() const {
+ return ((curTimeMicros64() - _startTimeMicros) / 1000);
+ }
- // Not reported. Internal use only.
- uint64_t _startTimeMicros;
+ // Not reported. Internal use only.
+ uint64_t _startTimeMicros;
- // Reported statistics
- unsigned _durationMillis;
+ // Reported statistics
+ unsigned _durationMillis;
- unsigned _commits;
- unsigned _commitsInWriteLock;
+ unsigned _commits;
+ unsigned _commitsInWriteLock;
- uint64_t _journaledBytes;
- uint64_t _uncompressedBytes;
- uint64_t _writeToDataFilesBytes;
+ uint64_t _journaledBytes;
+ uint64_t _uncompressedBytes;
+ uint64_t _writeToDataFilesBytes;
- uint64_t _prepLogBufferMicros;
- uint64_t _writeToJournalMicros;
- uint64_t _writeToDataFilesMicros;
- uint64_t _remapPrivateViewMicros;
- uint64_t _commitsMicros;
- uint64_t _commitsInWriteLockMicros;
- };
+ uint64_t _prepLogBufferMicros;
+ uint64_t _writeToJournalMicros;
+ uint64_t _writeToDataFilesMicros;
+ uint64_t _remapPrivateViewMicros;
+ uint64_t _commitsMicros;
+ uint64_t _commitsInWriteLockMicros;
+ };
- Stats();
- void reset();
+ Stats();
+ void reset();
- BSONObj asObj() const;
+ BSONObj asObj() const;
- const S* curr() const { return &_stats[_currIdx]; }
- S* curr() { return &_stats[_currIdx]; }
+ const S* curr() const {
+ return &_stats[_currIdx];
+ }
+ S* curr() {
+ return &_stats[_currIdx];
+ }
- private:
- S _stats[5];
- unsigned _currIdx;
- };
+private:
+ S _stats[5];
+ unsigned _currIdx;
+};
- extern Stats stats;
- }
+extern Stats stats;
+}
}
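
curr() hands out one bucket of a small fixed ring (_stats[5]); this diff does not show how _currIdx advances, but the shape suggests the single writer periodically rotates to a fresh bucket so readers can see recent intervals. A hedged sketch of that pattern, in which the rotation policy is an assumption rather than something taken from this code:

    struct StatsRing {
        struct S {
            unsigned commits;
            void reset() {
                commits = 0;
            }
        };

        S _stats[5];
        unsigned _currIdx;

        StatsRing() : _currIdx(0) {
            for (int i = 0; i < 5; i++)
                _stats[i].reset();
        }

        S* curr() {
            return &_stats[_currIdx];
        }

        // Assumed: the commit thread rotates to a fresh bucket periodically.
        void rotate() {
            _currIdx = (_currIdx + 1) % 5;
            _stats[_currIdx].reset();
        }
    };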
diff --git a/src/mongo/db/storage/mmap_v1/durable_mapped_file.cpp b/src/mongo/db/storage/mmap_v1/durable_mapped_file.cpp
index e32c0b15ffe..fad28753372 100644
--- a/src/mongo/db/storage/mmap_v1/durable_mapped_file.cpp
+++ b/src/mongo/db/storage/mmap_v1/durable_mapped_file.cpp
@@ -53,258 +53,260 @@ using namespace mongoutils;
namespace mongo {
- using std::dec;
- using std::endl;
- using std::hex;
- using std::map;
- using std::pair;
- using std::string;
-
- void DurableMappedFile::remapThePrivateView() {
- verify(storageGlobalParams.dur);
-
- _willNeedRemap = false;
-
- // todo 1.9 : it turns out we require that we always remap to the same address.
- // so the remove / add isn't necessary and can be removed?
- void *old = _view_private;
- //privateViews.remove(_view_private);
- _view_private = remapPrivateView(_view_private);
- //privateViews.add(_view_private, this);
- fassert( 16112, _view_private == old );
- }
+using std::dec;
+using std::endl;
+using std::hex;
+using std::map;
+using std::pair;
+using std::string;
+
+void DurableMappedFile::remapThePrivateView() {
+ verify(storageGlobalParams.dur);
+
+ _willNeedRemap = false;
+
+ // todo 1.9 : it turns out we require that we always remap to the same address.
+ // so the remove / add isn't necessary and can be removed?
+ void* old = _view_private;
+ // privateViews.remove(_view_private);
+ _view_private = remapPrivateView(_view_private);
+ // privateViews.add(_view_private, this);
+ fassert(16112, _view_private == old);
+}
- /** register view. threadsafe */
- void PointerToDurableMappedFile::add_inlock(void *view, DurableMappedFile *f) {
- verify(view);
- verify(f);
- clearWritableBits_inlock(view, f->length());
- _views.insert(pair<void*, DurableMappedFile*>(view, f));
- }
+/** register view. threadsafe */
+void PointerToDurableMappedFile::add_inlock(void* view, DurableMappedFile* f) {
+ verify(view);
+ verify(f);
+ clearWritableBits_inlock(view, f->length());
+ _views.insert(pair<void*, DurableMappedFile*>(view, f));
+}
- /** de-register view. threadsafe */
- void PointerToDurableMappedFile::remove(void *view, size_t len) {
- if( view ) {
- stdx::lock_guard<stdx::mutex> lk(_m);
- clearWritableBits_inlock(view, len);
- _views.erase(view);
- }
+/** de-register view. threadsafe */
+void PointerToDurableMappedFile::remove(void* view, size_t len) {
+ if (view) {
+ stdx::lock_guard<stdx::mutex> lk(_m);
+ clearWritableBits_inlock(view, len);
+ _views.erase(view);
}
+}
#ifdef _WIN32
- void PointerToDurableMappedFile::clearWritableBits(void *privateView, size_t len) {
- stdx::lock_guard<stdx::mutex> lk(_m);
- clearWritableBits_inlock(privateView, len);
- }
+void PointerToDurableMappedFile::clearWritableBits(void* privateView, size_t len) {
+ stdx::lock_guard<stdx::mutex> lk(_m);
+ clearWritableBits_inlock(privateView, len);
+}
- /** notification on unmapping so we can clear writable bits */
- void PointerToDurableMappedFile::clearWritableBits_inlock(void *privateView, size_t len) {
- for (unsigned i = reinterpret_cast<size_t>(privateView) / MemoryMappedCOWBitset::ChunkSize;
- i <= (reinterpret_cast<size_t>(privateView) + len) / MemoryMappedCOWBitset::ChunkSize;
- ++i) {
- writable.clear(i);
- dassert(!writable.get(i));
- }
+/** notification on unmapping so we can clear writable bits */
+void PointerToDurableMappedFile::clearWritableBits_inlock(void* privateView, size_t len) {
+ for (unsigned i = reinterpret_cast<size_t>(privateView) / MemoryMappedCOWBitset::ChunkSize;
+ i <= (reinterpret_cast<size_t>(privateView) + len) / MemoryMappedCOWBitset::ChunkSize;
+ ++i) {
+ writable.clear(i);
+ dassert(!writable.get(i));
}
+}
- extern stdx::mutex mapViewMutex;
+extern stdx::mutex mapViewMutex;
- __declspec(noinline) void PointerToDurableMappedFile::makeChunkWritable(size_t chunkno) {
- stdx::lock_guard<stdx::mutex> lkPrivateViews(_m);
+__declspec(noinline) void PointerToDurableMappedFile::makeChunkWritable(size_t chunkno) {
+ stdx::lock_guard<stdx::mutex> lkPrivateViews(_m);
- if (writable.get(chunkno)) // double check lock
- return;
+ if (writable.get(chunkno)) // double check lock
+ return;
- // remap all maps in this chunk.
- // common case is a single map, but could have more than one with smallfiles or .ns files
- size_t chunkStart = chunkno * MemoryMappedCOWBitset::ChunkSize;
- size_t chunkNext = chunkStart + MemoryMappedCOWBitset::ChunkSize;
+ // remap all maps in this chunk.
+ // common case is a single map, but could have more than one with smallfiles or .ns files
+ size_t chunkStart = chunkno * MemoryMappedCOWBitset::ChunkSize;
+ size_t chunkNext = chunkStart + MemoryMappedCOWBitset::ChunkSize;
- stdx::lock_guard<stdx::mutex> lkMapView(mapViewMutex);
+ stdx::lock_guard<stdx::mutex> lkMapView(mapViewMutex);
- map<void*, DurableMappedFile*>::iterator i = _views.upper_bound((void*)(chunkNext - 1));
- while (1) {
- const pair<void*, DurableMappedFile*> x = *(--i);
- DurableMappedFile *mmf = x.second;
- if (mmf == 0)
- break;
+ map<void*, DurableMappedFile*>::iterator i = _views.upper_bound((void*)(chunkNext - 1));
+ while (1) {
+ const pair<void*, DurableMappedFile*> x = *(--i);
+ DurableMappedFile* mmf = x.second;
+ if (mmf == 0)
+ break;
- size_t viewStart = reinterpret_cast<size_t>(x.first);
- size_t viewEnd = viewStart + mmf->length();
- if (viewEnd <= chunkStart)
- break;
+ size_t viewStart = reinterpret_cast<size_t>(x.first);
+ size_t viewEnd = viewStart + mmf->length();
+ if (viewEnd <= chunkStart)
+ break;
- size_t protectStart = std::max(viewStart, chunkStart);
- dassert(protectStart < chunkNext);
+ size_t protectStart = std::max(viewStart, chunkStart);
+ dassert(protectStart < chunkNext);
- size_t protectEnd = std::min(viewEnd, chunkNext);
- size_t protectSize = protectEnd - protectStart;
- dassert(protectSize > 0 && protectSize <= MemoryMappedCOWBitset::ChunkSize);
+ size_t protectEnd = std::min(viewEnd, chunkNext);
+ size_t protectSize = protectEnd - protectStart;
+ dassert(protectSize > 0 && protectSize <= MemoryMappedCOWBitset::ChunkSize);
- DWORD oldProtection;
- bool ok = VirtualProtect(reinterpret_cast<void*>(protectStart),
- protectSize,
- PAGE_WRITECOPY,
- &oldProtection);
- if (!ok) {
- DWORD dosError = GetLastError();
+ DWORD oldProtection;
+ bool ok = VirtualProtect(
+ reinterpret_cast<void*>(protectStart), protectSize, PAGE_WRITECOPY, &oldProtection);
+ if (!ok) {
+ DWORD dosError = GetLastError();
- if (dosError == ERROR_COMMITMENT_LIMIT) {
- // System has run out of memory between physical RAM & page file, tell the user
- BSONObjBuilder bb;
+ if (dosError == ERROR_COMMITMENT_LIMIT) {
+ // System has run out of memory between physical RAM & page file, tell the user
+ BSONObjBuilder bb;
- ProcessInfo p;
- p.getExtraInfo(bb);
+ ProcessInfo p;
+ p.getExtraInfo(bb);
- severe() << "MongoDB has exhausted the system memory capacity.";
- severe() << "Current Memory Status: " << bb.obj().toString();
- }
+ severe() << "MongoDB has exhausted the system memory capacity.";
+ severe() << "Current Memory Status: " << bb.obj().toString();
+ }
- severe() << "VirtualProtect for " << mmf->filename()
- << " chunk " << chunkno
- << " failed with " << errnoWithDescription(dosError)
- << " (chunk size is " << protectSize
- << ", address is " << hex << protectStart << dec << ")"
- << " in mongo::makeChunkWritable, terminating"
- << endl;
+ severe() << "VirtualProtect for " << mmf->filename() << " chunk " << chunkno
+ << " failed with " << errnoWithDescription(dosError) << " (chunk size is "
+ << protectSize << ", address is " << hex << protectStart << dec << ")"
+ << " in mongo::makeChunkWritable, terminating" << endl;
- fassertFailed(16362);
- }
+ fassertFailed(16362);
}
-
- writable.set(chunkno);
}
+
+ writable.set(chunkno);
+}
#else
- void PointerToDurableMappedFile::clearWritableBits(void *privateView, size_t len) {
- }
+void PointerToDurableMappedFile::clearWritableBits(void* privateView, size_t len) {}
- void PointerToDurableMappedFile::clearWritableBits_inlock(void *privateView, size_t len) {
- }
+void PointerToDurableMappedFile::clearWritableBits_inlock(void* privateView, size_t len) {}
#endif
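
makeChunkWritable() must not VirtualProtect beyond either the view or the chunk, so each view is clamped to the chunk first: the protected range is the intersection of [viewStart, viewEnd) and [chunkStart, chunkNext). A standalone check of that arithmetic with illustrative values (not part of this patch):

    #include <algorithm>
    #include <cassert>
    #include <cstddef>

    int main() {
        const size_t chunkStart = 128, chunkNext = 192;  // one "chunk": [128, 192)
        const size_t viewStart = 100, viewEnd = 160;     // view straddles the chunk start

        size_t protectStart = std::max(viewStart, chunkStart);  // 128
        size_t protectEnd = std::min(viewEnd, chunkNext);       // 160

        assert(protectStart == 128);
        assert(protectEnd - protectStart == 32);  // only the overlapping bytes
    }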
- PointerToDurableMappedFile::PointerToDurableMappedFile() {
+PointerToDurableMappedFile::PointerToDurableMappedFile() {
#if defined(SIZE_MAX)
- size_t max = SIZE_MAX;
+ size_t max = SIZE_MAX;
#else
- size_t max = ~((size_t)0);
+ size_t max = ~((size_t)0);
#endif
- verify( max > (size_t) this ); // just checking that no one redef'd SIZE_MAX and that it is sane
+ verify(max > (size_t) this); // just checking that no one redef'd SIZE_MAX and that it is sane
- // this way we don't need any boundary checking in _find()
- _views.insert( pair<void*,DurableMappedFile*>((void*)0,(DurableMappedFile*)0) );
- _views.insert( pair<void*,DurableMappedFile*>((void*)max,(DurableMappedFile*)0) );
- }
+ // this way we don't need any boundary checking in _find()
+ _views.insert(pair<void*, DurableMappedFile*>((void*)0, (DurableMappedFile*)0));
+ _views.insert(pair<void*, DurableMappedFile*>((void*)max, (DurableMappedFile*)0));
+}
- /** underscore version of find is for when you are already locked
- @param ofs out return our offset in the view
- @return the DurableMappedFile to which this pointer belongs
- */
- DurableMappedFile* PointerToDurableMappedFile::find_inlock(void *p, /*out*/ size_t& ofs) {
- //
- // .................memory..........................
- // v1 p v2
- // [--------------------] [-------]
- //
- // e.g., _find(p) == v1
- //
- const pair<void*,DurableMappedFile*> x = *(--_views.upper_bound(p));
- DurableMappedFile *mmf = x.second;
- if( mmf ) {
- size_t o = ((char *)p) - ((char*)x.first);
- if( o < mmf->length() ) {
- ofs = o;
- return mmf;
- }
+/** underscore version of find is for when you are already locked
+ @param ofs out return our offset in the view
+ @return the DurableMappedFile to which this pointer belongs
+*/
+DurableMappedFile* PointerToDurableMappedFile::find_inlock(void* p, /*out*/ size_t& ofs) {
+ //
+ // .................memory..........................
+ // v1 p v2
+ // [--------------------] [-------]
+ //
+ // e.g., _find(p) == v1
+ //
+ const pair<void*, DurableMappedFile*> x = *(--_views.upper_bound(p));
+ DurableMappedFile* mmf = x.second;
+ if (mmf) {
+ size_t o = ((char*)p) - ((char*)x.first);
+ if (o < mmf->length()) {
+ ofs = o;
+ return mmf;
}
- return 0;
}
+ return 0;
+}
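
The unchecked *(--_views.upper_bound(p)) above is safe only because the constructor seeded the map with null entries at address zero and at the maximum address; every real pointer lands strictly between the sentinels, so the decremented iterator always dereferences a valid entry. A reduced sketch of the same lookup with an illustrative value type (not part of this patch):

    #include <cstddef>
    #include <map>

    struct File {
        size_t length;
    };

    // views must contain sentinels {(void*)0, NULL} and {(void*)-1, NULL},
    // so --upper_bound(p) always dereferences a valid entry.
    File* find(std::map<void*, File*>& views, void* p, size_t& ofs) {
        std::pair<void* const, File*> x = *(--views.upper_bound(p));
        if (File* f = x.second) {
            size_t o = static_cast<char*>(p) - static_cast<char*>(x.first);
            if (o < f->length) {  // p really falls inside this view
                ofs = o;
                return f;
            }
        }
        return 0;  // between views, or hit a sentinel
    }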
- /** find associated MMF object for a given pointer.
- threadsafe
- @param ofs out returns offset into the view of the pointer, if found.
- @return the DurableMappedFile to which this pointer belongs. null if not found.
- */
- DurableMappedFile* PointerToDurableMappedFile::find(void *p, /*out*/ size_t& ofs) {
- stdx::lock_guard<stdx::mutex> lk(_m);
- return find_inlock(p, ofs);
- }
-
- PointerToDurableMappedFile privateViews;
-
- // here so that it is precomputed...
- void DurableMappedFile::setPath(const std::string& f) {
- string suffix;
- string prefix;
- bool ok = str::rSplitOn(f, '.', prefix, suffix);
- uassert(13520, str::stream() << "DurableMappedFile only supports filenames in a certain format " << f, ok);
- if( suffix == "ns" )
- _fileSuffixNo = dur::JEntry::DotNsSuffix;
- else
- _fileSuffixNo = (int) str::toUnsigned(suffix);
+/** find associated MMF object for a given pointer.
+ threadsafe
+ @param ofs out returns offset into the view of the pointer, if found.
+ @return the DurableMappedFile to which this pointer belongs. null if not found.
+*/
+DurableMappedFile* PointerToDurableMappedFile::find(void* p, /*out*/ size_t& ofs) {
+ stdx::lock_guard<stdx::mutex> lk(_m);
+ return find_inlock(p, ofs);
+}
- _p = RelativePath::fromFullPath(storageGlobalParams.dbpath, prefix);
- }
+PointerToDurableMappedFile privateViews;
+
+// here so that it is precomputed...
+void DurableMappedFile::setPath(const std::string& f) {
+ string suffix;
+ string prefix;
+ bool ok = str::rSplitOn(f, '.', prefix, suffix);
+ uassert(13520,
+ str::stream() << "DurableMappedFile only supports filenames in a certain format " << f,
+ ok);
+ if (suffix == "ns")
+ _fileSuffixNo = dur::JEntry::DotNsSuffix;
+ else
+ _fileSuffixNo = (int)str::toUnsigned(suffix);
+
+ _p = RelativePath::fromFullPath(storageGlobalParams.dbpath, prefix);
+}
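
setPath() recovers the data-file suffix number by splitting the name at its last dot; "ns" maps to the dur::JEntry::DotNsSuffix sentinel (documented elsewhere in this patch as -1), and anything else parses as a number. A standalone sketch of the same split using std::string::rfind in place of the internal str::rSplitOn helper (not part of this patch):

    #include <cstdlib>
    #include <string>

    const int kDotNsSuffix = -1;  // illustrative; the real code uses dur::JEntry::DotNsSuffix

    int fileSuffixNo(const std::string& fname) {
        std::string::size_type dot = fname.rfind('.');
        std::string suffix = (dot == std::string::npos) ? std::string() : fname.substr(dot + 1);
        if (suffix == "ns")
            return kDotNsSuffix;
        return std::atoi(suffix.c_str());  // e.g. "a/b/c.3" -> 3
    }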
- bool DurableMappedFile::open(const std::string& fname, bool sequentialHint) {
- LOG(3) << "mmf open " << fname;
- invariant(!_view_write);
+bool DurableMappedFile::open(const std::string& fname, bool sequentialHint) {
+ LOG(3) << "mmf open " << fname;
+ invariant(!_view_write);
- setPath(fname);
- _view_write = mapWithOptions(fname.c_str(), sequentialHint ? SEQUENTIAL : 0);
- return finishOpening();
- }
+ setPath(fname);
+ _view_write = mapWithOptions(fname.c_str(), sequentialHint ? SEQUENTIAL : 0);
+ return finishOpening();
+}
- bool DurableMappedFile::create(const std::string& fname, unsigned long long& len, bool sequentialHint) {
- LOG(3) << "mmf create " << fname;
- invariant(!_view_write);
+bool DurableMappedFile::create(const std::string& fname,
+ unsigned long long& len,
+ bool sequentialHint) {
+ LOG(3) << "mmf create " << fname;
+ invariant(!_view_write);
- setPath(fname);
- _view_write = map(fname.c_str(), len, sequentialHint ? SEQUENTIAL : 0);
- return finishOpening();
- }
+ setPath(fname);
+ _view_write = map(fname.c_str(), len, sequentialHint ? SEQUENTIAL : 0);
+ return finishOpening();
+}
- bool DurableMappedFile::finishOpening() {
- LOG(3) << "mmf finishOpening " << (void*) _view_write << ' ' << filename() << " len:" << length();
- if( _view_write ) {
- if (storageGlobalParams.dur) {
- stdx::lock_guard<stdx::mutex> lk2(privateViews._mutex());
-
- _view_private = createPrivateMap();
- if( _view_private == 0 ) {
- msgasserted(13636, str::stream() << "file " << filename() << " open/create failed in createPrivateMap (look in log for more information)");
- }
- privateViews.add_inlock(_view_private, this); // note that testIntent builds use this, even though it points to view_write then...
- }
- else {
- _view_private = _view_write;
+bool DurableMappedFile::finishOpening() {
+ LOG(3) << "mmf finishOpening " << (void*)_view_write << ' ' << filename()
+ << " len:" << length();
+ if (_view_write) {
+ if (storageGlobalParams.dur) {
+ stdx::lock_guard<stdx::mutex> lk2(privateViews._mutex());
+
+ _view_private = createPrivateMap();
+ if (_view_private == 0) {
+ msgasserted(13636,
+ str::stream() << "file " << filename() << " open/create failed "
+ "in createPrivateMap "
+ "(look in log for "
+ "more information)");
}
- return true;
+ privateViews.add_inlock(
+ _view_private,
+ this); // note that testIntent builds use this, even though it points to view_write then...
+ } else {
+ _view_private = _view_write;
}
- return false;
+ return true;
}
+ return false;
+}
- DurableMappedFile::DurableMappedFile() : _willNeedRemap(false) {
- _view_write = _view_private = 0;
- }
+DurableMappedFile::DurableMappedFile() : _willNeedRemap(false) {
+ _view_write = _view_private = 0;
+}
- DurableMappedFile::~DurableMappedFile() {
- try {
- LOG(3) << "mmf close " << filename();
+DurableMappedFile::~DurableMappedFile() {
+ try {
+ LOG(3) << "mmf close " << filename();
- // If _view_private was not set, this means file open failed
- if (_view_private) {
- // Notify the durability system that we are closing a file so it can ensure we
- // will not have journaled operations with no corresponding file.
- getDur().closingFileNotification();
- }
+ // If _view_private was not set, this means file open failed
+ if (_view_private) {
+ // Notify the durability system that we are closing a file so it can ensure we
+ // will not have journaled operations with no corresponding file.
+ getDur().closingFileNotification();
+ }
- LockMongoFilesExclusive lk;
- privateViews.remove(_view_private, length());
+ LockMongoFilesExclusive lk;
+ privateViews.remove(_view_private, length());
- MemoryMappedFile::close();
- }
- catch (...) {
- error() << "exception in ~DurableMappedFile";
- }
+ MemoryMappedFile::close();
+ } catch (...) {
+ error() << "exception in ~DurableMappedFile";
}
}
+}
diff --git a/src/mongo/db/storage/mmap_v1/durable_mapped_file.h b/src/mongo/db/storage/mmap_v1/durable_mapped_file.h
index c4cfb5a6131..02906f112fe 100644
--- a/src/mongo/db/storage/mmap_v1/durable_mapped_file.h
+++ b/src/mongo/db/storage/mmap_v1/durable_mapped_file.h
@@ -37,220 +37,245 @@
namespace mongo {
- /** DurableMappedFile adds some layers atop memory mapped files - specifically our handling of private views & such.
- if you don't care about journaling/durability (temp sort files & such) use MemoryMappedFile class,
- not this.
- */
- class DurableMappedFile : private MemoryMappedFile {
- protected:
- virtual void* viewForFlushing() { return _view_write; }
+/** DurableMappedFile adds some layers atop memory mapped files - specifically our handling of private views & such.
+ if you don't care about journaling/durability (temp sort files & such) use MemoryMappedFile class,
+ not this.
+*/
+class DurableMappedFile : private MemoryMappedFile {
+protected:
+ virtual void* viewForFlushing() {
+ return _view_write;
+ }
- public:
- DurableMappedFile();
- virtual ~DurableMappedFile();
+public:
+ DurableMappedFile();
+ virtual ~DurableMappedFile();
- /** @return true if opened ok. */
- bool open(const std::string& fname, bool sequentialHint /*typically we open with this false*/);
+ /** @return true if opened ok. */
+ bool open(const std::string& fname, bool sequentialHint /*typically we open with this false*/);
- /** @return file length */
- unsigned long long length() const { return MemoryMappedFile::length(); }
+ /** @return file length */
+ unsigned long long length() const {
+ return MemoryMappedFile::length();
+ }
- std::string filename() const { return MemoryMappedFile::filename(); }
+ std::string filename() const {
+ return MemoryMappedFile::filename();
+ }
- void flush(bool sync) { MemoryMappedFile::flush(sync); }
+ void flush(bool sync) {
+ MemoryMappedFile::flush(sync);
+ }
- /* Creates with length if DNE, otherwise uses existing file length,
- passed length.
- @param sequentialHint if true will be sequentially accessed
- @return true for ok
- */
- bool create(const std::string& fname, unsigned long long& len, bool sequentialHint);
+ /* Creates with length if DNE, otherwise uses existing file length,
+ passed length.
+ @param sequentialHint if true will be sequentially accessed
+ @return true for ok
+ */
+ bool create(const std::string& fname, unsigned long long& len, bool sequentialHint);
- /* Get the "standard" view (which is the private one).
- @return the private view.
- */
- void* getView() const { return _view_private; }
-
- /* Get the "write" view (which is required for writing).
- @return the write view.
- */
- void* view_write() const { return _view_write; }
+ /* Get the "standard" view (which is the private one).
+ @return the private view.
+ */
+ void* getView() const {
+ return _view_private;
+ }
- /** for a filename a/b/c.3
- filePath() is "a/b/c"
- fileSuffixNo() is 3
- if the suffix is "ns", fileSuffixNo -1
- */
- const RelativePath& relativePath() const {
- DEV verify( !_p._p.empty() );
- return _p;
- }
+ /* Get the "write" view (which is required for writing).
+ @return the write view.
+ */
+ void* view_write() const {
+ return _view_write;
+ }
- int fileSuffixNo() const { return _fileSuffixNo; }
- HANDLE getFd() { return MemoryMappedFile::getFd(); }
+ /** for a filename a/b/c.3
+ filePath() is "a/b/c"
+ fileSuffixNo() is 3
+        if the suffix is "ns", fileSuffixNo() is -1
+ */
+ const RelativePath& relativePath() const {
+ DEV verify(!_p._p.empty());
+ return _p;
+ }
- /** true if we have written.
- set in PREPLOGBUFFER, it is NOT set immediately on write intent declaration.
- reset to false in REMAPPRIVATEVIEW
- */
- bool willNeedRemap() { return _willNeedRemap; }
- void setWillNeedRemap() { _willNeedRemap = true; }
+ int fileSuffixNo() const {
+ return _fileSuffixNo;
+ }
+ HANDLE getFd() {
+ return MemoryMappedFile::getFd();
+ }
- void remapThePrivateView();
+ /** true if we have written.
+ set in PREPLOGBUFFER, it is NOT set immediately on write intent declaration.
+ reset to false in REMAPPRIVATEVIEW
+ */
+ bool willNeedRemap() {
+ return _willNeedRemap;
+ }
+ void setWillNeedRemap() {
+ _willNeedRemap = true;
+ }
- virtual bool isDurableMappedFile() { return true; }
+ void remapThePrivateView();
- private:
+ virtual bool isDurableMappedFile() {
+ return true;
+ }
- void *_view_write;
- void *_view_private;
- bool _willNeedRemap;
- RelativePath _p; // e.g. "somepath/dbname"
- int _fileSuffixNo; // e.g. 3. -1="ns"
+private:
+ void* _view_write;
+ void* _view_private;
+ bool _willNeedRemap;
+ RelativePath _p; // e.g. "somepath/dbname"
+ int _fileSuffixNo; // e.g. 3. -1="ns"
- void setPath(const std::string& pathAndFileName);
- bool finishOpening();
- };
+ void setPath(const std::string& pathAndFileName);
+ bool finishOpening();
+};
#ifdef _WIN32
- // Simple array based bitset to track COW chunks in memory mapped files on Windows
- // A chunk is a 64MB granular region in virtual memory that we mark as COW everytime we need
- // to write to a memory mapped files on Windows
- //
- class MemoryMappedCOWBitset {
- MONGO_DISALLOW_COPYING(MemoryMappedCOWBitset);
- public:
- // Size of the chunks we mark Copy-On-Write with VirtualProtect
- static const unsigned long long ChunkSize = 64 * 1024 * 1024;
-
- // Number of chunks we store in our bitset which are really 32-bit ints
- static const unsigned long long NChunks = 64 * 1024;
-
- // Total Virtual Memory space we can cover with the bitset
- static const unsigned long long MaxChunkMemory = ChunkSize * NChunks
- * sizeof(unsigned int) * 8;
-
- // Size in bytes of the bitset we allocate
- static const unsigned long long MaxChunkBytes = NChunks * sizeof(unsigned int);
-
- // 128 TB Virtual Memory space in Windows 8.1/2012 R2, 8TB before
- static const unsigned long long MaxWinMemory =
- 128ULL * 1024 * 1024 * 1024 * 1024;
-
- // Make sure that the chunk memory covers the Max Windows user process VM space
- static_assert(MaxChunkMemory == MaxWinMemory,
- "Need a larger bitset to cover max process VM space");
- public:
- MemoryMappedCOWBitset() {
- static_assert(MemoryMappedCOWBitset::MaxChunkBytes == sizeof(bits),
- "Validate our predicted bitset size is correct");
- }
+// Simple array based bitset to track COW chunks in memory mapped files on Windows
+// A chunk is a 64MB granular region in virtual memory that we mark as COW every time we need
+// to write to a memory mapped file on Windows
+//
+class MemoryMappedCOWBitset {
+ MONGO_DISALLOW_COPYING(MemoryMappedCOWBitset);
- bool get(uintptr_t i) const {
- uintptr_t x = i / 32;
- verify(x < MemoryMappedCOWBitset::NChunks);
- return (bits[x].loadRelaxed() & (1 << (i % 32))) != 0;
- }
+public:
+ // Size of the chunks we mark Copy-On-Write with VirtualProtect
+ static const unsigned long long ChunkSize = 64 * 1024 * 1024;
- // Note: assumes caller holds privateViews.mutex
- void set(uintptr_t i) {
- uintptr_t x = i / 32;
- verify(x < MemoryMappedCOWBitset::NChunks);
- bits[x].store( bits[x].loadRelaxed() | (1 << (i % 32)));
- }
+ // Number of chunks we store in our bitset which are really 32-bit ints
+    // Number of 32-bit words backing the bitset (each word tracks 32 chunks)
- // Note: assumes caller holds privateViews.mutex
- void clear(uintptr_t i) {
- uintptr_t x = i / 32;
- verify(x < MemoryMappedCOWBitset::NChunks);
- bits[x].store(bits[x].loadRelaxed() & ~(1 << (i % 32)));
- }
+ // Total Virtual Memory space we can cover with the bitset
+ static const unsigned long long MaxChunkMemory = ChunkSize * NChunks * sizeof(unsigned int) * 8;
+
+ // Size in bytes of the bitset we allocate
+ static const unsigned long long MaxChunkBytes = NChunks * sizeof(unsigned int);
+
+ // 128 TB Virtual Memory space in Windows 8.1/2012 R2, 8TB before
+ static const unsigned long long MaxWinMemory = 128ULL * 1024 * 1024 * 1024 * 1024;
+
+ // Make sure that the chunk memory covers the Max Windows user process VM space
+ static_assert(MaxChunkMemory == MaxWinMemory,
+ "Need a larger bitset to cover max process VM space");
+
+public:
+ MemoryMappedCOWBitset() {
+ static_assert(MemoryMappedCOWBitset::MaxChunkBytes == sizeof(bits),
+ "Validate our predicted bitset size is correct");
+ }
+
+ bool get(uintptr_t i) const {
+ uintptr_t x = i / 32;
+ verify(x < MemoryMappedCOWBitset::NChunks);
+ return (bits[x].loadRelaxed() & (1 << (i % 32))) != 0;
+ }
+
+ // Note: assumes caller holds privateViews.mutex
+ void set(uintptr_t i) {
+ uintptr_t x = i / 32;
+ verify(x < MemoryMappedCOWBitset::NChunks);
+ bits[x].store(bits[x].loadRelaxed() | (1 << (i % 32)));
+ }
- private:
- // atomic as we are doing double check locking
- AtomicUInt32 bits[MemoryMappedCOWBitset::NChunks];
- };
+ // Note: assumes caller holds privateViews.mutex
+ void clear(uintptr_t i) {
+ uintptr_t x = i / 32;
+ verify(x < MemoryMappedCOWBitset::NChunks);
+ bits[x].store(bits[x].loadRelaxed() & ~(1 << (i % 32)));
+ }
+
+private:
+ // atomic as we are doing double check locking
+ AtomicUInt32 bits[MemoryMappedCOWBitset::NChunks];
+};
#endif
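
The bitset packs one flag per 64MB chunk into 32-bit words: chunk i lives at word i / 32, bit i % 32. A plain, non-atomic standalone check of that addressing; the real class uses AtomicUInt32 because get() races with the double-checked locking in makeChunkWritable():

    #include <cassert>
    #include <cstdint>

    static uint32_t bits[64 * 1024];  // one bit per chunk, zero-initialized

    bool get(uintptr_t i) {
        return (bits[i / 32] & (1u << (i % 32))) != 0;
    }
    void set(uintptr_t i) {
        bits[i / 32] |= 1u << (i % 32);
    }
    void clear(uintptr_t i) {
        bits[i / 32] &= ~(1u << (i % 32));
    }

    int main() {
        set(70);  // word 2, bit 6
        assert(get(70) && !get(71));
        clear(70);
        assert(!get(70));
    }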
- /** for durability support we want to be able to map pointers to specific DurableMappedFile objects.
- */
- class PointerToDurableMappedFile {
- MONGO_DISALLOW_COPYING(PointerToDurableMappedFile);
- public:
- PointerToDurableMappedFile();
+/** for durability support we want to be able to map pointers to specific DurableMappedFile objects.
+*/
+class PointerToDurableMappedFile {
+ MONGO_DISALLOW_COPYING(PointerToDurableMappedFile);
- /** register view.
- not-threadsafe, caller must hold _mutex()
- */
- void add_inlock(void *view, DurableMappedFile *f);
+public:
+ PointerToDurableMappedFile();
- /** de-register view.
- threadsafe
- */
- void remove(void *view, size_t length);
+ /** register view.
+ not-threadsafe, caller must hold _mutex()
+ */
+ void add_inlock(void* view, DurableMappedFile* f);
- /** find associated MMF object for a given pointer.
- threadsafe
- @param ofs out returns offset into the view of the pointer, if found.
- @return the DurableMappedFile to which this pointer belongs. null if not found.
+ /** de-register view.
+ threadsafe
*/
- DurableMappedFile* find(void *p, /*out*/ size_t& ofs);
+ void remove(void* view, size_t length);
- /** for doing many finds in a row with one lock operation */
- stdx::mutex& _mutex() { return _m; }
+ /** find associated MMF object for a given pointer.
+ threadsafe
+ @param ofs out returns offset into the view of the pointer, if found.
+ @return the DurableMappedFile to which this pointer belongs. null if not found.
+ */
+ DurableMappedFile* find(void* p, /*out*/ size_t& ofs);
- /** not-threadsafe, caller must hold _mutex() */
- DurableMappedFile* find_inlock(void *p, /*out*/ size_t& ofs);
+ /** for doing many finds in a row with one lock operation */
+ stdx::mutex& _mutex() {
+ return _m;
+ }
+
+ /** not-threadsafe, caller must hold _mutex() */
+ DurableMappedFile* find_inlock(void* p, /*out*/ size_t& ofs);
- /** not-threadsafe, caller must hold _mutex() */
- unsigned numberOfViews_inlock() const { return _views.size(); }
+ /** not-threadsafe, caller must hold _mutex() */
+ unsigned numberOfViews_inlock() const {
+ return _views.size();
+ }
- /** make the private map range writable (necessary for our windows implementation) */
- void makeWritable(void *, unsigned len);
+ /** make the private map range writable (necessary for our windows implementation) */
+ void makeWritable(void*, unsigned len);
- void clearWritableBits(void *privateView, size_t len);
+ void clearWritableBits(void* privateView, size_t len);
- private:
- void clearWritableBits_inlock(void *privateView, size_t len);
+private:
+ void clearWritableBits_inlock(void* privateView, size_t len);
#ifdef _WIN32
- void makeChunkWritable(size_t chunkno);
+ void makeChunkWritable(size_t chunkno);
#endif
- private:
- // PointerToDurableMappedFile Mutex
- //
- // Protects:
- // Protects internal consistency of data structure
- // Lock Ordering:
- // Must be taken before MapViewMutex if both are taken to prevent deadlocks
- stdx::mutex _m;
- std::map<void*, DurableMappedFile*> _views;
+private:
+ // PointerToDurableMappedFile Mutex
+ //
+ // Protects:
+ // Protects internal consistency of data structure
+ // Lock Ordering:
+ // Must be taken before MapViewMutex if both are taken to prevent deadlocks
+ stdx::mutex _m;
+ std::map<void*, DurableMappedFile*> _views;
#ifdef _WIN32
- // Tracks which memory mapped regions are marked as Copy on Write
- MemoryMappedCOWBitset writable;
+ // Tracks which memory mapped regions are marked as Copy on Write
+ MemoryMappedCOWBitset writable;
#endif
- };
+};
#ifdef _WIN32
- inline void PointerToDurableMappedFile::makeWritable(void *privateView, unsigned len) {
- size_t p = reinterpret_cast<size_t>(privateView);
- unsigned a = p / MemoryMappedCOWBitset::ChunkSize;
- unsigned b = (p + len) / MemoryMappedCOWBitset::ChunkSize;
-
- for (unsigned i = a; i <= b; i++) {
- if (!writable.get(i)) {
- makeChunkWritable(i);
- }
+inline void PointerToDurableMappedFile::makeWritable(void* privateView, unsigned len) {
+ size_t p = reinterpret_cast<size_t>(privateView);
+ unsigned a = p / MemoryMappedCOWBitset::ChunkSize;
+ unsigned b = (p + len) / MemoryMappedCOWBitset::ChunkSize;
+
+ for (unsigned i = a; i <= b; i++) {
+ if (!writable.get(i)) {
+ makeChunkWritable(i);
}
}
+}
#else
- inline void PointerToDurableMappedFile::makeWritable(void *_p, unsigned len) {
- }
+inline void PointerToDurableMappedFile::makeWritable(void* _p, unsigned len) {}
#endif
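
makeWritable() visits every chunk overlapping [p, p + len), deriving the first and last chunk index by integer division; note that an end falling exactly on a chunk boundary conservatively includes the next chunk. A standalone check of the range arithmetic with illustrative values (not part of this patch):

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    const uint64_t kChunkSize = 64ULL * 1024 * 1024;

    int main() {
        uintptr_t p = 3 * kChunkSize - 16;  // write straddles a chunk boundary
        size_t len = 64;

        uintptr_t a = p / kChunkSize;          // first chunk touched: 2
        uintptr_t b = (p + len) / kChunkSize;  // last chunk touched: 3

        for (uintptr_t i = a; i <= b; i++)
            std::printf("make chunk %llu writable\n", (unsigned long long)i);
    }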
- // allows a pointer into any private view of a DurableMappedFile to be resolved to the DurableMappedFile object
- extern PointerToDurableMappedFile privateViews;
+// allows a pointer into any private view of a DurableMappedFile to be resolved to the DurableMappedFile object
+extern PointerToDurableMappedFile privateViews;
}
diff --git a/src/mongo/db/storage/mmap_v1/durop.cpp b/src/mongo/db/storage/mmap_v1/durop.cpp
index 2a049596593..8efd7720c3e 100644
--- a/src/mongo/db/storage/mmap_v1/durop.cpp
+++ b/src/mongo/db/storage/mmap_v1/durop.cpp
@@ -47,134 +47,133 @@
namespace mongo {
- using std::unique_ptr;
- using std::shared_ptr;
- using std::endl;
- using std::string;
-
- namespace dur {
-
- /** read a durop from journal file referenced by br.
- @param opcode the opcode which has already been written from the bufreader
- */
- shared_ptr<DurOp> DurOp::read(unsigned opcode, BufReader& br) {
- shared_ptr<DurOp> op;
- switch( opcode ) {
- case JEntry::OpCode_FileCreated:
- op = shared_ptr<DurOp>( new FileCreatedOp(br) );
- break;
- case JEntry::OpCode_DropDb:
- op = shared_ptr<DurOp>( new DropDbOp(br) );
- break;
- default:
- massert(13546, (str::stream() << "journal recover: unrecognized opcode in journal " << opcode), false);
- }
- return op;
- }
+using std::unique_ptr;
+using std::shared_ptr;
+using std::endl;
+using std::string;
- void DurOp::serialize(AlignedBuilder& ab) {
- ab.appendNum(_opcode);
- _serialize(ab);
- }
+namespace dur {
- DropDbOp::DropDbOp(BufReader& log) : DurOp(JEntry::OpCode_DropDb) {
- unsigned long long reserved;
- log.read(reserved);
- log.read(reserved);
- log.readStr(_db);
- string reservedStr;
- log.readStr(reservedStr);
- }
+/** read a durop from journal file referenced by br.
+    @param opcode the opcode which has already been read from the bufreader
+*/
+shared_ptr<DurOp> DurOp::read(unsigned opcode, BufReader& br) {
+ shared_ptr<DurOp> op;
+ switch (opcode) {
+ case JEntry::OpCode_FileCreated:
+ op = shared_ptr<DurOp>(new FileCreatedOp(br));
+ break;
+ case JEntry::OpCode_DropDb:
+ op = shared_ptr<DurOp>(new DropDbOp(br));
+ break;
+ default:
+ massert(13546,
+ (str::stream() << "journal recover: unrecognized opcode in journal " << opcode),
+ false);
+ }
+ return op;
+}
- void DropDbOp::_serialize(AlignedBuilder& ab) {
- ab.appendNum((unsigned long long) 0); // reserved for future use
- ab.appendNum((unsigned long long) 0); // reserved for future use
- ab.appendStr(_db);
- ab.appendStr(""); // reserved
- }
+void DurOp::serialize(AlignedBuilder& ab) {
+ ab.appendNum(_opcode);
+ _serialize(ab);
+}
- /** throws */
- void DropDbOp::replay() {
- log() << "recover replay drop db " << _db << endl;
- _deleteDataFiles(_db);
- }
+DropDbOp::DropDbOp(BufReader& log) : DurOp(JEntry::OpCode_DropDb) {
+ unsigned long long reserved;
+ log.read(reserved);
+ log.read(reserved);
+ log.readStr(_db);
+ string reservedStr;
+ log.readStr(reservedStr);
+}
- FileCreatedOp::FileCreatedOp(const std::string& f, unsigned long long l) :
- DurOp(JEntry::OpCode_FileCreated) {
- _p = RelativePath::fromFullPath(storageGlobalParams.dbpath, f);
- _len = l;
- }
+void DropDbOp::_serialize(AlignedBuilder& ab) {
+ ab.appendNum((unsigned long long)0); // reserved for future use
+ ab.appendNum((unsigned long long)0); // reserved for future use
+ ab.appendStr(_db);
+ ab.appendStr(""); // reserved
+}
- FileCreatedOp::FileCreatedOp(BufReader& log) : DurOp(JEntry::OpCode_FileCreated) {
- unsigned long long reserved;
- log.read(reserved);
- log.read(reserved);
- log.read(_len); // size of file, not length of name
- string s;
- log.readStr(s);
- _p._p = s;
- }
+/** throws */
+void DropDbOp::replay() {
+ log() << "recover replay drop db " << _db << endl;
+ _deleteDataFiles(_db);
+}
- void FileCreatedOp::_serialize(AlignedBuilder& ab) {
- ab.appendNum((unsigned long long) 0); // reserved for future use
- ab.appendNum((unsigned long long) 0); // reserved for future use
- ab.appendNum(_len);
- ab.appendStr(_p.toString());
- }
+FileCreatedOp::FileCreatedOp(const std::string& f, unsigned long long l)
+ : DurOp(JEntry::OpCode_FileCreated) {
+ _p = RelativePath::fromFullPath(storageGlobalParams.dbpath, f);
+ _len = l;
+}
- string FileCreatedOp::toString() {
- return str::stream() << "FileCreatedOp " << _p.toString() << ' ' << _len/1024.0/1024.0 << "MB";
- }
+FileCreatedOp::FileCreatedOp(BufReader& log) : DurOp(JEntry::OpCode_FileCreated) {
+ unsigned long long reserved;
+ log.read(reserved);
+ log.read(reserved);
+ log.read(_len); // size of file, not length of name
+ string s;
+ log.readStr(s);
+ _p._p = s;
+}
- // if an operation deletes or creates a file (or moves etc.), it may need files closed.
- bool FileCreatedOp::needFilesClosed() {
- return boost::filesystem::exists( _p.asFullPath() );
- }
+void FileCreatedOp::_serialize(AlignedBuilder& ab) {
+ ab.appendNum((unsigned long long)0); // reserved for future use
+ ab.appendNum((unsigned long long)0); // reserved for future use
+ ab.appendNum(_len);
+ ab.appendStr(_p.toString());
+}
- void FileCreatedOp::replay() {
- // i believe the code assumes new files are filled with zeros. thus we have to recreate the file,
- // or rewrite at least, even if it were the right length. perhaps one day we should change that
- // although easier to avoid defects if we assume it is zeros perhaps.
- string full = _p.asFullPath();
- if( boost::filesystem::exists(full) ) {
- try {
- boost::filesystem::remove(full);
- }
- catch(std::exception& e) {
- LOG(1) << "recover info FileCreateOp::replay unlink " << e.what() << endl;
- }
- }
-
- log() << "recover create file " << full << ' ' << _len/1024.0/1024.0 << "MB" << endl;
- if( boost::filesystem::exists(full) ) {
- // first delete if exists.
- try {
- boost::filesystem::remove(full);
- }
- catch(...) {
- log() << "warning could not delete file " << full << endl;
- }
- }
- ensureParentDirCreated(full);
- File f;
- f.open(full.c_str());
- massert(13547, str::stream() << "recover couldn't create file " << full, f.is_open());
- unsigned long long left = _len;
- const unsigned blksz = 64 * 1024;
- unique_ptr<char[]> v( new char[blksz] );
- memset( v.get(), 0, blksz );
- fileofs ofs = 0;
- while( left ) {
- unsigned long long w = left < blksz ? left : blksz;
- f.write(ofs, v.get(), (unsigned) w);
- left -= w;
- ofs += w;
- }
- f.fsync();
- flushMyDirectory(full);
- massert(13628, str::stream() << "recover failure writing file " << full, !f.bad() );
- }
+string FileCreatedOp::toString() {
+ return str::stream() << "FileCreatedOp " << _p.toString() << ' ' << _len / 1024.0 / 1024.0
+ << "MB";
+}
+// if an operation deletes or creates a file (or moves etc.), it may need files closed.
+bool FileCreatedOp::needFilesClosed() {
+ return boost::filesystem::exists(_p.asFullPath());
+}
+
+void FileCreatedOp::replay() {
+    // I believe the code assumes new files are filled with zeros. Thus we have to recreate the
+    // file, or at least rewrite it, even if it were the right length. Perhaps one day we should
+    // change that, although it is easier to avoid defects if we assume it is zeros.
+ string full = _p.asFullPath();
+ if (boost::filesystem::exists(full)) {
+ try {
+ boost::filesystem::remove(full);
+ } catch (std::exception& e) {
+ LOG(1) << "recover info FileCreateOp::replay unlink " << e.what() << endl;
+ }
}
+ log() << "recover create file " << full << ' ' << _len / 1024.0 / 1024.0 << "MB" << endl;
+ if (boost::filesystem::exists(full)) {
+ // first delete if exists.
+ try {
+ boost::filesystem::remove(full);
+ } catch (...) {
+ log() << "warning could not delete file " << full << endl;
+ }
+ }
+ ensureParentDirCreated(full);
+ File f;
+ f.open(full.c_str());
+ massert(13547, str::stream() << "recover couldn't create file " << full, f.is_open());
+ unsigned long long left = _len;
+ const unsigned blksz = 64 * 1024;
+ unique_ptr<char[]> v(new char[blksz]);
+ memset(v.get(), 0, blksz);
+ fileofs ofs = 0;
+ while (left) {
+ unsigned long long w = left < blksz ? left : blksz;
+ f.write(ofs, v.get(), (unsigned)w);
+ left -= w;
+ ofs += w;
+ }
+ f.fsync();
+ flushMyDirectory(full);
+ massert(13628, str::stream() << "recover failure writing file " << full, !f.bad());
+}
+}
}
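The BufReader constructor and _serialize above are mirror images; the journal payload for a FileCreatedOp is three fixed-width fields followed by the path. A layout sketch derived from the code above (illustrative, not part of the patch):

// FileCreatedOp journal payload, as written by _serialize and read back by
// the BufReader constructor (the opcode itself is written by DurOp):
//
//   unsigned long long  reserved   // always 0, reserved for future use
//   unsigned long long  reserved   // always 0, reserved for future use
//   unsigned long long  _len       // size of the file to create, in bytes
//   string              _p         // dbpath-relative path of the file
//
// Fields must be consumed in exactly the order they were appended.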
diff --git a/src/mongo/db/storage/mmap_v1/durop.h b/src/mongo/db/storage/mmap_v1/durop.h
index 9ebddb3dfc0..a798f210616 100644
--- a/src/mongo/db/storage/mmap_v1/durop.h
+++ b/src/mongo/db/storage/mmap_v1/durop.h
@@ -37,86 +37,93 @@
namespace mongo {
- class AlignedBuilder;
-
- namespace dur {
-
- /** DurOp - Operations we journal that aren't just basic writes.
- *
- * Basic writes are logged as JEntry's, and indicated in ram temporarily as struct dur::WriteIntent.
- * We don't make WriteIntent inherit from DurOp to keep it as lean as possible as there will be millions of
- * them (we don't want a vtable for example there).
- *
- * For each op we want to journal, we define a subclass.
- */
- class DurOp { /* copyable */
- public:
- // @param opcode a sentinel value near max unsigned which uniquely identifies the operation.
- // @see dur::JEntry
- DurOp(unsigned opcode) : _opcode(opcode) { }
-
- virtual ~DurOp() { }
-
- /** serialize the op out to a builder which will then be written (presumably) to the journal */
- void serialize(AlignedBuilder& ab);
-
- /** read a durop from journal file referenced by br.
- @param opcode the opcode which has already been written from the bufreader
- */
- static std::shared_ptr<DurOp> read(unsigned opcode, BufReader& br);
-
- /** replay the operation (during recovery)
- throws
-
- For now, these are not replayed during the normal WRITETODATAFILES phase, since these
- operations are handled in other parts of the code. At some point this may change.
- */
- virtual void replay() = 0;
-
- virtual std::string toString() = 0;
-
- /** if the op requires all file to be closed before doing its work, returns true. */
- virtual bool needFilesClosed() { return false; }
-
- protected:
- /** DurOp will have already written the opcode for you */
- virtual void _serialize(AlignedBuilder& ab) = 0;
-
- private:
- const unsigned _opcode;
- };
-
- /** indicates creation of a new file */
- class FileCreatedOp : public DurOp {
- public:
- FileCreatedOp(BufReader& log);
- /** param f filename to create with path */
- FileCreatedOp(const std::string& f, unsigned long long l);
- virtual void replay();
- virtual std::string toString();
- virtual bool needFilesClosed();
- protected:
- virtual void _serialize(AlignedBuilder& ab);
- private:
- RelativePath _p;
- unsigned long long _len; // size of file, not length of name
- };
-
- /** record drop of a database */
- class DropDbOp : public DurOp {
- public:
- DropDbOp(BufReader& log);
- DropDbOp(const std::string& db) :
- DurOp(JEntry::OpCode_DropDb), _db(db) { }
- virtual void replay();
- virtual std::string toString() { return std::string("DropDbOp ") + _db; }
- virtual bool needFilesClosed() { return true; }
- protected:
- virtual void _serialize(AlignedBuilder& ab);
- private:
- std::string _db;
- };
+class AlignedBuilder;
+
+namespace dur {
+
+/** DurOp - Operations we journal that aren't just basic writes.
+ *
+ * Basic writes are logged as JEntry's, and indicated temporarily in RAM as struct dur::WriteIntent.
+ * We don't make WriteIntent inherit from DurOp to keep it as lean as possible as there will be millions of
+ * them (we don't want a vtable for example there).
+ *
+ * For each op we want to journal, we define a subclass.
+ */
+class DurOp {/* copyable */
+public:
+ // @param opcode a sentinel value near max unsigned which uniquely identifies the operation.
+ // @see dur::JEntry
+ DurOp(unsigned opcode) : _opcode(opcode) {}
+
+ virtual ~DurOp() {}
+
+ /** serialize the op out to a builder which will then be written (presumably) to the journal */
+ void serialize(AlignedBuilder& ab);
+
+ /** read a durop from journal file referenced by br.
+ @param opcode the opcode which has already been written from the bufreader
+ */
+ static std::shared_ptr<DurOp> read(unsigned opcode, BufReader& br);
+
+ /** replay the operation (during recovery)
+ throws
+
+ For now, these are not replayed during the normal WRITETODATAFILES phase, since these
+ operations are handled in other parts of the code. At some point this may change.
+ */
+ virtual void replay() = 0;
+
+ virtual std::string toString() = 0;
+
+ /** if the op requires all files to be closed before doing its work, returns true. */
+ virtual bool needFilesClosed() {
+ return false;
+ }
+protected:
+ /** DurOp will have already written the opcode for you */
+ virtual void _serialize(AlignedBuilder& ab) = 0;
+
+private:
+ const unsigned _opcode;
+};
+
+/** indicates creation of a new file */
+class FileCreatedOp : public DurOp {
+public:
+ FileCreatedOp(BufReader& log);
+ /** param f filename to create with path */
+ FileCreatedOp(const std::string& f, unsigned long long l);
+ virtual void replay();
+ virtual std::string toString();
+ virtual bool needFilesClosed();
+
+protected:
+ virtual void _serialize(AlignedBuilder& ab);
+
+private:
+ RelativePath _p;
+ unsigned long long _len; // size of file, not length of name
+};
+
+/** record drop of a database */
+class DropDbOp : public DurOp {
+public:
+ DropDbOp(BufReader& log);
+ DropDbOp(const std::string& db) : DurOp(JEntry::OpCode_DropDb), _db(db) {}
+ virtual void replay();
+ virtual std::string toString() {
+ return std::string("DropDbOp ") + _db;
+ }
+ virtual bool needFilesClosed() {
+ return true;
}
+protected:
+ virtual void _serialize(AlignedBuilder& ab);
+
+private:
+ std::string _db;
+};
+}
}
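Per the DurOp comment above, each journaled operation is a small subclass implementing the same contract: a BufReader constructor for recovery, _serialize for writing, replay, and toString. A hypothetical subclass following that shape (the name FileRemovedOp and its opcode are illustrative only, not part of this patch):

// Hypothetical DurOp subclass, sketched from the pattern above; a real one
// would register its own JEntry opcode rather than reuse an existing value.
class FileRemovedOp : public DurOp {
public:
    FileRemovedOp(BufReader& log);              // deserialize during recovery
    explicit FileRemovedOp(const std::string& f)
        : DurOp(JEntry::OpCode_FileCreated /* placeholder opcode */), _p(f) {}
    virtual void replay();                      // redo the removal on recovery
    virtual std::string toString() {
        return std::string("FileRemovedOp ") + _p;
    }
    virtual bool needFilesClosed() {
        return true;                            // it manipulates files on disk
    }
protected:
    virtual void _serialize(AlignedBuilder& ab);  // opcode already written
private:
    std::string _p;
};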
diff --git a/src/mongo/db/storage/mmap_v1/extent.cpp b/src/mongo/db/storage/mmap_v1/extent.cpp
index 905e4d28a9e..7f6d41cde80 100644
--- a/src/mongo/db/storage/mmap_v1/extent.cpp
+++ b/src/mongo/db/storage/mmap_v1/extent.cpp
@@ -36,82 +36,70 @@
namespace mongo {
- using std::iostream;
- using std::string;
- using std::vector;
+using std::iostream;
+using std::string;
+using std::vector;
- BOOST_STATIC_ASSERT( sizeof(Extent)-4 == 48+128 );
+BOOST_STATIC_ASSERT(sizeof(Extent) - 4 == 48 + 128);
- BSONObj Extent::dump() const {
- return BSON( "loc" << myLoc.toString()
- << "xnext" << xnext.toString()
- << "xprev" << xprev.toString()
- << "nsdiag" << nsDiagnostic.toString()
- << "size" << length
- << "firstRecord"
- << firstRecord.toString()
- << "lastRecord" << lastRecord.toString() );
- }
+BSONObj Extent::dump() const {
+ return BSON("loc" << myLoc.toString() << "xnext" << xnext.toString() << "xprev"
+ << xprev.toString() << "nsdiag" << nsDiagnostic.toString() << "size" << length
+ << "firstRecord" << firstRecord.toString() << "lastRecord"
+ << lastRecord.toString());
+}
- void Extent::dump(iostream& s) const {
- s << " loc:" << myLoc.toString()
- << " xnext:" << xnext.toString()
- << " xprev:" << xprev.toString() << '\n';
- s << " nsdiag:" << nsDiagnostic.toString() << '\n';
- s << " size:" << length
- << " firstRecord:" << firstRecord.toString()
- << " lastRecord:" << lastRecord.toString() << '\n';
- }
+void Extent::dump(iostream& s) const {
+ s << " loc:" << myLoc.toString() << " xnext:" << xnext.toString()
+ << " xprev:" << xprev.toString() << '\n';
+ s << " nsdiag:" << nsDiagnostic.toString() << '\n';
+ s << " size:" << length << " firstRecord:" << firstRecord.toString()
+ << " lastRecord:" << lastRecord.toString() << '\n';
+}
- bool Extent::validates(const DiskLoc diskLoc, vector<string>* errors) const {
- bool extentOk = true;
- if (magic != extentSignature) {
- if (errors) {
- StringBuilder sb;
- sb << "bad extent signature " << integerToHex(magic)
- << " in extent " << diskLoc.toString();
- errors->push_back( sb.str() );
- }
- extentOk = false;
+bool Extent::validates(const DiskLoc diskLoc, vector<string>* errors) const {
+ bool extentOk = true;
+ if (magic != extentSignature) {
+ if (errors) {
+ StringBuilder sb;
+ sb << "bad extent signature " << integerToHex(magic) << " in extent "
+ << diskLoc.toString();
+ errors->push_back(sb.str());
}
- if (myLoc != diskLoc) {
- if (errors) {
- StringBuilder sb;
- sb << "extent " << diskLoc.toString()
- << " self-pointer is " << myLoc.toString();
- errors->push_back( sb.str() );
- }
- extentOk = false;
+ extentOk = false;
+ }
+ if (myLoc != diskLoc) {
+ if (errors) {
+ StringBuilder sb;
+ sb << "extent " << diskLoc.toString() << " self-pointer is " << myLoc.toString();
+ errors->push_back(sb.str());
}
- if (firstRecord.isNull() != lastRecord.isNull()) {
- if (errors) {
- StringBuilder sb;
- if (firstRecord.isNull()) {
- sb << "in extent " << diskLoc.toString()
- << ", firstRecord is null but lastRecord is "
- << lastRecord.toString();
- }
- else {
- sb << "in extent " << diskLoc.toString()
- << ", firstRecord is " << firstRecord.toString()
- << " but lastRecord is null";
- }
- errors->push_back( sb.str() );
+ extentOk = false;
+ }
+ if (firstRecord.isNull() != lastRecord.isNull()) {
+ if (errors) {
+ StringBuilder sb;
+ if (firstRecord.isNull()) {
+ sb << "in extent " << diskLoc.toString()
+ << ", firstRecord is null but lastRecord is " << lastRecord.toString();
+ } else {
+ sb << "in extent " << diskLoc.toString() << ", firstRecord is "
+ << firstRecord.toString() << " but lastRecord is null";
}
- extentOk = false;
+ errors->push_back(sb.str());
}
- static const int minSize = 0x1000;
- if (length < minSize) {
- if (errors) {
- StringBuilder sb;
- sb << "length of extent " << diskLoc.toString()
- << " is " << length
- << ", which is less than minimum length of " << minSize;
- errors->push_back( sb.str() );
- }
- extentOk = false;
+ extentOk = false;
+ }
+ static const int minSize = 0x1000;
+ if (length < minSize) {
+ if (errors) {
+ StringBuilder sb;
+ sb << "length of extent " << diskLoc.toString() << " is " << length
+ << ", which is less than minimum length of " << minSize;
+ errors->push_back(sb.str());
}
- return extentOk;
+ extentOk = false;
}
-
+ return extentOk;
+}
}
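Because validates() accumulates messages instead of failing fast, a caller can report every problem in one pass. A minimal sketch, assuming an Extent* and its DiskLoc obtained from surrounding validation code:

// Sketch: collect and log all problems validates() finds for one extent.
std::vector<std::string> errors;
if (!extent->validates(extentLoc, &errors)) {
    for (size_t i = 0; i < errors.size(); ++i)
        log() << "extent problem: " << errors[i] << endl;
}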
diff --git a/src/mongo/db/storage/mmap_v1/extent.h b/src/mongo/db/storage/mmap_v1/extent.h
index a25d34c49e0..9d6d3935346 100644
--- a/src/mongo/db/storage/mmap_v1/extent.h
+++ b/src/mongo/db/storage/mmap_v1/extent.h
@@ -39,45 +39,50 @@
namespace mongo {
- /* extents are datafile regions where all the records within the region
- belong to the same namespace.
+/* extents are datafile regions where all the records within the region
+ belong to the same namespace.
- (11:12:35 AM) dm10gen: when the extent is allocated, all its empty space is stuck into one big DeletedRecord
- (11:12:55 AM) dm10gen: and that is placed on the free list
- */
+(11:12:35 AM) dm10gen: when the extent is allocated, all its empty space is stuck into one big DeletedRecord
+(11:12:55 AM) dm10gen: and that is placed on the free list
+*/
#pragma pack(1)
- struct Extent {
- enum { extentSignature = 0x41424344 };
- unsigned magic;
- DiskLoc myLoc;
+struct Extent {
+ enum { extentSignature = 0x41424344 };
+ unsigned magic;
+ DiskLoc myLoc;
- /* next/prev extent for this namespace */
- DiskLoc xnext;
- DiskLoc xprev;
+ /* next/prev extent for this namespace */
+ DiskLoc xnext;
+ DiskLoc xprev;
- /* which namespace this extent is for. this is just for troubleshooting really
- and won't even be correct if the collection were renamed!
- */
- Namespace nsDiagnostic;
+ /* which namespace this extent is for. this is just for troubleshooting really
+ and won't even be correct if the collection were renamed!
+ */
+ Namespace nsDiagnostic;
- int length; /* size of the extent, including these fields */
- DiskLoc firstRecord;
- DiskLoc lastRecord;
- char _extentData[4];
+ int length; /* size of the extent, including these fields */
+ DiskLoc firstRecord;
+ DiskLoc lastRecord;
+ char _extentData[4];
- // -----
+ // -----
- bool validates(const DiskLoc diskLoc, std::vector<std::string>* errors = NULL) const;
+ bool validates(const DiskLoc diskLoc, std::vector<std::string>* errors = NULL) const;
- BSONObj dump() const;
+ BSONObj dump() const;
- void dump(std::iostream& s) const;
+ void dump(std::iostream& s) const;
- bool isOk() const { return magic == extentSignature; }
- void assertOk() const { verify(isOk()); }
+ bool isOk() const {
+ return magic == extentSignature;
+ }
+ void assertOk() const {
+ verify(isOk());
+ }
- static int HeaderSize() { return sizeof(Extent)-4; }
- };
+ static int HeaderSize() {
+ return sizeof(Extent) - 4;
+ }
+};
#pragma pack()
-
}
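The BOOST_STATIC_ASSERT in extent.cpp pins the pack(1) layout above at 48 bytes of fixed fields plus the 128-byte Namespace; _extentData[4] merely marks where record data begins. The byte accounting, assuming the usual two-int DiskLoc:

// Byte accounting behind HeaderSize() == sizeof(Extent) - 4 == 48 + 128:
//   magic                        4
//   myLoc, xnext, xprev      3 x 8   (DiskLoc: two packed 32-bit ints)
//   length                       4
//   firstRecord, lastRecord  2 x 8   -> fixed fields total 48
//   nsDiagnostic               128   (fixed-size Namespace buffer)
// _extentData[4] is the start of record data, so HeaderSize() excludes it.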
diff --git a/src/mongo/db/storage/mmap_v1/extent_manager.cpp b/src/mongo/db/storage/mmap_v1/extent_manager.cpp
index 8efc2cbc50f..15222fac01a 100644
--- a/src/mongo/db/storage/mmap_v1/extent_manager.cpp
+++ b/src/mongo/db/storage/mmap_v1/extent_manager.cpp
@@ -34,66 +34,64 @@
namespace mongo {
- int ExtentManager::quantizeExtentSize( int size ) const {
+int ExtentManager::quantizeExtentSize(int size) const {
+ if (size == maxSize()) {
+ // no point doing quantizing for the entire file
+ return size;
+ }
- if ( size == maxSize() ) {
- // no point doing quantizing for the entire file
- return size;
- }
+ invariant(size <= maxSize());
- invariant( size <= maxSize() );
+ // make sizes align with VM page size
+ int newSize = (size + 0xfff) & 0xfffff000;
- // make sizes align with VM page size
- int newSize = (size + 0xfff) & 0xfffff000;
+ if (newSize > maxSize()) {
+ return maxSize();
+ }
- if ( newSize > maxSize() ) {
- return maxSize();
- }
+ if (newSize < minSize()) {
+ return minSize();
+ }
- if ( newSize < minSize() ) {
- return minSize();
- }
+ return newSize;
+}
- return newSize;
+int ExtentManager::followupSize(int len, int lastExtentLen) const {
+ invariant(len < maxSize());
+ int x = initialSize(len);
+ // changed from 1.20 to 1.35 in v2.1.x to get to larger extent size faster
+ int y = (int)(lastExtentLen < 4000000 ? lastExtentLen * 4.0 : lastExtentLen * 1.35);
+ int sz = y > x ? y : x;
+
+ if (sz < lastExtentLen) {
+ // this means there was an int overflow
+ // so we should turn it into maxSize
+ return maxSize();
+ } else if (sz > maxSize()) {
+ return maxSize();
}
- int ExtentManager::followupSize( int len, int lastExtentLen ) const {
- invariant( len < maxSize() );
- int x = initialSize(len);
- // changed from 1.20 to 1.35 in v2.1.x to get to larger extent size faster
- int y = (int) (lastExtentLen < 4000000 ? lastExtentLen * 4.0 : lastExtentLen * 1.35);
- int sz = y > x ? y : x;
-
- if ( sz < lastExtentLen ) {
- // this means there was an int overflow
- // so we should turn it into maxSize
- return maxSize();
- }
- else if ( sz > maxSize() ) {
- return maxSize();
- }
-
- sz = quantizeExtentSize( sz );
- verify( sz >= len );
-
- return sz;
- }
+ sz = quantizeExtentSize(sz);
+ verify(sz >= len);
- int ExtentManager::initialSize( int len ) const {
- invariant( len <= maxSize() );
+ return sz;
+}
- long long sz = len * 16;
- if ( len < 1000 )
- sz = len * 64;
+int ExtentManager::initialSize(int len) const {
+ invariant(len <= maxSize());
- if ( sz >= maxSize() )
- return maxSize();
+ long long sz = len * 16;
+ if (len < 1000)
+ sz = len * 64;
- if ( sz <= minSize() )
- return minSize();
+ if (sz >= maxSize())
+ return maxSize();
- int z = ExtentManager::quantizeExtentSize( sz );
- verify( z >= len );
- return z;
- }
+ if (sz <= minSize())
+ return minSize();
+
+ int z = ExtentManager::quantizeExtentSize(sz);
+ verify(z >= len);
+ return z;
+}
}
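The mask in quantizeExtentSize rounds up to the 4KB VM page size, and followupSize grows small extents by 4x and large ones by 1.35x before quantizing. A runnable check of that arithmetic (values are illustrative):

#include <iostream>
int main() {
    int size = 5000;
    int quantized = (size + 0xfff) & 0xfffff000;  // 8192: next 4KB boundary
    long long last = 1000000;                     // < 4MB, so quadruple it
    long long next = (long long)(last < 4000000 ? last * 4.0 : last * 1.35);
    std::cout << quantized << ' ' << next << '\n';  // prints "8192 4000000"
    return 0;
}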
diff --git a/src/mongo/db/storage/mmap_v1/extent_manager.h b/src/mongo/db/storage/mmap_v1/extent_manager.h
index 54191faa2cf..6151f8e11a2 100644
--- a/src/mongo/db/storage/mmap_v1/extent_manager.h
+++ b/src/mongo/db/storage/mmap_v1/extent_manager.h
@@ -40,141 +40,141 @@
namespace mongo {
- class DataFile;
- class MmapV1RecordHeader;
- class RecordFetcher;
- class OperationContext;
+class DataFile;
+class MmapV1RecordHeader;
+class RecordFetcher;
+class OperationContext;
- struct Extent;
+struct Extent;
+
+/**
+ * ExtentManager basics
+ * - one per database
+ * - responsible for managing <db>.# files
+ * - NOT responsible for .ns file
+ * - gives out extents
+ * - responsible for figuring out how to get a new extent
+ * - can use any method it wants to do so
+ * - this structure is NOT stored on disk
+ * - files will not be removed from the EM
+ * - extent size and loc are immutable
+ * - this class is thread safe, once constructed and init()-ialized
+ */
+class ExtentManager {
+ MONGO_DISALLOW_COPYING(ExtentManager);
+
+public:
+ ExtentManager() {}
+
+ virtual ~ExtentManager() {}
+
+ /**
+ * opens all current files
+ */
+ virtual Status init(OperationContext* txn) = 0;
+
+ virtual int numFiles() const = 0;
+ virtual long long fileSize() const = 0;
+
+ // must call Extent::reuse on the returned extent
+ virtual DiskLoc allocateExtent(OperationContext* txn,
+ bool capped,
+ int size,
+ bool enforceQuota) = 0;
+
+ /**
+ * firstExt has to be == lastExt or a chain
+ */
+ virtual void freeExtents(OperationContext* txn, DiskLoc firstExt, DiskLoc lastExt) = 0;
/**
- * ExtentManager basics
- * - one per database
- * - responsible for managing <db>.# files
- * - NOT responsible for .ns file
- * - gives out extents
- * - responsible for figuring out how to get a new extent
- * - can use any method it wants to do so
- * - this structure is NOT stored on disk
- * - files will not be removed from the EM
- * - extent size and loc are immutable
- * - this class is thread safe, once constructed and init()-ialized
+ * frees a single extent
+ * ignores all fields in the Extent except: magic, myLoc, length
*/
- class ExtentManager {
- MONGO_DISALLOW_COPYING( ExtentManager );
+ virtual void freeExtent(OperationContext* txn, DiskLoc extent) = 0;
+ /**
+ * Retrieve statistics on the free list managed by this ExtentManager.
+ * @param numExtents - non-null pointer to an int that will receive the number of extents
+ * @param totalFreeSizeBytes - non-null pointer to an int64_t receiving the total free
+ * space in the free list.
+ */
+ virtual void freeListStats(OperationContext* txn,
+ int* numExtents,
+ int64_t* totalFreeSizeBytes) const = 0;
+
+ /**
+ * @param loc - has to be for a specific MmapV1RecordHeader
+ * Note(erh): this sadly cannot be removed.
+ * A MmapV1RecordHeader DiskLoc has an offset from a file, while a RecordStore really wants an offset
+ * from an extent. This intrinsically links an original record store to the original extent
+ * manager.
+ */
+ virtual MmapV1RecordHeader* recordForV1(const DiskLoc& loc) const = 0;
+
+ /**
+ * The extent manager tracks accesses to DiskLocs. This returns non-NULL if the DiskLoc has
+ * been recently accessed, and therefore has likely been paged into physical memory.
+ * Returns nullptr if the DiskLoc is Null.
+ *
+ */
+ virtual std::unique_ptr<RecordFetcher> recordNeedsFetch(const DiskLoc& loc) const = 0;
+
+ /**
+ * @param loc - has to be for a specific MmapV1RecordHeader (not an Extent)
+ * Note(erh) see comment on recordFor
+ */
+ virtual Extent* extentForV1(const DiskLoc& loc) const = 0;
+
+ /**
+ * @param loc - has to be for a specific MmapV1RecordHeader (not an Extent)
+ * Note(erh) see comment on recordFor
+ */
+ virtual DiskLoc extentLocForV1(const DiskLoc& loc) const = 0;
+
+ /**
+ * @param loc - has to be for a specific Extent
+ */
+ virtual Extent* getExtent(const DiskLoc& loc, bool doSanityCheck = true) const = 0;
+
+ /**
+ * @return maximum size of an Extent
+ */
+ virtual int maxSize() const = 0;
+
+ /**
+ * @return minimum size of an Extent
+ */
+ virtual int minSize() const {
+ return 0x1000;
+ }
+
+ /**
+ * @param recordLen length of record we need
+ * @param lastExtentLen size of the last extent, which is a factor in the next extent size
+ */
+ virtual int followupSize(int recordLen, int lastExtentLen) const;
+
+ /** get a suggested size for the first extent in a namespace
+ * @param recordLen length of record we need to insert
+ */
+ virtual int initialSize(int recordLen) const;
+
+ /**
+ * quantizes extent size to >= min + page boundary
+ */
+ virtual int quantizeExtentSize(int size) const;
+
+ // see cacheHint methods
+ enum HintType { Sequential, Random };
+ class CacheHint {
public:
- ExtentManager(){}
-
- virtual ~ExtentManager(){}
-
- /**
- * opens all current files
- */
- virtual Status init(OperationContext* txn) = 0;
-
- virtual int numFiles() const = 0;
- virtual long long fileSize() const = 0;
-
- // must call Extent::reuse on the returned extent
- virtual DiskLoc allocateExtent( OperationContext* txn,
- bool capped,
- int size,
- bool enforceQuota ) = 0;
-
- /**
- * firstExt has to be == lastExt or a chain
- */
- virtual void freeExtents( OperationContext* txn,
- DiskLoc firstExt, DiskLoc lastExt ) = 0;
-
- /**
- * frees a single extent
- * ignores all fields in the Extent except: magic, myLoc, length
- */
- virtual void freeExtent( OperationContext* txn, DiskLoc extent ) = 0;
-
- /**
- * Retrieve statistics on the the free list managed by this ExtentManger.
- * @param numExtents - non-null pointer to an int that will receive the number of extents
- * @param totalFreeSizeBytes - non-null pointer to an int64_t receiving the total free
- * space in the free list.
- */
- virtual void freeListStats(OperationContext* txn,
- int* numExtents,
- int64_t* totalFreeSizeBytes) const = 0;
-
- /**
- * @param loc - has to be for a specific MmapV1RecordHeader
- * Note(erh): this sadly cannot be removed.
- * A MmapV1RecordHeader DiskLoc has an offset from a file, while a RecordStore really wants an offset
- * from an extent. This intrinsically links an original record store to the original extent
- * manager.
- */
- virtual MmapV1RecordHeader* recordForV1( const DiskLoc& loc ) const = 0;
-
- /**
- * The extent manager tracks accesses to DiskLocs. This returns non-NULL if the DiskLoc has
- * been recently accessed, and therefore has likely been paged into physical memory.
- * Returns nullptr if the DiskLoc is Null.
- *
- */
- virtual std::unique_ptr<RecordFetcher> recordNeedsFetch( const DiskLoc& loc ) const = 0;
-
- /**
- * @param loc - has to be for a specific MmapV1RecordHeader (not an Extent)
- * Note(erh) see comment on recordFor
- */
- virtual Extent* extentForV1( const DiskLoc& loc ) const = 0;
-
- /**
- * @param loc - has to be for a specific MmapV1RecordHeader (not an Extent)
- * Note(erh) see comment on recordFor
- */
- virtual DiskLoc extentLocForV1( const DiskLoc& loc ) const = 0;
-
- /**
- * @param loc - has to be for a specific Extent
- */
- virtual Extent* getExtent( const DiskLoc& loc, bool doSanityCheck = true ) const = 0;
-
- /**
- * @return maximum size of an Extent
- */
- virtual int maxSize() const = 0;
-
- /**
- * @return minimum size of an Extent
- */
- virtual int minSize() const { return 0x1000; }
-
- /**
- * @param recordLen length of record we need
- * @param lastExt size of last extent which is a factor in next extent size
- */
- virtual int followupSize( int recordLen, int lastExtentLen ) const;
-
- /** get a suggested size for the first extent in a namespace
- * @param recordLen length of record we need to insert
- */
- virtual int initialSize( int recordLen ) const;
-
- /**
- * quantizes extent size to >= min + page boundary
- */
- virtual int quantizeExtentSize( int size ) const;
-
- // see cacheHint methods
- enum HintType { Sequential, Random };
- class CacheHint {
- public:
- virtual ~CacheHint(){}
- };
- /**
- * Tell the system that for this extent, it will have this kind of disk access.
- * Caller takes owernship of CacheHint
- */
- virtual CacheHint* cacheHint( const DiskLoc& extentLoc, const HintType& hint ) = 0;
+ virtual ~CacheHint() {}
};
-
+ /**
+ * Tell the system that for this extent, it will have this kind of disk access.
+ * Caller takes ownership of the CacheHint
+ */
+ virtual CacheHint* cacheHint(const DiskLoc& extentLoc, const HintType& hint) = 0;
+};
}
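A sketch of how a record store might drive this interface, assuming an initialized concrete ExtentManager (em) and an OperationContext (txn), neither of which appears in this header:

// Hypothetical caller of the ExtentManager interface declared above.
int recordLen = 512;
int first = em->initialSize(recordLen);            // suggested first extent
DiskLoc extLoc = em->allocateExtent(txn, /*capped=*/false, first,
                                    /*enforceQuota=*/true);
// per the comment above, the caller must now call Extent::reuse on extLoc
// ... fill the extent with records ...
em->freeExtent(txn, extLoc);                       // return it when done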
diff --git a/src/mongo/db/storage/mmap_v1/file_allocator.cpp b/src/mongo/db/storage/mmap_v1/file_allocator.cpp
index bedd7d9e03d..0500ad43a83 100644
--- a/src/mongo/db/storage/mmap_v1/file_allocator.cpp
+++ b/src/mongo/db/storage/mmap_v1/file_allocator.cpp
@@ -38,16 +38,16 @@
#include <fcntl.h>
#if defined(__FreeBSD__)
-# include <sys/param.h>
-# include <sys/mount.h>
+#include <sys/param.h>
+#include <sys/mount.h>
#endif
#if defined(__linux__)
-# include <sys/vfs.h>
+#include <sys/vfs.h>
#endif
#if defined(_WIN32)
-# include <io.h>
+#include <io.h>
#endif
#include "mongo/db/storage/paths.h"
@@ -71,402 +71,410 @@ using namespace mongoutils;
namespace mongo {
- using std::endl;
- using std::list;
- using std::string;
- using std::stringstream;
+using std::endl;
+using std::list;
+using std::string;
+using std::stringstream;
- // unique number for temporary file names
- unsigned long long FileAllocator::_uniqueNumber = 0;
- static SimpleMutex _uniqueNumberMutex;
+// unique number for temporary file names
+unsigned long long FileAllocator::_uniqueNumber = 0;
+static SimpleMutex _uniqueNumberMutex;
- MONGO_FP_DECLARE(allocateDiskFull);
+MONGO_FP_DECLARE(allocateDiskFull);
- /**
- * Aliases for Win32 CRT functions
- */
+/**
+ * Aliases for Win32 CRT functions
+ */
#if defined(_WIN32)
- static inline long lseek(int fd, long offset, int origin) { return _lseek(fd, offset, origin); }
- static inline int write(int fd, const void *data, int count) { return _write(fd, data, count); }
- static inline int close(int fd) { return _close(fd); }
-
- typedef BOOL (CALLBACK *GetVolumeInformationByHandleWPtr)(HANDLE, LPWSTR, DWORD, LPDWORD, LPDWORD, LPDWORD, LPWSTR, DWORD);
- GetVolumeInformationByHandleWPtr GetVolumeInformationByHandleWFunc;
-
- MONGO_INITIALIZER(InitGetVolumeInformationByHandleW)(InitializerContext *context) {
- HMODULE kernelLib = LoadLibraryA("kernel32.dll");
- if (kernelLib) {
- GetVolumeInformationByHandleWFunc = reinterpret_cast<GetVolumeInformationByHandleWPtr>
- (GetProcAddress(kernelLib, "GetVolumeInformationByHandleW"));
- }
- return Status::OK();
+static inline long lseek(int fd, long offset, int origin) {
+ return _lseek(fd, offset, origin);
+}
+static inline int write(int fd, const void* data, int count) {
+ return _write(fd, data, count);
+}
+static inline int close(int fd) {
+ return _close(fd);
+}
+
+typedef BOOL(CALLBACK* GetVolumeInformationByHandleWPtr)(
+ HANDLE, LPWSTR, DWORD, LPDWORD, LPDWORD, LPDWORD, LPWSTR, DWORD);
+GetVolumeInformationByHandleWPtr GetVolumeInformationByHandleWFunc;
+
+MONGO_INITIALIZER(InitGetVolumeInformationByHandleW)(InitializerContext* context) {
+ HMODULE kernelLib = LoadLibraryA("kernel32.dll");
+ if (kernelLib) {
+ GetVolumeInformationByHandleWFunc = reinterpret_cast<GetVolumeInformationByHandleWPtr>(
+ GetProcAddress(kernelLib, "GetVolumeInformationByHandleW"));
}
+ return Status::OK();
+}
#endif
- boost::filesystem::path ensureParentDirCreated(const boost::filesystem::path& p){
- const boost::filesystem::path parent = p.branch_path();
+boost::filesystem::path ensureParentDirCreated(const boost::filesystem::path& p) {
+ const boost::filesystem::path parent = p.branch_path();
- if (! boost::filesystem::exists(parent)){
- ensureParentDirCreated(parent);
- log() << "creating directory " << parent.string() << endl;
- boost::filesystem::create_directory(parent);
- flushMyDirectory(parent); // flushes grandparent to ensure parent exists after crash
- }
-
- verify(boost::filesystem::is_directory(parent));
- return parent;
+ if (!boost::filesystem::exists(parent)) {
+ ensureParentDirCreated(parent);
+ log() << "creating directory " << parent.string() << endl;
+ boost::filesystem::create_directory(parent);
+ flushMyDirectory(parent); // flushes grandparent to ensure parent exists after crash
}
- FileAllocator::FileAllocator() : _failed() {}
+ verify(boost::filesystem::is_directory(parent));
+ return parent;
+}
+FileAllocator::FileAllocator() : _failed() {}
- void FileAllocator::start() {
- stdx::thread t( stdx::bind( &FileAllocator::run , this ) );
- }
- void FileAllocator::requestAllocation( const string &name, long &size ) {
- stdx::lock_guard<stdx::mutex> lk( _pendingMutex );
- if ( _failed )
- return;
- long oldSize = prevSize( name );
- if ( oldSize != -1 ) {
- size = oldSize;
- return;
- }
- _pending.push_back( name );
- _pendingSize[ name ] = size;
- _pendingUpdated.notify_all();
- }
-
- void FileAllocator::allocateAsap( const string &name, unsigned long long &size ) {
- stdx::unique_lock<stdx::mutex> lk( _pendingMutex );
-
- // In case the allocator is in failed state, check once before starting so that subsequent
- // requests for the same database would fail fast after the first one has failed.
- checkFailure();
-
- long oldSize = prevSize( name );
- if ( oldSize != -1 ) {
- size = oldSize;
- if ( !inProgress( name ) )
- return;
- }
- checkFailure();
- _pendingSize[ name ] = size;
- if ( _pending.size() == 0 )
- _pending.push_back( name );
- else if ( _pending.front() != name ) {
- _pending.remove( name );
- list< string >::iterator i = _pending.begin();
- ++i;
- _pending.insert( i, name );
- }
- _pendingUpdated.notify_all();
- while( inProgress( name ) ) {
- checkFailure();
- _pendingUpdated.wait(lk);
- }
+void FileAllocator::start() {
+ stdx::thread t(stdx::bind(&FileAllocator::run, this));
+}
+void FileAllocator::requestAllocation(const string& name, long& size) {
+ stdx::lock_guard<stdx::mutex> lk(_pendingMutex);
+ if (_failed)
+ return;
+ long oldSize = prevSize(name);
+ if (oldSize != -1) {
+ size = oldSize;
+ return;
}
-
- void FileAllocator::waitUntilFinished() const {
- if ( _failed )
+ _pending.push_back(name);
+ _pendingSize[name] = size;
+ _pendingUpdated.notify_all();
+}
+
+void FileAllocator::allocateAsap(const string& name, unsigned long long& size) {
+ stdx::unique_lock<stdx::mutex> lk(_pendingMutex);
+
+ // In case the allocator is in failed state, check once before starting so that subsequent
+ // requests for the same database would fail fast after the first one has failed.
+ checkFailure();
+
+ long oldSize = prevSize(name);
+ if (oldSize != -1) {
+ size = oldSize;
+ if (!inProgress(name))
return;
- stdx::unique_lock<stdx::mutex> lk( _pendingMutex );
- while( _pending.size() != 0 )
- _pendingUpdated.wait(lk);
}
-
- // TODO: pull this out to per-OS files once they exist
- static bool useSparseFiles(int fd) {
-
+ checkFailure();
+ _pendingSize[name] = size;
+ if (_pending.size() == 0)
+ _pending.push_back(name);
+ else if (_pending.front() != name) {
+ _pending.remove(name);
+ list<string>::iterator i = _pending.begin();
+ ++i;
+ _pending.insert(i, name);
+ }
+ _pendingUpdated.notify_all();
+ while (inProgress(name)) {
+ checkFailure();
+ _pendingUpdated.wait(lk);
+ }
+}
+
+void FileAllocator::waitUntilFinished() const {
+ if (_failed)
+ return;
+ stdx::unique_lock<stdx::mutex> lk(_pendingMutex);
+ while (_pending.size() != 0)
+ _pendingUpdated.wait(lk);
+}
+
+// TODO: pull this out to per-OS files once they exist
+static bool useSparseFiles(int fd) {
#if defined(__linux__) || defined(__FreeBSD__)
- struct statfs fs_stats;
- int ret = fstatfs(fd, &fs_stats);
- uassert(16062, "fstatfs failed: " + errnoWithDescription(), ret == 0);
+ struct statfs fs_stats;
+ int ret = fstatfs(fd, &fs_stats);
+ uassert(16062, "fstatfs failed: " + errnoWithDescription(), ret == 0);
#endif
#if defined(__linux__)
// these are from <linux/magic.h> but that isn't available on all systems
-# define NFS_SUPER_MAGIC 0x6969
-# define TMPFS_MAGIC 0x01021994
+#define NFS_SUPER_MAGIC 0x6969
+#define TMPFS_MAGIC 0x01021994
- return (fs_stats.f_type == NFS_SUPER_MAGIC)
- || (fs_stats.f_type == TMPFS_MAGIC)
- ;
+ return (fs_stats.f_type == NFS_SUPER_MAGIC) || (fs_stats.f_type == TMPFS_MAGIC);
#elif defined(__FreeBSD__)
- return (str::equals(fs_stats.f_fstypename, "zfs") ||
+ return (str::equals(fs_stats.f_fstypename, "zfs") ||
str::equals(fs_stats.f_fstypename, "nfs") ||
str::equals(fs_stats.f_fstypename, "oldnfs"));
#elif defined(__sun)
- // assume using ZFS which is copy-on-write so no benefit to zero-filling
- // TODO: check which fs we are using like we do elsewhere
- return true;
+ // assume using ZFS which is copy-on-write so no benefit to zero-filling
+ // TODO: check which fs we are using like we do elsewhere
+ return true;
#else
- return false;
+ return false;
#endif
- }
+}
#if defined(_WIN32)
- static bool isFileOnNTFSVolume(int fd) {
- if (!GetVolumeInformationByHandleWFunc) {
- warning() << "Could not retrieve pointer to GetVolumeInformationByHandleW function";
- return false;
- }
-
- HANDLE fileHandle = (HANDLE)_get_osfhandle(fd);
- if (fileHandle == INVALID_HANDLE_VALUE) {
- warning() << "_get_osfhandle() failed with " << _strerror(NULL);
- return false;
- }
+static bool isFileOnNTFSVolume(int fd) {
+ if (!GetVolumeInformationByHandleWFunc) {
+ warning() << "Could not retrieve pointer to GetVolumeInformationByHandleW function";
+ return false;
+ }
- WCHAR fileSystemName[MAX_PATH + 1];
- if (!GetVolumeInformationByHandleWFunc(fileHandle, NULL, 0, NULL, 0, NULL, fileSystemName, sizeof(fileSystemName))) {
- DWORD gle = GetLastError();
- warning() << "GetVolumeInformationByHandleW failed with " << errnoWithDescription(gle);
- return false;
- }
+ HANDLE fileHandle = (HANDLE)_get_osfhandle(fd);
+ if (fileHandle == INVALID_HANDLE_VALUE) {
+ warning() << "_get_osfhandle() failed with " << _strerror(NULL);
+ return false;
+ }
- return lstrcmpW(fileSystemName, L"NTFS") == 0;
+ WCHAR fileSystemName[MAX_PATH + 1];
+ if (!GetVolumeInformationByHandleWFunc(
+ fileHandle, NULL, 0, NULL, 0, NULL, fileSystemName, sizeof(fileSystemName))) {
+ DWORD gle = GetLastError();
+ warning() << "GetVolumeInformationByHandleW failed with " << errnoWithDescription(gle);
+ return false;
}
+
+ return lstrcmpW(fileSystemName, L"NTFS") == 0;
+}
#endif
- void FileAllocator::ensureLength(int fd , long size) {
- // Test running out of disk scenarios
- if (MONGO_FAIL_POINT(allocateDiskFull)) {
- uasserted( 10444 , "File allocation failed due to failpoint.");
- }
+void FileAllocator::ensureLength(int fd, long size) {
+ // Test running out of disk scenarios
+ if (MONGO_FAIL_POINT(allocateDiskFull)) {
+ uasserted(10444, "File allocation failed due to failpoint.");
+ }
#if !defined(_WIN32)
- if (useSparseFiles(fd)) {
- LOG(1) << "using ftruncate to create a sparse file" << endl;
- int ret = ftruncate(fd, size);
- uassert(16063, "ftruncate failed: " + errnoWithDescription(), ret == 0);
- return;
- }
+ if (useSparseFiles(fd)) {
+ LOG(1) << "using ftruncate to create a sparse file" << endl;
+ int ret = ftruncate(fd, size);
+ uassert(16063, "ftruncate failed: " + errnoWithDescription(), ret == 0);
+ return;
+ }
#endif
#if defined(__linux__)
- int ret = posix_fallocate(fd,0,size);
- if ( ret == 0 )
- return;
+ int ret = posix_fallocate(fd, 0, size);
+ if (ret == 0)
+ return;
- log() << "FileAllocator: posix_fallocate failed: " << errnoWithDescription( ret ) << " falling back" << endl;
+ log() << "FileAllocator: posix_fallocate failed: " << errnoWithDescription(ret)
+ << " falling back" << endl;
#endif
- off_t filelen = lseek( fd, 0, SEEK_END );
- if ( filelen < size ) {
- if (filelen != 0) {
- stringstream ss;
- ss << "failure creating new datafile; lseek failed for fd " << fd << " with errno: " << errnoWithDescription();
- uassert( 10440 , ss.str(), filelen == 0 );
- }
- // Check for end of disk.
-
- uassert( 10441 , str::stream() << "Unable to allocate new file of size " << size << ' ' << errnoWithDescription(),
- size - 1 == lseek(fd, size - 1, SEEK_SET) );
- uassert( 10442 , str::stream() << "Unable to allocate new file of size " << size << ' ' << errnoWithDescription(),
- 1 == write(fd, "", 1) );
-
- // File expansion is completed here. Do not do the zeroing out on OS-es where there
- // is no risk of triggering allocation-related bugs such as
- // http://support.microsoft.com/kb/2731284.
- //
- if (!ProcessInfo::isDataFileZeroingNeeded()) {
- return;
- }
+ off_t filelen = lseek(fd, 0, SEEK_END);
+ if (filelen < size) {
+ if (filelen != 0) {
+ stringstream ss;
+ ss << "failure creating new datafile; lseek failed for fd " << fd
+ << " with errno: " << errnoWithDescription();
+ uassert(10440, ss.str(), filelen == 0);
+ }
+ // Check for end of disk.
+
+ uassert(10441,
+ str::stream() << "Unable to allocate new file of size " << size << ' '
+ << errnoWithDescription(),
+ size - 1 == lseek(fd, size - 1, SEEK_SET));
+ uassert(10442,
+ str::stream() << "Unable to allocate new file of size " << size << ' '
+ << errnoWithDescription(),
+ 1 == write(fd, "", 1));
+
+ // File expansion is completed here. Do not do the zeroing out on OS-es where there
+ // is no risk of triggering allocation-related bugs such as
+ // http://support.microsoft.com/kb/2731284.
+ //
+ if (!ProcessInfo::isDataFileZeroingNeeded()) {
+ return;
+ }
#if defined(_WIN32)
- if (!isFileOnNTFSVolume(fd)) {
- log() << "No need to zero out datafile on non-NTFS volume" << endl;
- return;
- }
-#endif
-
- lseek(fd, 0, SEEK_SET);
-
- const long z = 256 * 1024;
- const std::unique_ptr<char[]> buf_holder (new char[z]);
- char* buf = buf_holder.get();
- memset(buf, 0, z);
- long left = size;
- while ( left > 0 ) {
- long towrite = left;
- if ( towrite > z )
- towrite = z;
-
- int written = write( fd , buf , towrite );
- uassert( 10443 , errnoWithPrefix("FileAllocator: file write failed" ), written > 0 );
- left -= written;
- }
+ if (!isFileOnNTFSVolume(fd)) {
+ log() << "No need to zero out datafile on non-NTFS volume" << endl;
+ return;
}
- }
+#endif
- void FileAllocator::checkFailure() {
- if (_failed) {
- // we want to log the problem (diskfull.js expects it) but we do not want to dump a stack tracke
- msgassertedNoTrace( 12520, "new file allocation failure" );
+ lseek(fd, 0, SEEK_SET);
+
+ const long z = 256 * 1024;
+ const std::unique_ptr<char[]> buf_holder(new char[z]);
+ char* buf = buf_holder.get();
+ memset(buf, 0, z);
+ long left = size;
+ while (left > 0) {
+ long towrite = left;
+ if (towrite > z)
+ towrite = z;
+
+ int written = write(fd, buf, towrite);
+ uassert(10443, errnoWithPrefix("FileAllocator: file write failed"), written > 0);
+ left -= written;
}
}
+}
- long FileAllocator::prevSize( const string &name ) const {
- if ( _pendingSize.count( name ) > 0 )
- return _pendingSize[ name ];
- if ( boost::filesystem::exists( name ) )
- return boost::filesystem::file_size( name );
- return -1;
+void FileAllocator::checkFailure() {
+ if (_failed) {
+ // we want to log the problem (diskfull.js expects it) but we do not want to dump a stack trace
+ msgassertedNoTrace(12520, "new file allocation failure");
}
-
- // caller must hold _pendingMutex lock.
- bool FileAllocator::inProgress( const string &name ) const {
- for( list< string >::const_iterator i = _pending.begin(); i != _pending.end(); ++i )
- if ( *i == name )
- return true;
- return false;
- }
-
- string FileAllocator::makeTempFileName( boost::filesystem::path root ) {
- while( 1 ) {
- boost::filesystem::path p = root / "_tmp";
- stringstream ss;
- unsigned long long thisUniqueNumber;
- {
- // increment temporary file name counter
- // TODO: SERVER-6055 -- Unify temporary file name selection
- stdx::lock_guard<SimpleMutex> lk(_uniqueNumberMutex);
- thisUniqueNumber = _uniqueNumber;
- ++_uniqueNumber;
- }
- ss << thisUniqueNumber;
- p /= ss.str();
- string fn = p.string();
- if( !boost::filesystem::exists(p) )
- return fn;
- }
- return "";
- }
-
- void FileAllocator::run( FileAllocator * fa ) {
- setThreadName( "FileAllocator" );
+}
+
+long FileAllocator::prevSize(const string& name) const {
+ if (_pendingSize.count(name) > 0)
+ return _pendingSize[name];
+ if (boost::filesystem::exists(name))
+ return boost::filesystem::file_size(name);
+ return -1;
+}
+
+// caller must hold _pendingMutex lock.
+bool FileAllocator::inProgress(const string& name) const {
+ for (list<string>::const_iterator i = _pending.begin(); i != _pending.end(); ++i)
+ if (*i == name)
+ return true;
+ return false;
+}
+
+string FileAllocator::makeTempFileName(boost::filesystem::path root) {
+ while (1) {
+ boost::filesystem::path p = root / "_tmp";
+ stringstream ss;
+ unsigned long long thisUniqueNumber;
{
- // initialize unique temporary file name counter
+ // increment temporary file name counter
// TODO: SERVER-6055 -- Unify temporary file name selection
stdx::lock_guard<SimpleMutex> lk(_uniqueNumberMutex);
- _uniqueNumber = curTimeMicros64();
+ thisUniqueNumber = _uniqueNumber;
+ ++_uniqueNumber;
}
- while( 1 ) {
+ ss << thisUniqueNumber;
+ p /= ss.str();
+ string fn = p.string();
+ if (!boost::filesystem::exists(p))
+ return fn;
+ }
+ return "";
+}
+
+void FileAllocator::run(FileAllocator* fa) {
+ setThreadName("FileAllocator");
+ {
+ // initialize unique temporary file name counter
+ // TODO: SERVER-6055 -- Unify temporary file name selection
+ stdx::lock_guard<SimpleMutex> lk(_uniqueNumberMutex);
+ _uniqueNumber = curTimeMicros64();
+ }
+ while (1) {
+ {
+ stdx::unique_lock<stdx::mutex> lk(fa->_pendingMutex);
+ if (fa->_pending.size() == 0)
+ fa->_pendingUpdated.wait(lk);
+ }
+ while (1) {
+ string name;
+ long size = 0;
{
- stdx::unique_lock<stdx::mutex> lk( fa->_pendingMutex );
- if ( fa->_pending.size() == 0 )
- fa->_pendingUpdated.wait(lk);
+ stdx::lock_guard<stdx::mutex> lk(fa->_pendingMutex);
+ if (fa->_pending.size() == 0)
+ break;
+ name = fa->_pending.front();
+ size = fa->_pendingSize[name];
}
- while( 1 ) {
- string name;
- long size = 0;
- {
- stdx::lock_guard<stdx::mutex> lk( fa->_pendingMutex );
- if ( fa->_pending.size() == 0 )
- break;
- name = fa->_pending.front();
- size = fa->_pendingSize[ name ];
- }
- string tmp;
- long fd = 0;
- try {
- log() << "allocating new datafile " << name << ", filling with zeroes..." << endl;
-
- boost::filesystem::path parent = ensureParentDirCreated(name);
- tmp = fa->makeTempFileName( parent );
- ensureParentDirCreated(tmp);
+ string tmp;
+ long fd = 0;
+ try {
+ log() << "allocating new datafile " << name << ", filling with zeroes..." << endl;
+
+ boost::filesystem::path parent = ensureParentDirCreated(name);
+ tmp = fa->makeTempFileName(parent);
+ ensureParentDirCreated(tmp);
#if defined(_WIN32)
- fd = _open( tmp.c_str(), _O_RDWR | _O_CREAT | O_NOATIME, _S_IREAD | _S_IWRITE );
+ fd = _open(tmp.c_str(), _O_RDWR | _O_CREAT | O_NOATIME, _S_IREAD | _S_IWRITE);
#else
- fd = open(tmp.c_str(), O_CREAT | O_RDWR | O_NOATIME, S_IRUSR | S_IWUSR);
+ fd = open(tmp.c_str(), O_CREAT | O_RDWR | O_NOATIME, S_IRUSR | S_IWUSR);
#endif
- if ( fd < 0 ) {
- log() << "FileAllocator: couldn't create " << name << " (" << tmp << ") " << errnoWithDescription() << endl;
- uasserted(10439, "");
- }
+ if (fd < 0) {
+ log() << "FileAllocator: couldn't create " << name << " (" << tmp << ") "
+ << errnoWithDescription() << endl;
+ uasserted(10439, "");
+ }
#if defined(POSIX_FADV_DONTNEED)
- if( posix_fadvise(fd, 0, size, POSIX_FADV_DONTNEED) ) {
- log() << "warning: posix_fadvise fails " << name << " (" << tmp << ") " << errnoWithDescription() << endl;
- }
+ if (posix_fadvise(fd, 0, size, POSIX_FADV_DONTNEED)) {
+ log() << "warning: posix_fadvise fails " << name << " (" << tmp << ") "
+ << errnoWithDescription() << endl;
+ }
#endif
- Timer t;
+ Timer t;
- /* make sure the file is the full desired length */
- ensureLength( fd , size );
+ /* make sure the file is the full desired length */
+ ensureLength(fd, size);
- close( fd );
- fd = 0;
+ close(fd);
+ fd = 0;
- if( rename(tmp.c_str(), name.c_str()) ) {
- const string& errStr = errnoWithDescription();
- const string& errMessage = str::stream()
- << "error: couldn't rename " << tmp
- << " to " << name << ' ' << errStr;
- msgasserted(13653, errMessage);
- }
- flushMyDirectory(name);
-
- log() << "done allocating datafile " << name << ", "
- << "size: " << size/1024/1024 << "MB, "
- << " took " << ((double)t.millis())/1000.0 << " secs"
- << endl;
-
- // no longer in a failed state. allow new writers.
- fa->_failed = false;
+ if (rename(tmp.c_str(), name.c_str())) {
+ const string& errStr = errnoWithDescription();
+ const string& errMessage = str::stream() << "error: couldn't rename " << tmp
+ << " to " << name << ' ' << errStr;
+ msgasserted(13653, errMessage);
}
- catch ( const std::exception& e ) {
- log() << "error: failed to allocate new file: " << name
- << " size: " << size << ' ' << e.what()
- << ". will try again in 10 seconds" << endl;
- if ( fd > 0 )
- close( fd );
- try {
- if ( ! tmp.empty() )
- boost::filesystem::remove( tmp );
- boost::filesystem::remove( name );
- } catch ( const std::exception& e ) {
- log() << "error removing files: " << e.what() << endl;
- }
-
- {
- stdx::lock_guard<stdx::mutex> lk(fa->_pendingMutex);
- fa->_failed = true;
-
- // TODO: Should we remove the file from pending?
- fa->_pendingUpdated.notify_all();
- }
-
-
- sleepsecs(10);
- continue;
+ flushMyDirectory(name);
+
+ log() << "done allocating datafile " << name << ", "
+ << "size: " << size / 1024 / 1024 << "MB, "
+ << " took " << ((double)t.millis()) / 1000.0 << " secs" << endl;
+
+ // no longer in a failed state. allow new writers.
+ fa->_failed = false;
+ } catch (const std::exception& e) {
+ log() << "error: failed to allocate new file: " << name << " size: " << size << ' '
+ << e.what() << ". will try again in 10 seconds" << endl;
+ if (fd > 0)
+ close(fd);
+ try {
+ if (!tmp.empty())
+ boost::filesystem::remove(tmp);
+ boost::filesystem::remove(name);
+ } catch (const std::exception& e) {
+ log() << "error removing files: " << e.what() << endl;
}
{
- stdx::lock_guard<stdx::mutex> lk( fa->_pendingMutex );
- fa->_pendingSize.erase( name );
- fa->_pending.pop_front();
+ stdx::lock_guard<stdx::mutex> lk(fa->_pendingMutex);
+ fa->_failed = true;
+
+ // TODO: Should we remove the file from pending?
fa->_pendingUpdated.notify_all();
}
+
+
+ sleepsecs(10);
+ continue;
+ }
+
+ {
+ stdx::lock_guard<stdx::mutex> lk(fa->_pendingMutex);
+ fa->_pendingSize.erase(name);
+ fa->_pending.pop_front();
+ fa->_pendingUpdated.notify_all();
}
}
}
+}
- FileAllocator* FileAllocator::_instance = 0;
+FileAllocator* FileAllocator::_instance = 0;
- FileAllocator* FileAllocator::get(){
- if ( ! _instance )
- _instance = new FileAllocator();
- return _instance;
- }
+FileAllocator* FileAllocator::get() {
+ if (!_instance)
+ _instance = new FileAllocator();
+ return _instance;
+}
-} // namespace mongo
+} // namespace mongo
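FileAllocator::run() above follows a standard crash-safety pattern: the file is sized under a temporary name, then atomically renamed into place, so a crash can never leave a truncated datafile under its final name. In outline:

// Allocation pattern in FileAllocator::run(), as implemented above:
//   1. tmp = makeTempFileName(parent);        // unique name under <dir>/_tmp
//   2. open(tmp); ensureLength(fd, size);     // sparse, fallocate, or zeroes
//   3. close(fd); rename(tmp, name);          // atomically publish the file
//   4. flushMyDirectory(name);                // persist the directory entry
// A failure before step 3 leaves only a _tmp file, which the catch block
// above removes before retrying ten seconds later.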
diff --git a/src/mongo/db/storage/mmap_v1/file_allocator.h b/src/mongo/db/storage/mmap_v1/file_allocator.h
index d3f9b6cceda..e3e4ad55881 100644
--- a/src/mongo/db/storage/mmap_v1/file_allocator.h
+++ b/src/mongo/db/storage/mmap_v1/file_allocator.h
@@ -37,73 +37,71 @@
namespace mongo {
+/*
+ * Handles allocation of contiguous files on disk. Allocation may be
+ * requested asynchronously or synchronously.
+ * singleton
+ */
+class FileAllocator {
+ MONGO_DISALLOW_COPYING(FileAllocator);
/*
- * Handles allocation of contiguous files on disk. Allocation may be
- * requested asynchronously or synchronously.
- * singleton
+ * The public functions may not be called concurrently. The allocation
+ * functions may be called multiple times per file, but only the first
+ * size specified per file will be used.
+ */
+public:
+ void start();
+
+ /**
+ * May be called if file exists. If file exists, or its allocation has
+ * been requested, size is updated to match existing file size.
*/
- class FileAllocator {
- MONGO_DISALLOW_COPYING(FileAllocator);
- /*
- * The public functions may not be called concurrently. The allocation
- * functions may be called multiple times per file, but only the first
- * size specified per file will be used.
- */
- public:
- void start();
-
- /**
- * May be called if file exists. If file exists, or its allocation has
- * been requested, size is updated to match existing file size.
- */
- void requestAllocation( const std::string &name, long &size );
+ void requestAllocation(const std::string& name, long& size);
- /**
- * Returns when file has been allocated. If file exists, size is
- * updated to match existing file size.
- */
- void allocateAsap( const std::string &name, unsigned long long &size );
-
- void waitUntilFinished() const;
+ /**
+ * Returns when file has been allocated. If file exists, size is
+ * updated to match existing file size.
+ */
+ void allocateAsap(const std::string& name, unsigned long long& size);
- static void ensureLength(int fd, long size);
+ void waitUntilFinished() const;
- /** @return the singleton */
- static FileAllocator * get();
-
- private:
+ static void ensureLength(int fd, long size);
- FileAllocator();
+ /** @return the singleton */
+ static FileAllocator* get();
- void checkFailure();
+private:
+ FileAllocator();
- // caller must hold pendingMutex_ lock. Returns size if allocated or
- // allocation requested, -1 otherwise.
- long prevSize( const std::string &name ) const;
+ void checkFailure();
- // caller must hold pendingMutex_ lock.
- bool inProgress( const std::string &name ) const;
+ // caller must hold pendingMutex_ lock. Returns size if allocated or
+ // allocation requested, -1 otherwise.
+ long prevSize(const std::string& name) const;
- /** called from the worked thread */
- static void run( FileAllocator * fa );
+ // caller must hold pendingMutex_ lock.
+ bool inProgress(const std::string& name) const;
- // generate a unique name for temporary files
- std::string makeTempFileName( boost::filesystem::path root );
+ /** called from the worker thread */
+ static void run(FileAllocator* fa);
- mutable stdx::mutex _pendingMutex;
- mutable stdx::condition_variable _pendingUpdated;
+ // generate a unique name for temporary files
+ std::string makeTempFileName(boost::filesystem::path root);
- std::list< std::string > _pending;
- mutable std::map< std::string, long > _pendingSize;
+ mutable stdx::mutex _pendingMutex;
+ mutable stdx::condition_variable _pendingUpdated;
- // unique number for temporary files
- static unsigned long long _uniqueNumber;
+ std::list<std::string> _pending;
+ mutable std::map<std::string, long> _pendingSize;
- bool _failed;
+ // unique number for temporary files
+ static unsigned long long _uniqueNumber;
- static FileAllocator* _instance;
+ bool _failed;
- };
+ static FileAllocator* _instance;
+};
-} // namespace mongo
+} // namespace mongo
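A minimal usage sketch, assuming only the declarations above (the paths and sizes are illustrative):

// Hypothetical use of the FileAllocator singleton declared above.
FileAllocator* fa = FileAllocator::get();
fa->start();                                  // spawn the background thread

long size = 64 * 1024 * 1024;                 // updated if the file exists
fa->requestAllocation("/data/db/test.0", size);   // asynchronous

unsigned long long urgent = 128 * 1024 * 1024;
fa->allocateAsap("/data/db/test.1", urgent);  // blocks until allocated
fa->waitUntilFinished();                      // drain the pending queue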
diff --git a/src/mongo/db/storage/mmap_v1/heap_record_store_btree.cpp b/src/mongo/db/storage/mmap_v1/heap_record_store_btree.cpp
index dfe51554836..934f9807628 100644
--- a/src/mongo/db/storage/mmap_v1/heap_record_store_btree.cpp
+++ b/src/mongo/db/storage/mmap_v1/heap_record_store_btree.cpp
@@ -40,117 +40,117 @@
namespace mongo {
- RecordData HeapRecordStoreBtree::dataFor(OperationContext* txn, const RecordId& loc) const {
- Records::const_iterator it = _records.find(loc);
- invariant(it != _records.end());
- const MmapV1RecordHeader& rec = it->second;
-
- return RecordData(rec.data.get(), rec.dataSize);
- }
-
- bool HeapRecordStoreBtree::findRecord(OperationContext* txn,
- const RecordId& loc, RecordData* out) const {
- Records::const_iterator it = _records.find(loc);
- if ( it == _records.end() )
- return false;
- const MmapV1RecordHeader& rec = it->second;
- *out = RecordData(rec.data.get(), rec.dataSize);
- return true;
- }
-
- void HeapRecordStoreBtree::deleteRecord(OperationContext* txn, const RecordId& loc) {
- invariant(_records.erase(loc) == 1);
- }
-
- StatusWith<RecordId> HeapRecordStoreBtree::insertRecord(OperationContext* txn,
- const char* data,
- int len,
- bool enforceQuota) {
- MmapV1RecordHeader rec(len);
- memcpy(rec.data.get(), data, len);
-
- const RecordId loc = allocateLoc();
- _records[loc] = rec;
-
- HeapRecordStoreBtreeRecoveryUnit::notifyInsert( txn, this, loc );
-
- return StatusWith<RecordId>(loc);
+RecordData HeapRecordStoreBtree::dataFor(OperationContext* txn, const RecordId& loc) const {
+ Records::const_iterator it = _records.find(loc);
+ invariant(it != _records.end());
+ const MmapV1RecordHeader& rec = it->second;
+
+ return RecordData(rec.data.get(), rec.dataSize);
+}
+
+bool HeapRecordStoreBtree::findRecord(OperationContext* txn,
+ const RecordId& loc,
+ RecordData* out) const {
+ Records::const_iterator it = _records.find(loc);
+ if (it == _records.end())
+ return false;
+ const MmapV1RecordHeader& rec = it->second;
+ *out = RecordData(rec.data.get(), rec.dataSize);
+ return true;
+}
+
+void HeapRecordStoreBtree::deleteRecord(OperationContext* txn, const RecordId& loc) {
+ invariant(_records.erase(loc) == 1);
+}
+
+StatusWith<RecordId> HeapRecordStoreBtree::insertRecord(OperationContext* txn,
+ const char* data,
+ int len,
+ bool enforceQuota) {
+ MmapV1RecordHeader rec(len);
+ memcpy(rec.data.get(), data, len);
+
+ const RecordId loc = allocateLoc();
+ _records[loc] = rec;
+
+ HeapRecordStoreBtreeRecoveryUnit::notifyInsert(txn, this, loc);
+
+ return StatusWith<RecordId>(loc);
+}
+
+StatusWith<RecordId> HeapRecordStoreBtree::insertRecord(OperationContext* txn,
+ const DocWriter* doc,
+ bool enforceQuota) {
+ MmapV1RecordHeader rec(doc->documentSize());
+ doc->writeDocument(rec.data.get());
+
+ const RecordId loc = allocateLoc();
+ _records[loc] = rec;
+
+ HeapRecordStoreBtreeRecoveryUnit::notifyInsert(txn, this, loc);
+
+ return StatusWith<RecordId>(loc);
+}
+
+RecordId HeapRecordStoreBtree::allocateLoc() {
+ const int64_t id = _nextId++;
+ // This is a hack, but both the high and low order bits of RecordId offset must be 0, and the
+ // file must fit in 23 bits. This gives us a total of 30 + 23 == 53 bits.
+ invariant(id < (1LL << 53));
+ RecordId dl(int(id >> 30), int((id << 1) & ~(1 << 31)));
+ invariant((dl.repr() & 0x1) == 0);
+ return dl;
+}
+
+Status HeapRecordStoreBtree::touch(OperationContext* txn, BSONObjBuilder* output) const {
+ // not currently called from the tests, but called from btree_logic.h
+ return Status::OK();
+}
+
+// ---------------------------
+
+void HeapRecordStoreBtreeRecoveryUnit::commitUnitOfWork() {
+ _insertions.clear();
+ _mods.clear();
+}
+
+void HeapRecordStoreBtreeRecoveryUnit::abortUnitOfWork() {
+ // reverse in case we write same area twice
+ for (size_t i = _mods.size(); i > 0; i--) {
+ ModEntry& e = _mods[i - 1];
+ memcpy(e.data, e.old.get(), e.len);
}
- StatusWith<RecordId> HeapRecordStoreBtree::insertRecord(OperationContext* txn,
- const DocWriter* doc,
- bool enforceQuota) {
- MmapV1RecordHeader rec(doc->documentSize());
- doc->writeDocument(rec.data.get());
+ invariant(_insertions.size() == 0); // todo
+}
- const RecordId loc = allocateLoc();
- _records[loc] = rec;
+void* HeapRecordStoreBtreeRecoveryUnit::writingPtr(void* data, size_t len) {
+ ModEntry e = {data, len, boost::shared_array<char>(new char[len])};
+ memcpy(e.old.get(), data, len);
+ _mods.push_back(e);
+ return data;
+}
- HeapRecordStoreBtreeRecoveryUnit::notifyInsert( txn, this, loc );
+void HeapRecordStoreBtreeRecoveryUnit::notifyInsert(HeapRecordStoreBtree* rs, const RecordId& loc) {
+ InsertEntry e = {rs, loc};
+ _insertions.push_back(e);
+}
- return StatusWith<RecordId>(loc);
- }
+void HeapRecordStoreBtreeRecoveryUnit::notifyInsert(OperationContext* ctx,
+ HeapRecordStoreBtree* rs,
+ const RecordId& loc) {
+ if (!ctx)
+ return;
- RecordId HeapRecordStoreBtree::allocateLoc() {
- const int64_t id = _nextId++;
- // This is a hack, but both the high and low order bits of RecordId offset must be 0, and the
- // file must fit in 23 bits. This gives us a total of 30 + 23 == 53 bits.
- invariant(id < (1LL << 53));
- RecordId dl(int(id >> 30), int((id << 1) & ~(1<<31)));
- invariant( (dl.repr() & 0x1) == 0 );
- return dl;
- }
-
- Status HeapRecordStoreBtree::touch(OperationContext* txn, BSONObjBuilder* output) const {
- // not currently called from the tests, but called from btree_logic.h
- return Status::OK();
- }
+ // This dynamic_cast affects behavior; ideally the design should change.
+ HeapRecordStoreBtreeRecoveryUnit* ru =
+ dynamic_cast<HeapRecordStoreBtreeRecoveryUnit*>(ctx->recoveryUnit());
- // ---------------------------
+ if (!ru)
+ return;
- void HeapRecordStoreBtreeRecoveryUnit::commitUnitOfWork() {
- _insertions.clear();
- _mods.clear();
- }
-
- void HeapRecordStoreBtreeRecoveryUnit::abortUnitOfWork() {
- // reverse in case we write same area twice
- for ( size_t i = _mods.size(); i > 0; i-- ) {
- ModEntry& e = _mods[i-1];
- memcpy( e.data, e.old.get(), e.len );
- }
-
- invariant( _insertions.size() == 0 ); // todo
- }
-
- void* HeapRecordStoreBtreeRecoveryUnit::writingPtr(void* data, size_t len) {
- ModEntry e = { data, len, boost::shared_array<char>( new char[len] ) };
- memcpy( e.old.get(), data, len );
- _mods.push_back( e );
- return data;
- }
-
- void HeapRecordStoreBtreeRecoveryUnit::notifyInsert( HeapRecordStoreBtree* rs,
- const RecordId& loc ) {
- InsertEntry e = { rs, loc };
- _insertions.push_back( e );
- }
-
- void HeapRecordStoreBtreeRecoveryUnit::notifyInsert( OperationContext* ctx,
- HeapRecordStoreBtree* rs,
- const RecordId& loc ) {
- if ( !ctx )
- return;
-
- // This dynamic_cast has semantics, should change ideally.
- HeapRecordStoreBtreeRecoveryUnit* ru =
- dynamic_cast<HeapRecordStoreBtreeRecoveryUnit*>( ctx->recoveryUnit() );
-
- if ( !ru )
- return;
-
- ru->notifyInsert( rs, loc );
- }
+ ru->notifyInsert(rs, loc);
+}
-} // namespace mongo
+} // namespace mongo
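
An aside on the bit layout that allocateLoc() above depends on: the 53-bit counter is split into a 23-bit file number and a 30-bit offset whose lowest and highest bits are forced to zero, hence the 30 + 23 == 53 in the comment. A minimal, self-contained sketch of that packing (illustrative only, not part of this patch; 0x7FFFFFFF plays the role of ~(1 << 31)):

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    int main() {
        const int64_t id = (1LL << 52) + 12345;  // any counter value below 2^53
        assert(id < (1LL << 53));

        const int file = int(id >> 30);                  // top 23 bits
        const int offset = int((id << 1) & 0x7FFFFFFF);  // low 30 bits, shifted up one

        assert((offset & 1) == 0);  // low bit of the offset is clear
        assert(offset >= 0);        // high bit of the offset is clear
        assert(file < (1 << 23));   // file number fits in 23 bits

        std::printf("file=%d offset=%d\n", file, offset);
        return 0;
    }
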
diff --git a/src/mongo/db/storage/mmap_v1/heap_record_store_btree.h b/src/mongo/db/storage/mmap_v1/heap_record_store_btree.h
index c44dcf3f473..aa193549440 100644
--- a/src/mongo/db/storage/mmap_v1/heap_record_store_btree.h
+++ b/src/mongo/db/storage/mmap_v1/heap_record_store_btree.h
@@ -38,174 +38,190 @@
namespace mongo {
- /**
- * A RecordStore that stores all data on the heap. This implementation contains only the
- * functionality necessary to test btree.
- */
- class HeapRecordStoreBtree : public RecordStore {
- struct MmapV1RecordHeader;
-
- public:
- // RecordId(0,0) isn't valid for records.
- explicit HeapRecordStoreBtree(StringData ns): RecordStore(ns), _nextId(1) { }
-
- virtual RecordData dataFor(OperationContext* txn, const RecordId& loc) const;
-
- virtual bool findRecord(OperationContext* txn, const RecordId& loc, RecordData* out) const;
-
- virtual void deleteRecord(OperationContext* txn, const RecordId& dl);
-
- virtual StatusWith<RecordId> insertRecord(OperationContext* txn,
- const char* data,
- int len,
- bool enforceQuota);
-
- virtual StatusWith<RecordId> insertRecord(OperationContext* txn,
- const DocWriter* doc,
- bool enforceQuota);
-
- virtual long long numRecords( OperationContext* txn ) const { return _records.size(); }
-
- virtual Status touch(OperationContext* txn, BSONObjBuilder* output) const;
-
- // public methods below here are not necessary to test btree, and will crash when called.
-
- // ------------------------------
-
- virtual StatusWith<RecordId> updateRecord(OperationContext* txn,
- const RecordId& oldLocation,
- const char* data,
- int len,
- bool enforceQuota,
- UpdateNotifier* notifier) {
- invariant(false);
- }
-
- virtual bool updateWithDamagesSupported() const {
- return true;
- }
-
- virtual Status updateWithDamages(OperationContext* txn,
- const RecordId& loc,
- const RecordData& oldRec,
- const char* damageSource,
- const mutablebson::DamageVector& damages) {
- invariant(false);
- }
-
- std::unique_ptr<RecordCursor> getCursor(OperationContext* txn, bool forward) const final {
- invariant(false);
- }
-
-
- virtual Status truncate(OperationContext* txn) { invariant(false); }
-
- virtual void temp_cappedTruncateAfter(OperationContext* txn,
- RecordId end,
- bool inclusive) {
- invariant(false);
- }
-
- virtual bool compactSupported() const { invariant(false); }
-
- virtual Status validate(OperationContext* txn,
- bool full,
- bool scanData,
- ValidateAdaptor* adaptor,
- ValidateResults* results, BSONObjBuilder* output) {
- invariant(false);
- }
+/**
+ * A RecordStore that stores all data on the heap. This implementation contains only the
+ * functionality necessary to test btree.
+ */
+class HeapRecordStoreBtree : public RecordStore {
+ struct MmapV1RecordHeader;
+
+public:
+ // RecordId(0,0) isn't valid for records.
+ explicit HeapRecordStoreBtree(StringData ns) : RecordStore(ns), _nextId(1) {}
+
+ virtual RecordData dataFor(OperationContext* txn, const RecordId& loc) const;
+
+ virtual bool findRecord(OperationContext* txn, const RecordId& loc, RecordData* out) const;
+
+ virtual void deleteRecord(OperationContext* txn, const RecordId& dl);
+
+ virtual StatusWith<RecordId> insertRecord(OperationContext* txn,
+ const char* data,
+ int len,
+ bool enforceQuota);
+
+ virtual StatusWith<RecordId> insertRecord(OperationContext* txn,
+ const DocWriter* doc,
+ bool enforceQuota);
+
+ virtual long long numRecords(OperationContext* txn) const {
+ return _records.size();
+ }
+
+ virtual Status touch(OperationContext* txn, BSONObjBuilder* output) const;
+
+ // public methods below here are not necessary to test btree, and will crash when called.
+
+ // ------------------------------
+
+ virtual StatusWith<RecordId> updateRecord(OperationContext* txn,
+ const RecordId& oldLocation,
+ const char* data,
+ int len,
+ bool enforceQuota,
+ UpdateNotifier* notifier) {
+ invariant(false);
+ }
+
+ virtual bool updateWithDamagesSupported() const {
+ return true;
+ }
+
+ virtual Status updateWithDamages(OperationContext* txn,
+ const RecordId& loc,
+ const RecordData& oldRec,
+ const char* damageSource,
+ const mutablebson::DamageVector& damages) {
+ invariant(false);
+ }
+
+ std::unique_ptr<RecordCursor> getCursor(OperationContext* txn, bool forward) const final {
+ invariant(false);
+ }
+
+
+ virtual Status truncate(OperationContext* txn) {
+ invariant(false);
+ }
+
+ virtual void temp_cappedTruncateAfter(OperationContext* txn, RecordId end, bool inclusive) {
+ invariant(false);
+ }
+
+ virtual bool compactSupported() const {
+ invariant(false);
+ }
+
+ virtual Status validate(OperationContext* txn,
+ bool full,
+ bool scanData,
+ ValidateAdaptor* adaptor,
+ ValidateResults* results,
+ BSONObjBuilder* output) {
+ invariant(false);
+ }
+
+ virtual void appendCustomStats(OperationContext* txn,
+ BSONObjBuilder* result,
+ double scale) const {
+ invariant(false);
+ }
+
+ virtual void increaseStorageSize(OperationContext* txn, int size, bool enforceQuota) {
+ invariant(false);
+ }
+
+ virtual int64_t storageSize(OperationContext* txn,
+ BSONObjBuilder* extraInfo = NULL,
+ int infoLevel = 0) const {
+ invariant(false);
+ }
+
+ virtual long long dataSize(OperationContext* txn) const {
+ invariant(false);
+ }
+
+ virtual MmapV1RecordHeader* recordFor(const RecordId& loc) const {
+ invariant(false);
+ }
+
+ virtual bool isCapped() const {
+ invariant(false);
+ }
+
+ virtual const char* name() const {
+ invariant(false);
+ }
+
+ virtual void updateStatsAfterRepair(OperationContext* txn,
+ long long numRecords,
+ long long dataSize) {
+ invariant(false);
+ }
+ // more things that we actually care about below
+
+private:
+ struct MmapV1RecordHeader {
+ MmapV1RecordHeader() : dataSize(-1), data() {}
+ explicit MmapV1RecordHeader(int size) : dataSize(size), data(new char[size]) {}
+
+ int dataSize;
+ boost::shared_array<char> data;
+ };
- virtual void appendCustomStats(OperationContext* txn,
- BSONObjBuilder* result,
- double scale) const {
- invariant(false);
- }
+ RecordId allocateLoc();
- virtual void increaseStorageSize(OperationContext* txn, int size, bool enforceQuota) {
- invariant(false);
- }
+ typedef std::map<RecordId, HeapRecordStoreBtree::MmapV1RecordHeader> Records;
+ Records _records;
+ int64_t _nextId;
+};
- virtual int64_t storageSize(OperationContext* txn,
- BSONObjBuilder* extraInfo = NULL,
- int infoLevel = 0) const {
- invariant(false);
- }
+/**
+ * A RecoveryUnit for HeapRecordStoreBtree; it exists only to support the btree tests.
+ */
+class HeapRecordStoreBtreeRecoveryUnit : public RecoveryUnit {
+public:
+    void beginUnitOfWork(OperationContext* opCtx) final {}
+ void commitUnitOfWork() final;
+ void abortUnitOfWork() final;
- virtual long long dataSize(OperationContext* txn) const { invariant(false); }
+ virtual bool waitUntilDurable() {
+ return true;
+ }
- virtual MmapV1RecordHeader* recordFor(const RecordId& loc) const { invariant(false); }
+ virtual void abandonSnapshot() {}
- virtual bool isCapped() const { invariant(false); }
+ virtual void registerChange(Change* change) {
+ change->commit();
+ delete change;
+ }
- virtual const char* name() const { invariant(false); }
+ virtual void* writingPtr(void* data, size_t len);
- virtual void updateStatsAfterRepair(OperationContext* txn,
- long long numRecords,
- long long dataSize) {
- invariant(false);
- }
- // more things that we actually care about below
+ virtual void setRollbackWritesDisabled() {}
- private:
- struct MmapV1RecordHeader {
- MmapV1RecordHeader(): dataSize(-1), data() { }
- explicit MmapV1RecordHeader(int size): dataSize(size), data(new char[size]) { }
+ virtual SnapshotId getSnapshotId() const {
+ return SnapshotId();
+ }
- int dataSize;
- boost::shared_array<char> data;
- };
+ // -----------------------
- RecordId allocateLoc();
+ void notifyInsert(HeapRecordStoreBtree* rs, const RecordId& loc);
+ static void notifyInsert(OperationContext* ctx, HeapRecordStoreBtree* rs, const RecordId& loc);
- typedef std::map<RecordId, HeapRecordStoreBtree::MmapV1RecordHeader> Records;
- Records _records;
- int64_t _nextId;
+private:
+ struct InsertEntry {
+ HeapRecordStoreBtree* rs;
+ RecordId loc;
};
+ std::vector<InsertEntry> _insertions;
- /**
- * A RecoveryUnit for HeapRecordStoreBtree, this is for testing btree only.
- */
- class HeapRecordStoreBtreeRecoveryUnit : public RecoveryUnit {
- public:
- void beginUnitOfWork(OperationContext* opCtx) final { };
- void commitUnitOfWork() final;
- void abortUnitOfWork() final;
-
- virtual bool waitUntilDurable() { return true; }
-
- virtual void abandonSnapshot() {}
-
- virtual void registerChange(Change* change) {
- change->commit();
- delete change;
- }
-
- virtual void* writingPtr(void* data, size_t len);
-
- virtual void setRollbackWritesDisabled() {}
-
- virtual SnapshotId getSnapshotId() const { return SnapshotId(); }
-
- // -----------------------
-
- void notifyInsert( HeapRecordStoreBtree* rs, const RecordId& loc );
- static void notifyInsert( OperationContext* ctx,
- HeapRecordStoreBtree* rs, const RecordId& loc );
-
- private:
- struct InsertEntry {
- HeapRecordStoreBtree* rs;
- RecordId loc;
- };
- std::vector<InsertEntry> _insertions;
-
- struct ModEntry {
- void* data;
- size_t len;
- boost::shared_array<char> old;
- };
- std::vector<ModEntry> _mods;
+ struct ModEntry {
+ void* data;
+ size_t len;
+ boost::shared_array<char> old;
};
+ std::vector<ModEntry> _mods;
+};
-} // namespace mongo
+} // namespace mongo
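
For readers tracing the rollback machinery declared above: writingPtr() snapshots the bytes about to change, and abort replays the snapshots newest-first, which is what the "reverse in case we write same area twice" comment in the .cpp is about. A simplified, self-contained sketch of the idea (illustrative names, not the MongoDB class):

    #include <boost/shared_array.hpp>
    #include <cassert>
    #include <cstring>
    #include <vector>

    struct MiniUndoLog {
        struct Mod {
            void* data;
            size_t len;
            boost::shared_array<char> old;
        };
        std::vector<Mod> mods;

        void* writingPtr(void* data, size_t len) {
            Mod m = {data, len, boost::shared_array<char>(new char[len])};
            std::memcpy(m.old.get(), data, len);  // snapshot before the caller writes
            mods.push_back(m);
            return data;
        }

        void abort() {
            for (size_t i = mods.size(); i > 0; i--)  // newest snapshot undone first
                std::memcpy(mods[i - 1].data, mods[i - 1].old.get(), mods[i - 1].len);
            mods.clear();
        }
    };

    int main() {
        char buf[4] = {'a', 'b', 'c', 'd'};
        MiniUndoLog ru;
        static_cast<char*>(ru.writingPtr(buf, 4))[0] = 'X';  // first write
        static_cast<char*>(ru.writingPtr(buf, 4))[0] = 'Y';  // same region again
        ru.abort();
        assert(buf[0] == 'a');  // reverse replay restores the original byte
        return 0;
    }
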
diff --git a/src/mongo/db/storage/mmap_v1/journal_latency_test_cmd.cpp b/src/mongo/db/storage/mmap_v1/journal_latency_test_cmd.cpp
index 8c29741ed7e..0f21961d459 100644
--- a/src/mongo/db/storage/mmap_v1/journal_latency_test_cmd.cpp
+++ b/src/mongo/db/storage/mmap_v1/journal_latency_test_cmd.cpp
@@ -54,102 +54,108 @@
namespace mongo {
- using std::max;
- using std::min;
- using std::string;
- using std::stringstream;
+using std::max;
+using std::min;
+using std::string;
+using std::stringstream;
- namespace dur {
- boost::filesystem::path getJournalDir();
- }
+namespace dur {
+boost::filesystem::path getJournalDir();
+}
- // Testing-only, enabled via command line
- class JournalLatencyTestCmd : public Command {
- public:
- JournalLatencyTestCmd() : Command( "journalLatencyTest" ) {}
+// Testing-only, enabled via command line
+class JournalLatencyTestCmd : public Command {
+public:
+ JournalLatencyTestCmd() : Command("journalLatencyTest") {}
- virtual bool slaveOk() const { return true; }
- virtual bool isWriteCommandForConfigServer() const { return false; }
- virtual bool adminOnly() const { return true; }
- virtual void help(stringstream& h) const { h << "test how long to write and fsync to a test file in the journal/ directory"; }
- // No auth needed because it only works when enabled via command line.
- virtual void addRequiredPrivileges(const std::string& dbname,
- const BSONObj& cmdObj,
- std::vector<Privilege>* out) {}
- bool run(OperationContext* txn,
- const string& dbname,
- BSONObj& cmdObj,
- int,
- string& errmsg,
- BSONObjBuilder& result) {
- boost::filesystem::path p = dur::getJournalDir();
- p /= "journalLatencyTest";
-
- // remove file if already present
- try {
- boost::filesystem::remove(p);
- }
- catch(...) { }
+ virtual bool slaveOk() const {
+ return true;
+ }
+ virtual bool isWriteCommandForConfigServer() const {
+ return false;
+ }
+ virtual bool adminOnly() const {
+ return true;
+ }
+ virtual void help(stringstream& h) const {
+ h << "test how long to write and fsync to a test file in the journal/ directory";
+ }
+ // No auth needed because it only works when enabled via command line.
+ virtual void addRequiredPrivileges(const std::string& dbname,
+ const BSONObj& cmdObj,
+ std::vector<Privilege>* out) {}
+ bool run(OperationContext* txn,
+ const string& dbname,
+ BSONObj& cmdObj,
+ int,
+ string& errmsg,
+ BSONObjBuilder& result) {
+ boost::filesystem::path p = dur::getJournalDir();
+ p /= "journalLatencyTest";
- BSONObjBuilder bb[2];
- for( int pass = 0; pass < 2; pass++ ) {
- LogFile f(p.string());
- AlignedBuilder b(1024 * 1024);
- {
- Timer t;
- for( int i = 0 ; i < 100; i++ ) {
- f.synchronousAppend(b.buf(), 8192);
- }
- bb[pass].append("8KB", t.millis() / 100.0);
- }
- {
- const int N = 50;
- Timer t2;
- long long x = 0;
- for( int i = 0 ; i < N; i++ ) {
- Timer t;
- f.synchronousAppend(b.buf(), 8192);
- x += t.micros();
- sleepmillis(4);
- }
- long long y = t2.micros() - 4*N*1000;
- // not really trusting the timer granularity on all platforms so whichever is higher of x and y
- bb[pass].append("8KBWithPauses", max(x,y) / (N*1000.0));
+ // remove file if already present
+ try {
+ boost::filesystem::remove(p);
+ } catch (...) {
+ }
+
+ BSONObjBuilder bb[2];
+ for (int pass = 0; pass < 2; pass++) {
+ LogFile f(p.string());
+ AlignedBuilder b(1024 * 1024);
+ {
+ Timer t;
+ for (int i = 0; i < 100; i++) {
+ f.synchronousAppend(b.buf(), 8192);
}
- {
+ bb[pass].append("8KB", t.millis() / 100.0);
+ }
+ {
+ const int N = 50;
+ Timer t2;
+ long long x = 0;
+ for (int i = 0; i < N; i++) {
Timer t;
- for( int i = 0 ; i < 20; i++ ) {
- f.synchronousAppend(b.buf(), 1024 * 1024);
- }
- bb[pass].append("1MB", t.millis() / 20.0);
+ f.synchronousAppend(b.buf(), 8192);
+ x += t.micros();
+ sleepmillis(4);
}
- // second time around, we are prealloced.
+ long long y = t2.micros() - 4 * N * 1000;
+            // timer granularity isn't trustworthy on all platforms, so report the higher of x and y
+ bb[pass].append("8KBWithPauses", max(x, y) / (N * 1000.0));
}
- result.append("timeMillis", bb[0].obj());
- result.append("timeMillisWithPrealloc", bb[1].obj());
-
- try {
- remove(p);
- }
- catch(...) { }
-
- try {
- result.append("onSamePartition", onSamePartition(dur::getJournalDir().string(),
- storageGlobalParams.dbpath));
+ {
+ Timer t;
+ for (int i = 0; i < 20; i++) {
+ f.synchronousAppend(b.buf(), 1024 * 1024);
+ }
+ bb[pass].append("1MB", t.millis() / 20.0);
}
- catch(...) { }
-
- return 1;
- }
- };
- MONGO_INITIALIZER(RegisterJournalLatencyTestCmd)(InitializerContext* context) {
- if (Command::testCommandsEnabled) {
- // Leaked intentionally: a Command registers itself when constructed.
- new JournalLatencyTestCmd();
+ // second time around, we are prealloced.
}
- return Status::OK();
- }
+ result.append("timeMillis", bb[0].obj());
+ result.append("timeMillisWithPrealloc", bb[1].obj());
+ try {
+ remove(p);
+ } catch (...) {
+ }
+ try {
+ result.append(
+ "onSamePartition",
+ onSamePartition(dur::getJournalDir().string(), storageGlobalParams.dbpath));
+ } catch (...) {
+ }
+ return 1;
+ }
+};
+MONGO_INITIALIZER(RegisterJournalLatencyTestCmd)(InitializerContext* context) {
+ if (Command::testCommandsEnabled) {
+ // Leaked intentionally: a Command registers itself when constructed.
+ new JournalLatencyTestCmd();
+ }
+ return Status::OK();
+}
}
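
A note on the 8KBWithPauses arithmetic above: x sums the individually timed writes, y takes the wall-clock time of the whole loop and subtracts the N deliberate 4 ms sleeps, and the command reports whichever estimate is higher, scaled to milliseconds per write. A worked example with made-up numbers:

    #include <algorithm>
    #include <cstdio>

    int main() {
        const int N = 50;
        long long x = 150000;                  // e.g. 50 writes averaging 3000 us each
        long long total = 360000;              // e.g. wall clock for the whole loop, in us
        long long y = total - 4LL * N * 1000;  // strip out the 50 * 4 ms of sleeping
        std::printf("%.2f ms per 8KB write\n", std::max(x, y) / (N * 1000.0));  // 3.20
        return 0;
    }
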
diff --git a/src/mongo/db/storage/mmap_v1/logfile.cpp b/src/mongo/db/storage/mmap_v1/logfile.cpp
index 8aa5e32626f..62c3b61bc73 100644
--- a/src/mongo/db/storage/mmap_v1/logfile.cpp
+++ b/src/mongo/db/storage/mmap_v1/logfile.cpp
@@ -53,83 +53,84 @@ using std::string;
namespace mongo {
- LogFile::LogFile(const std::string& name, bool readwrite) : _name(name) {
- _fd = CreateFile(
- toNativeString(name.c_str()).c_str(),
- (readwrite?GENERIC_READ:0)|GENERIC_WRITE,
- FILE_SHARE_READ,
- NULL,
- OPEN_ALWAYS,
- FILE_FLAG_NO_BUFFERING,
- NULL);
- if( _fd == INVALID_HANDLE_VALUE ) {
- DWORD e = GetLastError();
- uasserted(13518, str::stream() << "couldn't open file " << name << " for writing " << errnoWithDescription(e));
- }
- SetFilePointer(_fd, 0, 0, FILE_BEGIN);
+LogFile::LogFile(const std::string& name, bool readwrite) : _name(name) {
+ _fd = CreateFile(toNativeString(name.c_str()).c_str(),
+ (readwrite ? GENERIC_READ : 0) | GENERIC_WRITE,
+ FILE_SHARE_READ,
+ NULL,
+ OPEN_ALWAYS,
+ FILE_FLAG_NO_BUFFERING,
+ NULL);
+ if (_fd == INVALID_HANDLE_VALUE) {
+ DWORD e = GetLastError();
+ uasserted(13518,
+ str::stream() << "couldn't open file " << name << " for writing "
+ << errnoWithDescription(e));
}
+ SetFilePointer(_fd, 0, 0, FILE_BEGIN);
+}
- LogFile::~LogFile() {
- if( _fd != INVALID_HANDLE_VALUE )
- CloseHandle(_fd);
- }
+LogFile::~LogFile() {
+ if (_fd != INVALID_HANDLE_VALUE)
+ CloseHandle(_fd);
+}
- void LogFile::truncate() {
- verify(_fd != INVALID_HANDLE_VALUE);
+void LogFile::truncate() {
+ verify(_fd != INVALID_HANDLE_VALUE);
- if (!SetEndOfFile(_fd)){
- msgasserted(15871, "Couldn't truncate file: " + errnoWithDescription());
- }
+ if (!SetEndOfFile(_fd)) {
+ msgasserted(15871, "Couldn't truncate file: " + errnoWithDescription());
}
+}
- void LogFile::writeAt(unsigned long long offset, const void *_buf, size_t _len) {
-// TODO 64 bit offsets
- OVERLAPPED o;
- memset(&o,0,sizeof(o));
- (unsigned long long&) o.Offset = offset;
- BOOL ok= WriteFile(_fd, _buf, _len, 0, &o);
- verify(ok);
- }
+void LogFile::writeAt(unsigned long long offset, const void* _buf, size_t _len) {
+ // TODO 64 bit offsets
+ OVERLAPPED o;
+ memset(&o, 0, sizeof(o));
+ (unsigned long long&)o.Offset = offset;
+ BOOL ok = WriteFile(_fd, _buf, _len, 0, &o);
+ verify(ok);
+}
- void LogFile::readAt(unsigned long long offset, void *_buf, size_t _len) {
-// TODO 64 bit offsets
- OVERLAPPED o;
- memset(&o,0,sizeof(o));
- (unsigned long long&) o.Offset = offset;
- DWORD nr;
- BOOL ok = ReadFile(_fd, _buf, _len, &nr, &o);
- if( !ok ) {
- string e = errnoWithDescription();
- //DWORD e = GetLastError();
- log() << "LogFile readAt(" << offset << ") len:" << _len << "errno:" << e << endl;
- verify(false);
- }
+void LogFile::readAt(unsigned long long offset, void* _buf, size_t _len) {
+ // TODO 64 bit offsets
+ OVERLAPPED o;
+ memset(&o, 0, sizeof(o));
+ (unsigned long long&)o.Offset = offset;
+ DWORD nr;
+ BOOL ok = ReadFile(_fd, _buf, _len, &nr, &o);
+ if (!ok) {
+ string e = errnoWithDescription();
+ // DWORD e = GetLastError();
+ log() << "LogFile readAt(" << offset << ") len:" << _len << "errno:" << e << endl;
+ verify(false);
}
+}
- void LogFile::synchronousAppend(const void *_buf, size_t _len) {
- const size_t BlockSize = 8 * 1024 * 1024;
- verify(_fd);
- verify(_len % g_minOSPageSizeBytes == 0);
- const char *buf = (const char *) _buf;
- size_t left = _len;
- while( left ) {
- size_t toWrite = std::min(left, BlockSize);
- DWORD written;
- if( !WriteFile(_fd, buf, toWrite, &written, NULL) ) {
- DWORD e = GetLastError();
- if( e == 87 )
- msgasserted(13519, "error 87 appending to file - invalid parameter");
- else
- uasserted(13517, str::stream() << "error appending to file " << _name << ' ' << _len << ' ' << toWrite << ' ' << errnoWithDescription(e));
- }
- else {
- dassert( written == toWrite );
- }
- left -= written;
- buf += written;
+void LogFile::synchronousAppend(const void* _buf, size_t _len) {
+ const size_t BlockSize = 8 * 1024 * 1024;
+ verify(_fd);
+ verify(_len % g_minOSPageSizeBytes == 0);
+ const char* buf = (const char*)_buf;
+ size_t left = _len;
+ while (left) {
+ size_t toWrite = std::min(left, BlockSize);
+ DWORD written;
+ if (!WriteFile(_fd, buf, toWrite, &written, NULL)) {
+ DWORD e = GetLastError();
+ if (e == 87)
+ msgasserted(13519, "error 87 appending to file - invalid parameter");
+ else
+ uasserted(13517,
+ str::stream() << "error appending to file " << _name << ' ' << _len << ' '
+ << toWrite << ' ' << errnoWithDescription(e));
+ } else {
+ dassert(written == toWrite);
}
+ left -= written;
+ buf += written;
}
-
+}
}
#else
@@ -147,124 +148,123 @@ namespace mongo {
namespace mongo {
- LogFile::LogFile(const std::string& name, bool readwrite) : _name(name) {
- int options = O_CREAT
- | (readwrite?O_RDWR:O_WRONLY)
+LogFile::LogFile(const std::string& name, bool readwrite) : _name(name) {
+ int options = O_CREAT | (readwrite ? O_RDWR : O_WRONLY)
#if defined(O_DIRECT)
- | O_DIRECT
+ | O_DIRECT
#endif
#if defined(O_NOATIME)
- | O_NOATIME
+ | O_NOATIME
#endif
- ;
+ ;
- _fd = open(name.c_str(), options, S_IRUSR | S_IWUSR);
- _blkSize = g_minOSPageSizeBytes;
+ _fd = open(name.c_str(), options, S_IRUSR | S_IWUSR);
+ _blkSize = g_minOSPageSizeBytes;
#if defined(O_DIRECT)
- _direct = true;
- if( _fd < 0 ) {
- _direct = false;
- options &= ~O_DIRECT;
- _fd = open(name.c_str(), options, S_IRUSR | S_IWUSR);
- }
+ _direct = true;
+ if (_fd < 0) {
+ _direct = false;
+ options &= ~O_DIRECT;
+ _fd = open(name.c_str(), options, S_IRUSR | S_IWUSR);
+ }
#ifdef __linux__
- ssize_t tmpBlkSize = ioctl(_fd, BLKBSZGET);
- // TODO: We need some sanity checking on tmpBlkSize even if ioctl() did not fail.
- if (tmpBlkSize > 0) {
- _blkSize = (size_t)tmpBlkSize;
- }
+ ssize_t tmpBlkSize = ioctl(_fd, BLKBSZGET);
+ // TODO: We need some sanity checking on tmpBlkSize even if ioctl() did not fail.
+ if (tmpBlkSize > 0) {
+ _blkSize = (size_t)tmpBlkSize;
+ }
#endif
#else
- _direct = false;
+ _direct = false;
#endif
- if( _fd < 0 ) {
- uasserted(13516, str::stream() << "couldn't open file " << name << " for writing " << errnoWithDescription());
- }
-
- flushMyDirectory(name);
+ if (_fd < 0) {
+ uasserted(13516,
+ str::stream() << "couldn't open file " << name << " for writing "
+ << errnoWithDescription());
}
- LogFile::~LogFile() {
- if( _fd >= 0 )
- close(_fd);
- _fd = -1;
- }
+ flushMyDirectory(name);
+}
- void LogFile::truncate() {
- verify(_fd >= 0);
+LogFile::~LogFile() {
+ if (_fd >= 0)
+ close(_fd);
+ _fd = -1;
+}
- BOOST_STATIC_ASSERT(sizeof(off_t) == 8); // we don't want overflow here
- const off_t pos = lseek(_fd, 0, SEEK_CUR); // doesn't actually seek
- if (ftruncate(_fd, pos) != 0){
- msgasserted(15873, "Couldn't truncate file: " + errnoWithDescription());
- }
+void LogFile::truncate() {
+ verify(_fd >= 0);
- fsync(_fd);
+ BOOST_STATIC_ASSERT(sizeof(off_t) == 8); // we don't want overflow here
+ const off_t pos = lseek(_fd, 0, SEEK_CUR); // doesn't actually seek
+ if (ftruncate(_fd, pos) != 0) {
+ msgasserted(15873, "Couldn't truncate file: " + errnoWithDescription());
}
- void LogFile::writeAt(unsigned long long offset, const void *buf, size_t len) {
- verify(((size_t)buf) % g_minOSPageSizeBytes == 0); // aligned
- ssize_t written = pwrite(_fd, buf, len, offset);
- if( written != (ssize_t) len ) {
- log() << "writeAt fails " << errnoWithDescription() << endl;
- }
+ fsync(_fd);
+}
+
+void LogFile::writeAt(unsigned long long offset, const void* buf, size_t len) {
+ verify(((size_t)buf) % g_minOSPageSizeBytes == 0); // aligned
+ ssize_t written = pwrite(_fd, buf, len, offset);
+ if (written != (ssize_t)len) {
+ log() << "writeAt fails " << errnoWithDescription() << endl;
+ }
#if defined(__linux__)
- fdatasync(_fd);
+ fdatasync(_fd);
#else
- fsync(_fd);
+ fsync(_fd);
#endif
- }
-
- void LogFile::readAt(unsigned long long offset, void *_buf, size_t _len) {
- verify(((size_t)_buf) % g_minOSPageSizeBytes == 0); // aligned
- ssize_t rd = pread(_fd, _buf, _len, offset);
- verify( rd != -1 );
- }
+}
- void LogFile::synchronousAppend(const void *b, size_t len) {
+void LogFile::readAt(unsigned long long offset, void* _buf, size_t _len) {
+ verify(((size_t)_buf) % g_minOSPageSizeBytes == 0); // aligned
+ ssize_t rd = pread(_fd, _buf, _len, offset);
+ verify(rd != -1);
+}
- const char *buf = static_cast<const char *>( b );
- ssize_t charsToWrite = static_cast<ssize_t>( len );
+void LogFile::synchronousAppend(const void* b, size_t len) {
+ const char* buf = static_cast<const char*>(b);
+ ssize_t charsToWrite = static_cast<ssize_t>(len);
- fassert( 16144, charsToWrite >= 0 );
- fassert( 16142, _fd >= 0 );
- fassert( 16143, reinterpret_cast<size_t>( buf ) % _blkSize == 0 ); // aligned
+ fassert(16144, charsToWrite >= 0);
+ fassert(16142, _fd >= 0);
+ fassert(16143, reinterpret_cast<size_t>(buf) % _blkSize == 0); // aligned
#ifdef POSIX_FADV_DONTNEED
- const off_t pos = lseek(_fd, 0, SEEK_CUR); // doesn't actually seek, just get current position
+ const off_t pos = lseek(_fd, 0, SEEK_CUR); // doesn't actually seek, just get current position
#endif
- while ( charsToWrite > 0 ) {
- const ssize_t written = write( _fd, buf, static_cast<size_t>( charsToWrite ) );
- if ( -1 == written ) {
- log() << "LogFile::synchronousAppend failed with " << charsToWrite
- << " bytes unwritten out of " << len << " bytes; b=" << b << ' '
- << errnoWithDescription() << std::endl;
- fassertFailed( 13515 );
- }
- buf += written;
- charsToWrite -= written;
+ while (charsToWrite > 0) {
+ const ssize_t written = write(_fd, buf, static_cast<size_t>(charsToWrite));
+ if (-1 == written) {
+ log() << "LogFile::synchronousAppend failed with " << charsToWrite
+ << " bytes unwritten out of " << len << " bytes; b=" << b << ' '
+ << errnoWithDescription() << std::endl;
+ fassertFailed(13515);
}
+ buf += written;
+ charsToWrite -= written;
+ }
- if(
+ if (
#if defined(__linux__)
- fdatasync(_fd) < 0
+ fdatasync(_fd) < 0
#else
- fsync(_fd)
+ fsync(_fd)
#endif
- ) {
- log() << "error appending to file on fsync " << ' ' << errnoWithDescription();
- fassertFailed( 13514 );
- }
+ ) {
+ log() << "error appending to file on fsync " << ' ' << errnoWithDescription();
+ fassertFailed(13514);
+ }
#ifdef POSIX_FADV_DONTNEED
- if (!_direct)
- posix_fadvise(_fd, pos, len, POSIX_FADV_DONTNEED);
+ if (!_direct)
+ posix_fadvise(_fd, pos, len, POSIX_FADV_DONTNEED);
#endif
- }
-
+}
}
#endif
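
One practical consequence of the fasserts above: synchronousAppend() requires the caller's buffer to be aligned to _blkSize (in-tree callers use AlignedBuilder for this), and the length checks elsewhere expect multiples of the OS page size. A sketch of what a POSIX caller has to arrange, with 4096 standing in for the runtime-detected block size:

    #include <cstdlib>
    #include <cstring>

    int main() {
        const size_t kAlign = 4096;      // stand-in for _blkSize / g_minOSPageSizeBytes
        const size_t kLen = 2 * kAlign;  // keep the length a multiple of the page size
        void* buf = nullptr;
        if (posix_memalign(&buf, kAlign, kLen) != 0)  // aligned allocation
            return 1;
        std::memset(buf, 0, kLen);
        // LogFile lf("journal.test");      // real call site, omitted in this sketch
        // lf.synchronousAppend(buf, kLen);
        std::free(buf);
        return 0;
    }
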
diff --git a/src/mongo/db/storage/mmap_v1/logfile.h b/src/mongo/db/storage/mmap_v1/logfile.h
index 278b9c162aa..4a3bb5535e2 100644
--- a/src/mongo/db/storage/mmap_v1/logfile.h
+++ b/src/mongo/db/storage/mmap_v1/logfile.h
@@ -35,43 +35,42 @@
namespace mongo {
- class LogFile {
- public:
- /** create the file and open. must not already exist.
- throws UserAssertion on i/o error
- */
- LogFile(const std::string& name, bool readwrite = false);
+class LogFile {
+public:
+ /** create the file and open. must not already exist.
+ throws UserAssertion on i/o error
+ */
+ LogFile(const std::string& name, bool readwrite = false);
- /** closes */
- ~LogFile();
+ /** closes */
+ ~LogFile();
- /** append to file. does not return until sync'd. uses direct i/o when possible.
- throws UserAssertion on an i/o error
- note direct i/o may have alignment requirements
- */
- void synchronousAppend(const void *buf, size_t len);
+ /** append to file. does not return until sync'd. uses direct i/o when possible.
+ throws UserAssertion on an i/o error
+ note direct i/o may have alignment requirements
+ */
+ void synchronousAppend(const void* buf, size_t len);
- /** write at specified offset. must be aligned. noreturn until physically written. thread safe */
- void writeAt(unsigned long long offset, const void *_bug, size_t _len);
+    /** write at the specified offset. must be aligned. does not return until physically written. thread safe */
+    void writeAt(unsigned long long offset, const void* _buf, size_t _len);
- void readAt(unsigned long long offset, void *_buf, size_t _len);
+ void readAt(unsigned long long offset, void* _buf, size_t _len);
- const std::string _name;
+ const std::string _name;
- void truncate(); // Removes extra data after current position
+ void truncate(); // Removes extra data after current position
- private:
+private:
#if defined(_WIN32)
- typedef HANDLE fd_type;
+ typedef HANDLE fd_type;
#else
- typedef int fd_type;
+ typedef int fd_type;
#endif
- fd_type _fd;
- bool _direct; // are we using direct I/O
-
- // Block size, in case of direct I/O we need to test alignment against the page size,
- // which can be different than 4kB.
- size_t _blkSize;
- };
+ fd_type _fd;
+ bool _direct; // are we using direct I/O
+    // Block size, in case of direct I/O we need to test alignment against the page size,
+    // which can differ from 4 KB.
+ size_t _blkSize;
+};
}
diff --git a/src/mongo/db/storage/mmap_v1/mmap.cpp b/src/mongo/db/storage/mmap_v1/mmap.cpp
index e9519fc7d94..57559d3038e 100644
--- a/src/mongo/db/storage/mmap_v1/mmap.cpp
+++ b/src/mongo/db/storage/mmap_v1/mmap.cpp
@@ -46,213 +46,220 @@
namespace mongo {
- using std::endl;
- using std::map;
- using std::set;
- using std::string;
- using std::stringstream;
- using std::vector;
-
- void minOSPageSizeBytesTest(size_t minOSPageSizeBytes) {
- fassert( 16325, minOSPageSizeBytes > 0 );
- fassert( 16326, minOSPageSizeBytes < 1000000 );
- // check to see if the page size is a power of 2
- fassert( 16327, (minOSPageSizeBytes & (minOSPageSizeBytes - 1)) == 0);
- }
+using std::endl;
+using std::map;
+using std::set;
+using std::string;
+using std::stringstream;
+using std::vector;
+
+void minOSPageSizeBytesTest(size_t minOSPageSizeBytes) {
+ fassert(16325, minOSPageSizeBytes > 0);
+ fassert(16326, minOSPageSizeBytes < 1000000);
+ // check to see if the page size is a power of 2
+ fassert(16327, (minOSPageSizeBytes & (minOSPageSizeBytes - 1)) == 0);
+}
namespace {
- set<MongoFile*> mmfiles;
- map<string,MongoFile*> pathToFile;
+set<MongoFile*> mmfiles;
+map<string, MongoFile*> pathToFile;
} // namespace
- /* Create. Must not exist.
- @param zero fill file with zeros when true
- */
- void* MemoryMappedFile::create(const std::string& filename, unsigned long long len, bool zero) {
- uassert( 13468, string("can't create file already exists ") + filename, ! boost::filesystem::exists(filename) );
- void *p = map(filename.c_str(), len);
- if( p && zero ) {
- size_t sz = (size_t) len;
- verify( len == sz );
- memset(p, 0, sz);
- }
- return p;
+/* Create. Must not exist.
+@param zero fill file with zeros when true
+*/
+void* MemoryMappedFile::create(const std::string& filename, unsigned long long len, bool zero) {
+ uassert(13468,
+ string("can't create file already exists ") + filename,
+ !boost::filesystem::exists(filename));
+ void* p = map(filename.c_str(), len);
+ if (p && zero) {
+ size_t sz = (size_t)len;
+ verify(len == sz);
+ memset(p, 0, sz);
}
-
- /*static*/ void MemoryMappedFile::updateLength( const char *filename, unsigned long long &length ) {
- if ( !boost::filesystem::exists( filename ) )
- return;
- // make sure we map full length if preexisting file.
- boost::uintmax_t l = boost::filesystem::file_size( filename );
- length = l;
+ return p;
+}
+
+/*static*/ void MemoryMappedFile::updateLength(const char* filename, unsigned long long& length) {
+ if (!boost::filesystem::exists(filename))
+ return;
+ // make sure we map full length if preexisting file.
+ boost::uintmax_t l = boost::filesystem::file_size(filename);
+ length = l;
+}
+
+void* MemoryMappedFile::map(const char* filename) {
+ unsigned long long l;
+ try {
+ l = boost::filesystem::file_size(filename);
+ } catch (boost::filesystem::filesystem_error& e) {
+ uasserted(15922,
+ mongoutils::str::stream() << "couldn't get file length when opening mapping "
+ << filename << ' ' << e.what());
}
-
- void* MemoryMappedFile::map(const char *filename) {
- unsigned long long l;
- try {
- l = boost::filesystem::file_size( filename );
- }
- catch(boost::filesystem::filesystem_error& e) {
- uasserted(15922, mongoutils::str::stream() << "couldn't get file length when opening mapping " << filename << ' ' << e.what() );
- }
- return map( filename , l );
+ return map(filename, l);
+}
+void* MemoryMappedFile::mapWithOptions(const char* filename, int options) {
+ unsigned long long l;
+ try {
+ l = boost::filesystem::file_size(filename);
+ } catch (boost::filesystem::filesystem_error& e) {
+ uasserted(15923,
+ mongoutils::str::stream() << "couldn't get file length when opening mapping "
+ << filename << ' ' << e.what());
}
- void* MemoryMappedFile::mapWithOptions(const char *filename, int options) {
- unsigned long long l;
- try {
- l = boost::filesystem::file_size( filename );
- }
- catch(boost::filesystem::filesystem_error& e) {
- uasserted(15923, mongoutils::str::stream() << "couldn't get file length when opening mapping " << filename << ' ' << e.what() );
- }
- return map( filename , l, options );
+ return map(filename, l, options);
+}
+
+/* --- MongoFile -------------------------------------------------
+ this is the administrative stuff
+*/
+
+RWLockRecursiveNongreedy LockMongoFilesShared::mmmutex("mmmutex", 10 * 60 * 1000 /* 10 minutes */);
+unsigned LockMongoFilesShared::era = 99; // note this rolls over
+
+set<MongoFile*>& MongoFile::getAllFiles() {
+ return mmfiles;
+}
+
+/* subclass must call in destructor (or at close).
+ removes this from pathToFile and other maps
+ safe to call more than once, albeit might be wasted work
+ ideal to call close to the close, if the close is well before object destruction
+*/
+void MongoFile::destroyed() {
+ LockMongoFilesShared::assertExclusivelyLocked();
+ mmfiles.erase(this);
+ pathToFile.erase(filename());
+}
+
+/*static*/
+void MongoFile::closeAllFiles(stringstream& message) {
+ static int closingAllFiles = 0;
+ if (closingAllFiles) {
+ message << "warning closingAllFiles=" << closingAllFiles << endl;
+ return;
}
+ ++closingAllFiles;
- /* --- MongoFile -------------------------------------------------
- this is the administrative stuff
- */
-
- RWLockRecursiveNongreedy LockMongoFilesShared::mmmutex("mmmutex",10*60*1000 /* 10 minutes */);
- unsigned LockMongoFilesShared::era = 99; // note this rolls over
-
- set<MongoFile*>& MongoFile::getAllFiles() { return mmfiles; }
+ LockMongoFilesExclusive lk;
- /* subclass must call in destructor (or at close).
- removes this from pathToFile and other maps
- safe to call more than once, albeit might be wasted work
- ideal to call close to the close, if the close is well before object destruction
- */
- void MongoFile::destroyed() {
- LockMongoFilesShared::assertExclusivelyLocked();
- mmfiles.erase(this);
- pathToFile.erase( filename() );
+ ProgressMeter pm(mmfiles.size(), 2, 1, "files", "File Closing Progress");
+ set<MongoFile*> temp = mmfiles;
+ for (set<MongoFile*>::iterator i = temp.begin(); i != temp.end(); i++) {
+ (*i)->close(); // close() now removes from mmfiles
+ pm.hit();
}
+ message << "closeAllFiles() finished";
+ --closingAllFiles;
+}
- /*static*/
- void MongoFile::closeAllFiles( stringstream &message ) {
- static int closingAllFiles = 0;
- if ( closingAllFiles ) {
- message << "warning closingAllFiles=" << closingAllFiles << endl;
- return;
- }
- ++closingAllFiles;
+/*static*/ long long MongoFile::totalMappedLength() {
+ unsigned long long total = 0;
- LockMongoFilesExclusive lk;
+ LockMongoFilesShared lk;
- ProgressMeter pm(mmfiles.size(), 2, 1, "files", "File Closing Progress");
- set<MongoFile*> temp = mmfiles;
- for ( set<MongoFile*>::iterator i = temp.begin(); i != temp.end(); i++ ) {
- (*i)->close(); // close() now removes from mmfiles
- pm.hit();
- }
- message << "closeAllFiles() finished";
- --closingAllFiles;
- }
+ for (set<MongoFile*>::iterator i = mmfiles.begin(); i != mmfiles.end(); i++)
+ total += (*i)->length();
- /*static*/ long long MongoFile::totalMappedLength() {
- unsigned long long total = 0;
+ return total;
+}
- LockMongoFilesShared lk;
-
- for ( set<MongoFile*>::iterator i = mmfiles.begin(); i != mmfiles.end(); i++ )
- total += (*i)->length();
-
- return total;
- }
+void nullFunc() {}
- void nullFunc() { }
+// callback notifications
+void (*MongoFile::notifyPreFlush)() = nullFunc;
+void (*MongoFile::notifyPostFlush)() = nullFunc;
- // callback notifications
- void (*MongoFile::notifyPreFlush)() = nullFunc;
- void (*MongoFile::notifyPostFlush)() = nullFunc;
+/*static*/ int MongoFile::flushAll(bool sync) {
+ if (sync)
+ notifyPreFlush();
+ int x = _flushAll(sync);
+ if (sync)
+ notifyPostFlush();
+ return x;
+}
- /*static*/ int MongoFile::flushAll( bool sync ) {
- if ( sync ) notifyPreFlush();
- int x = _flushAll(sync);
- if ( sync ) notifyPostFlush();
- return x;
- }
+/*static*/ int MongoFile::_flushAll(bool sync) {
+ if (!sync) {
+ int num = 0;
+ LockMongoFilesShared lk;
+ for (set<MongoFile*>::iterator i = mmfiles.begin(); i != mmfiles.end(); i++) {
+ num++;
+ MongoFile* mmf = *i;
+ if (!mmf)
+ continue;
- /*static*/ int MongoFile::_flushAll( bool sync ) {
- if ( ! sync ) {
- int num = 0;
- LockMongoFilesShared lk;
- for ( set<MongoFile*>::iterator i = mmfiles.begin(); i != mmfiles.end(); i++ ) {
- num++;
- MongoFile * mmf = *i;
- if ( ! mmf )
- continue;
-
- mmf->flush( sync );
- }
- return num;
+ mmf->flush(sync);
}
+ return num;
+ }
- // want to do it sync
-
- // get a thread-safe Flushable object for each file first in a single lock
- // so that we can iterate and flush without doing any locking here
- OwnedPointerVector<Flushable> thingsToFlushWrapper;
- vector<Flushable*>& thingsToFlush = thingsToFlushWrapper.mutableVector();
- {
- LockMongoFilesShared lk;
- for ( set<MongoFile*>::iterator i = mmfiles.begin(); i != mmfiles.end(); i++ ) {
- MongoFile* mmf = *i;
- if ( !mmf )
- continue;
- thingsToFlush.push_back( mmf->prepareFlush() );
- }
- }
+ // want to do it sync
- for ( size_t i = 0; i < thingsToFlush.size(); i++ ) {
- thingsToFlush[i]->flush();
+ // get a thread-safe Flushable object for each file first in a single lock
+ // so that we can iterate and flush without doing any locking here
+ OwnedPointerVector<Flushable> thingsToFlushWrapper;
+ vector<Flushable*>& thingsToFlush = thingsToFlushWrapper.mutableVector();
+ {
+ LockMongoFilesShared lk;
+ for (set<MongoFile*>::iterator i = mmfiles.begin(); i != mmfiles.end(); i++) {
+ MongoFile* mmf = *i;
+ if (!mmf)
+ continue;
+ thingsToFlush.push_back(mmf->prepareFlush());
}
-
- return thingsToFlush.size();
- }
-
- void MongoFile::created() {
- LockMongoFilesExclusive lk;
- mmfiles.insert(this);
}
- void MongoFile::setFilename(const std::string& fn) {
- LockMongoFilesExclusive lk;
- verify( _filename.empty() );
- _filename = boost::filesystem::absolute(fn).generic_string();
- MongoFile *&ptf = pathToFile[_filename];
- massert(13617, "MongoFile : multiple opens of same filename", ptf == 0);
- ptf = this;
+ for (size_t i = 0; i < thingsToFlush.size(); i++) {
+ thingsToFlush[i]->flush();
}
- MongoFile* MongoFileFinder::findByPath(const std::string& path) const {
- return mapFindWithDefault(pathToFile,
- boost::filesystem::absolute(path).generic_string(),
- static_cast<MongoFile*>(NULL));
+ return thingsToFlush.size();
+}
+
+void MongoFile::created() {
+ LockMongoFilesExclusive lk;
+ mmfiles.insert(this);
+}
+
+void MongoFile::setFilename(const std::string& fn) {
+ LockMongoFilesExclusive lk;
+ verify(_filename.empty());
+ _filename = boost::filesystem::absolute(fn).generic_string();
+ MongoFile*& ptf = pathToFile[_filename];
+ massert(13617, "MongoFile : multiple opens of same filename", ptf == 0);
+ ptf = this;
+}
+
+MongoFile* MongoFileFinder::findByPath(const std::string& path) const {
+ return mapFindWithDefault(pathToFile,
+ boost::filesystem::absolute(path).generic_string(),
+ static_cast<MongoFile*>(NULL));
+}
+
+
+void printMemInfo(const char* where) {
+ LogstreamBuilder out = log();
+ out << "mem info: ";
+ if (where)
+ out << where << " ";
+
+ ProcessInfo pi;
+ if (!pi.supported()) {
+ out << " not supported";
+ return;
}
+ out << "vsize: " << pi.getVirtualMemorySize() << " resident: " << pi.getResidentSize()
+ << " mapped: " << (MemoryMappedFile::totalMappedLength() / (1024 * 1024));
+}
- void printMemInfo( const char * where ) {
- LogstreamBuilder out = log();
- out << "mem info: ";
- if ( where )
- out << where << " ";
-
- ProcessInfo pi;
- if ( ! pi.supported() ) {
- out << " not supported";
- return;
- }
-
- out << "vsize: " << pi.getVirtualMemorySize()
- << " resident: " << pi.getResidentSize()
- << " mapped: " << ( MemoryMappedFile::totalMappedLength() / ( 1024 * 1024 ) );
- }
-
- void dataSyncFailedHandler() {
- log() << "error syncing data to disk, probably a disk error";
- log() << " shutting down immediately to avoid corruption";
- fassertFailed( 17346 );
- }
+void dataSyncFailedHandler() {
+ log() << "error syncing data to disk, probably a disk error";
+ log() << " shutting down immediately to avoid corruption";
+ fassertFailed(17346);
+}
-} // namespace mongo
+} // namespace mongo
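
The sync branch of _flushAll() above is worth a second look: it collects a thread-safe Flushable handle for every file while holding the registry lock, then performs the slow fsync work with the lock released. A schematic of that two-phase shape (simplified stand-ins; the real code uses LockMongoFilesShared and OwnedPointerVector):

    #include <memory>
    #include <mutex>
    #include <vector>

    struct Flushable {
        virtual ~Flushable() {}
        virtual void flush() = 0;
    };
    struct File {
        virtual ~File() {}
        virtual Flushable* prepareFlush() = 0;  // must hand back a thread-safe handle
    };

    int flushAllSync(std::mutex& registryMutex, std::vector<File*>& files) {
        std::vector<std::unique_ptr<Flushable>> toFlush;
        {
            std::lock_guard<std::mutex> lk(registryMutex);  // phase 1: short critical section
            for (File* f : files)
                if (f)
                    toFlush.emplace_back(f->prepareFlush());
        }
        for (auto& fl : toFlush)  // phase 2: slow I/O without holding the lock
            fl->flush();
        return int(toFlush.size());
    }

    struct NoopFlushable : Flushable {
        void flush() override {}
    };
    struct NoopFile : File {
        Flushable* prepareFlush() override {
            return new NoopFlushable();
        }
    };

    int main() {
        std::mutex m;
        NoopFile f;
        std::vector<File*> files = {&f, nullptr};
        return flushAllSync(m, files) == 1 ? 0 : 1;
    }
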
diff --git a/src/mongo/db/storage/mmap_v1/mmap.h b/src/mongo/db/storage/mmap_v1/mmap.h
index f70b64c96eb..ae9a0796a4b 100644
--- a/src/mongo/db/storage/mmap_v1/mmap.h
+++ b/src/mongo/db/storage/mmap_v1/mmap.h
@@ -38,225 +38,249 @@
namespace mongo {
#if !defined(_WIN32)
- typedef int HANDLE;
+typedef int HANDLE;
#endif
- extern const size_t g_minOSPageSizeBytes;
- void minOSPageSizeBytesTest(size_t minOSPageSizeBytes); // lame-o
+extern const size_t g_minOSPageSizeBytes;
+void minOSPageSizeBytesTest(size_t minOSPageSizeBytes); // lame-o
- // call this if syncing data fails
- void dataSyncFailedHandler();
+// call this if syncing data fails
+void dataSyncFailedHandler();
- class MAdvise {
- MONGO_DISALLOW_COPYING(MAdvise);
- public:
- enum Advice { Sequential=1 , Random=2 };
- MAdvise(void *p, unsigned len, Advice a);
- ~MAdvise(); // destructor resets the range to MADV_NORMAL
- private:
- void *_p;
- unsigned _len;
- };
+class MAdvise {
+ MONGO_DISALLOW_COPYING(MAdvise);
- // lock order: lock dbMutex before this if you lock both
- class LockMongoFilesShared {
- friend class LockMongoFilesExclusive;
- static RWLockRecursiveNongreedy mmmutex;
- static unsigned era;
- RWLockRecursive::Shared lk;
- public:
- LockMongoFilesShared() : lk(mmmutex) { }
+public:
+ enum Advice { Sequential = 1, Random = 2 };
+ MAdvise(void* p, unsigned len, Advice a);
+ ~MAdvise(); // destructor resets the range to MADV_NORMAL
+private:
+ void* _p;
+ unsigned _len;
+};
- /** era changes anytime memory maps come and go. thus you can use this as a cheap way to check
- if nothing has changed since the last time you locked. Of course you must be shared locked
- at the time of this call, otherwise someone could be in progress.
+// lock order: lock dbMutex before this if you lock both
+class LockMongoFilesShared {
+ friend class LockMongoFilesExclusive;
+ static RWLockRecursiveNongreedy mmmutex;
+ static unsigned era;
+ RWLockRecursive::Shared lk;
- This is used for yielding; see PageFaultException::touch().
- */
- static unsigned getEra() { return era; }
+public:
+ LockMongoFilesShared() : lk(mmmutex) {}
- static void assertExclusivelyLocked() { mmmutex.assertExclusivelyLocked(); }
- static void assertAtLeastReadLocked() { mmmutex.assertAtLeastReadLocked(); }
- };
+    /** The era changes whenever memory maps come and go, so it can be used as a cheap check that
+        nothing has changed since the last time you locked. You must be shared locked at the time
+        of this call; otherwise a map or unmap could be in progress.
+
+ This is used for yielding; see PageFaultException::touch().
+ */
+ static unsigned getEra() {
+ return era;
+ }
+
+ static void assertExclusivelyLocked() {
+ mmmutex.assertExclusivelyLocked();
+ }
+ static void assertAtLeastReadLocked() {
+ mmmutex.assertAtLeastReadLocked();
+ }
+};
+
+class LockMongoFilesExclusive {
+ RWLockRecursive::Exclusive lk;
- class LockMongoFilesExclusive {
- RWLockRecursive::Exclusive lk;
+public:
+ LockMongoFilesExclusive() : lk(LockMongoFilesShared::mmmutex) {
+ LockMongoFilesShared::era++;
+ }
+};
+
+/* the administrative-ish stuff here */
+class MongoFile {
+ MONGO_DISALLOW_COPYING(MongoFile);
+
+public:
+ /** Flushable has to fail nicely if the underlying object gets killed */
+ class Flushable {
public:
- LockMongoFilesExclusive() : lk(LockMongoFilesShared::mmmutex) {
- LockMongoFilesShared::era++;
- }
+ virtual ~Flushable() {}
+ virtual void flush() = 0;
};
- /* the administrative-ish stuff here */
- class MongoFile {
- MONGO_DISALLOW_COPYING(MongoFile);
- public:
- /** Flushable has to fail nicely if the underlying object gets killed */
- class Flushable {
- public:
- virtual ~Flushable() {}
- virtual void flush() = 0;
- };
-
- MongoFile() {}
- virtual ~MongoFile() {}
-
- enum Options {
- SEQUENTIAL = 1, // hint - e.g. FILE_FLAG_SEQUENTIAL_SCAN on windows
- READONLY = 2 // not contractually guaranteed, but if specified the impl has option to fault writes
- };
-
- /** @param fun is called for each MongoFile.
- called from within a mutex that MongoFile uses. so be careful not to deadlock.
- */
- template < class F >
- static void forEach( F fun );
+ MongoFile() {}
+ virtual ~MongoFile() {}
- /** note: you need to be in mmmutex when using this. forEach (above) handles that for you automatically.
-*/
- static std::set<MongoFile*>& getAllFiles();
+ enum Options {
+ SEQUENTIAL = 1, // hint - e.g. FILE_FLAG_SEQUENTIAL_SCAN on windows
+        // not contractually guaranteed, but if specified the impl has the option to fault writes
+        READONLY = 2
+ };
- // callbacks if you need them
- static void (*notifyPreFlush)();
- static void (*notifyPostFlush)();
+ /** @param fun is called for each MongoFile.
+ called from within a mutex that MongoFile uses. so be careful not to deadlock.
+ */
+ template <class F>
+ static void forEach(F fun);
- static int flushAll( bool sync ); // returns n flushed
- static long long totalMappedLength();
- static void closeAllFiles( std::stringstream &message );
+    /** note: you need to be in mmmutex when using this. forEach (above) handles that for you automatically. */
+ static std::set<MongoFile*>& getAllFiles();
- virtual bool isDurableMappedFile() { return false; }
+ // callbacks if you need them
+ static void (*notifyPreFlush)();
+ static void (*notifyPostFlush)();
- std::string filename() const { return _filename; }
- void setFilename(const std::string& fn);
+ static int flushAll(bool sync); // returns n flushed
+ static long long totalMappedLength();
+ static void closeAllFiles(std::stringstream& message);
- virtual uint64_t getUniqueId() const = 0;
+ virtual bool isDurableMappedFile() {
+ return false;
+ }
- private:
- std::string _filename;
- static int _flushAll( bool sync ); // returns n flushed
- protected:
- virtual void close() = 0;
- virtual void flush(bool sync) = 0;
- /**
- * returns a thread safe object that you can call flush on
- * Flushable has to fail nicely if the underlying object gets killed
- */
- virtual Flushable * prepareFlush() = 0;
+ std::string filename() const {
+ return _filename;
+ }
+ void setFilename(const std::string& fn);
+
+ virtual uint64_t getUniqueId() const = 0;
+
+private:
+ std::string _filename;
+ static int _flushAll(bool sync); // returns n flushed
+protected:
+ virtual void close() = 0;
+ virtual void flush(bool sync) = 0;
+ /**
+ * returns a thread safe object that you can call flush on
+ * Flushable has to fail nicely if the underlying object gets killed
+ */
+ virtual Flushable* prepareFlush() = 0;
+
+ void created(); /* subclass must call after create */
+
+ /* subclass must call in destructor (or at close).
+ removes this from pathToFile and other maps
+ safe to call more than once, albeit might be wasted work
+ ideal to call close to the close, if the close is well before object destruction
+ */
+ void destroyed();
- void created(); /* subclass must call after create */
+ virtual unsigned long long length() const = 0;
+};
- /* subclass must call in destructor (or at close).
- removes this from pathToFile and other maps
- safe to call more than once, albeit might be wasted work
- ideal to call close to the close, if the close is well before object destruction
- */
- void destroyed();
+/** look up a MMF by filename. scoped mutex locking convention.
+ example:
+ MMFFinderByName finder;
+ DurableMappedFile *a = finder.find("file_name_a");
+ DurableMappedFile *b = finder.find("file_name_b");
+*/
+class MongoFileFinder {
+ MONGO_DISALLOW_COPYING(MongoFileFinder);
- virtual unsigned long long length() const = 0;
- };
+public:
+ MongoFileFinder() {}
- /** look up a MMF by filename. scoped mutex locking convention.
- example:
- MMFFinderByName finder;
- DurableMappedFile *a = finder.find("file_name_a");
- DurableMappedFile *b = finder.find("file_name_b");
+ /** @return The MongoFile object associated with the specified file name. If no file is open
+ with the specified name, returns null.
*/
- class MongoFileFinder {
- MONGO_DISALLOW_COPYING(MongoFileFinder);
- public:
- MongoFileFinder() { }
+ MongoFile* findByPath(const std::string& path) const;
+
+private:
+ LockMongoFilesShared _lk;
+};
+
+class MemoryMappedFile : public MongoFile {
+protected:
+ virtual void* viewForFlushing() {
+ if (views.size() == 0)
+ return 0;
+ verify(views.size() == 1);
+ return views[0];
+ }
- /** @return The MongoFile object associated with the specified file name. If no file is open
- with the specified name, returns null.
- */
- MongoFile* findByPath(const std::string& path) const;
+public:
+ MemoryMappedFile();
- private:
- LockMongoFilesShared _lk;
- };
+ virtual ~MemoryMappedFile() {
+ LockMongoFilesExclusive lk;
+ close();
+ }
- class MemoryMappedFile : public MongoFile {
- protected:
- virtual void* viewForFlushing() {
- if( views.size() == 0 )
- return 0;
- verify( views.size() == 1 );
- return views[0];
- }
- public:
- MemoryMappedFile();
+ virtual void close();
- virtual ~MemoryMappedFile() {
- LockMongoFilesExclusive lk;
- close();
- }
+ // Throws exception if file doesn't exist. (dm may2010: not sure if this is always true?)
+ void* map(const char* filename);
- virtual void close();
+ /** @param options see MongoFile::Options
+ */
+ void* mapWithOptions(const char* filename, int options);
- // Throws exception if file doesn't exist. (dm may2010: not sure if this is always true?)
- void* map(const char *filename);
+ /* Creates with length if DNE, otherwise uses existing file length,
+ passed length.
+ @param options MongoFile::Options bits
+ */
+ void* map(const char* filename, unsigned long long& length, int options = 0);
- /** @param options see MongoFile::Options
- */
- void* mapWithOptions(const char *filename, int options);
+ /* Create. Must not exist.
+ @param zero fill file with zeros when true
+ */
+ void* create(const std::string& filename, unsigned long long len, bool zero);
- /* Creates with length if DNE, otherwise uses existing file length,
- passed length.
- @param options MongoFile::Options bits
- */
- void* map(const char *filename, unsigned long long &length, int options = 0 );
+ void flush(bool sync);
+ virtual Flushable* prepareFlush();
- /* Create. Must not exist.
- @param zero fill file with zeros when true
+ long shortLength() const {
+ return (long)len;
+ }
+ unsigned long long length() const {
+ return len;
+ }
+ HANDLE getFd() const {
+ return fd;
+ }
+ /** create a new view with the specified properties.
+ automatically cleaned up upon close/destruction of the MemoryMappedFile object.
*/
- void* create(const std::string& filename, unsigned long long len, bool zero);
-
- void flush(bool sync);
- virtual Flushable * prepareFlush();
-
- long shortLength() const { return (long) len; }
- unsigned long long length() const { return len; }
- HANDLE getFd() const { return fd; }
- /** create a new view with the specified properties.
- automatically cleaned up upon close/destruction of the MemoryMappedFile object.
- */
- void* createReadOnlyMap();
- void* createPrivateMap();
-
- virtual uint64_t getUniqueId() const { return _uniqueId; }
-
- private:
- static void updateLength( const char *filename, unsigned long long &length );
-
- HANDLE fd;
- HANDLE maphandle;
- std::vector<void *> views;
- unsigned long long len;
- const uint64_t _uniqueId;
-#ifdef _WIN32
- // flush Mutex
- //
- // Protects:
- // Prevent flush() and close() from concurrently running.
- // It ensures close() cannot complete while flush() is running
- // Lock Ordering:
- // LockMongoFilesShared must be taken before _flushMutex if both are taken
- stdx::mutex _flushMutex;
-#endif
+ void* createReadOnlyMap();
+ void* createPrivateMap();
- protected:
+ virtual uint64_t getUniqueId() const {
+ return _uniqueId;
+ }
- /** close the current private view and open a new replacement */
- void* remapPrivateView(void *oldPrivateAddr);
- };
+private:
+ static void updateLength(const char* filename, unsigned long long& length);
- /** p is called from within a mutex that MongoFile uses. so be careful not to deadlock. */
- template < class F >
- inline void MongoFile::forEach( F p ) {
- LockMongoFilesShared lklk;
- const std::set<MongoFile*>& mmfiles = MongoFile::getAllFiles();
- for ( std::set<MongoFile*>::const_iterator i = mmfiles.begin(); i != mmfiles.end(); i++ )
- p(*i);
- }
+ HANDLE fd;
+ HANDLE maphandle;
+ std::vector<void*> views;
+ unsigned long long len;
+ const uint64_t _uniqueId;
+#ifdef _WIN32
+ // flush Mutex
+ //
+ // Protects:
+ // Prevent flush() and close() from concurrently running.
+ // It ensures close() cannot complete while flush() is running
+ // Lock Ordering:
+ // LockMongoFilesShared must be taken before _flushMutex if both are taken
+ stdx::mutex _flushMutex;
+#endif
-} // namespace mongo
+protected:
+ /** close the current private view and open a new replacement */
+ void* remapPrivateView(void* oldPrivateAddr);
+};
+
+/** p is called from within a mutex that MongoFile uses. so be careful not to deadlock. */
+template <class F>
+inline void MongoFile::forEach(F p) {
+ LockMongoFilesShared lklk;
+ const std::set<MongoFile*>& mmfiles = MongoFile::getAllFiles();
+ for (std::set<MongoFile*>::const_iterator i = mmfiles.begin(); i != mmfiles.end(); i++)
+ p(*i);
+}
+
+} // namespace mongo
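
A brief illustration of the era mechanism documented above: every exclusive lock acquisition bumps the counter, so a reader that cached the era under a shared lock can later detect, very cheaply, whether any mapping came or went in the meantime. A toy version (the real code goes through LockMongoFilesShared::getEra() under the shared lock):

    #include <cstdio>

    static unsigned g_era = 99;  // bumped by every LockMongoFilesExclusive-style acquisition

    unsigned getEra() {
        return g_era;  // caller must hold the shared lock
    }

    int main() {
        unsigned saved = getEra();  // remember the state before yielding

        g_era++;  // meanwhile, some mapping is added or removed

        if (getEra() != saved)
            std::puts("mappings changed: revalidate pointers before touching them");
        return 0;
    }
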
diff --git a/src/mongo/db/storage/mmap_v1/mmap_posix.cpp b/src/mongo/db/storage/mmap_v1/mmap_posix.cpp
index f7dffae468f..a673d3e5fde 100644
--- a/src/mongo/db/storage/mmap_v1/mmap_posix.cpp
+++ b/src/mongo/db/storage/mmap_v1/mmap_posix.cpp
@@ -53,38 +53,37 @@ using std::vector;
using namespace mongoutils;
namespace {
- mongo::AtomicUInt64 mmfNextId(0);
+mongo::AtomicUInt64 mmfNextId(0);
}
namespace mongo {
- static size_t fetchMinOSPageSizeBytes() {
- size_t minOSPageSizeBytes = sysconf( _SC_PAGESIZE );
- minOSPageSizeBytesTest(minOSPageSizeBytes);
- return minOSPageSizeBytes;
- }
- const size_t g_minOSPageSizeBytes = fetchMinOSPageSizeBytes();
-
-
-
- MemoryMappedFile::MemoryMappedFile() : _uniqueId(mmfNextId.fetchAndAdd(1)) {
- fd = 0;
- maphandle = 0;
- len = 0;
- created();
- }
+static size_t fetchMinOSPageSizeBytes() {
+ size_t minOSPageSizeBytes = sysconf(_SC_PAGESIZE);
+ minOSPageSizeBytesTest(minOSPageSizeBytes);
+ return minOSPageSizeBytes;
+}
+const size_t g_minOSPageSizeBytes = fetchMinOSPageSizeBytes();
- void MemoryMappedFile::close() {
- LockMongoFilesShared::assertExclusivelyLocked();
- for( vector<void*>::iterator i = views.begin(); i != views.end(); i++ ) {
- munmap(*i,len);
- }
- views.clear();
- if ( fd )
- ::close(fd);
- fd = 0;
- destroyed(); // cleans up from the master list of mmaps
+MemoryMappedFile::MemoryMappedFile() : _uniqueId(mmfNextId.fetchAndAdd(1)) {
+ fd = 0;
+ maphandle = 0;
+ len = 0;
+ created();
+}
+
+void MemoryMappedFile::close() {
+ LockMongoFilesShared::assertExclusivelyLocked();
+ for (vector<void*>::iterator i = views.begin(); i != views.end(); i++) {
+ munmap(*i, len);
}
+ views.clear();
+
+ if (fd)
+ ::close(fd);
+ fd = 0;
+ destroyed(); // cleans up from the master list of mmaps
+}
#ifndef O_NOATIME
#define O_NOATIME (0)
@@ -94,231 +93,234 @@ namespace mongo {
#define MAP_NORESERVE (0)
#endif
- namespace {
- void* _pageAlign( void* p ) {
- return (void*)((int64_t)p & ~(g_minOSPageSizeBytes-1));
+namespace {
+void* _pageAlign(void* p) {
+ return (void*)((int64_t)p & ~(g_minOSPageSizeBytes - 1));
+}
+
+class PageAlignTest : public StartupTest {
+public:
+ void run() {
+ {
+ int64_t x = g_minOSPageSizeBytes + 123;
+ void* y = _pageAlign(reinterpret_cast<void*>(x));
+ invariant(g_minOSPageSizeBytes == reinterpret_cast<size_t>(y));
}
+ {
+ int64_t a = static_cast<uint64_t>(numeric_limits<int>::max());
+ a = a / g_minOSPageSizeBytes;
+ a = a * g_minOSPageSizeBytes;
+ // a should now be page aligned
- class PageAlignTest : public StartupTest {
- public:
- void run() {
- {
- int64_t x = g_minOSPageSizeBytes + 123;
- void* y = _pageAlign( reinterpret_cast<void*>( x ) );
- invariant( g_minOSPageSizeBytes == reinterpret_cast<size_t>(y) );
- }
- {
- int64_t a = static_cast<uint64_t>( numeric_limits<int>::max() );
- a = a / g_minOSPageSizeBytes;
- a = a * g_minOSPageSizeBytes;
- // a should now be page aligned
-
- // b is not page aligned
- int64_t b = a + 123;
-
- void* y = _pageAlign( reinterpret_cast<void*>( b ) );
- invariant( a == reinterpret_cast<int64_t>(y) );
- }
+ // b is not page aligned
+ int64_t b = a + 123;
- }
- } pageAlignTest;
+ void* y = _pageAlign(reinterpret_cast<void*>(b));
+ invariant(a == reinterpret_cast<int64_t>(y));
+ }
}
+} pageAlignTest;
+}
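
An aside on _pageAlign() above: for a power-of-two page size P, masking with ~(P - 1) clears the low bits and rounds an address down to the start of its page, and minOSPageSizeBytesTest() guards exactly that precondition with the classic (P & (P - 1)) == 0 check. A standalone sketch with a stand-in page size:

    #include <cassert>
    #include <cstdint>

    int main() {
        const int64_t P = 4096;      // stand-in; the real code queries sysconf(_SC_PAGESIZE)
        assert((P & (P - 1)) == 0);  // power of two, so the mask trick is valid

        int64_t addr = 3 * P + 123;  // an address inside the fourth page
        int64_t aligned = addr & ~(P - 1);
        assert(aligned == 3 * P);    // rounded down to the page start
        return 0;
    }
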
#if defined(__sun)
- MAdvise::MAdvise(void *,unsigned, Advice) { }
- MAdvise::~MAdvise() { }
+MAdvise::MAdvise(void*, unsigned, Advice) {}
+MAdvise::~MAdvise() {}
#else
- MAdvise::MAdvise(void *p, unsigned len, Advice a) {
+MAdvise::MAdvise(void* p, unsigned len, Advice a) {
+ _p = _pageAlign(p);
- _p = _pageAlign( p );
+ _len = len + static_cast<unsigned>(reinterpret_cast<size_t>(p) - reinterpret_cast<size_t>(_p));
- _len = len + static_cast<unsigned>( reinterpret_cast<size_t>(p) -
- reinterpret_cast<size_t>(_p) );
-
- int advice = 0;
- switch ( a ) {
+ int advice = 0;
+ switch (a) {
case Sequential:
advice = MADV_SEQUENTIAL;
break;
case Random:
advice = MADV_RANDOM;
break;
- }
-
- if ( madvise(_p,_len,advice ) ) {
- error() << "madvise failed: " << errnoWithDescription();
- }
-
}
- MAdvise::~MAdvise() {
- madvise(_p,_len,MADV_NORMAL);
+
+ if (madvise(_p, _len, advice)) {
+ error() << "madvise failed: " << errnoWithDescription();
}
+}
+MAdvise::~MAdvise() {
+ madvise(_p, _len, MADV_NORMAL);
+}
#endif
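MAdvise page-aligns the start of the region and widens the length accordingly, because madvise(2) requires a page-aligned address; the destructor then restores MADV_NORMAL. A condensed standalone sketch of the same RAII shape (ScopedAdvise is an illustrative name, and error handling is elided):

    #include <sys/mman.h>
    #include <unistd.h>
    #include <cstddef>
    #include <cstdint>

    class ScopedAdvise {
    public:
        ScopedAdvise(void* p, size_t len, int advice) {
            const uintptr_t page = static_cast<uintptr_t>(sysconf(_SC_PAGESIZE));
            uintptr_t start = reinterpret_cast<uintptr_t>(p) & ~(page - 1);
            _p = reinterpret_cast<void*>(start);
            // Widen the length by however far the start was rounded down.
            _len = len + (reinterpret_cast<uintptr_t>(p) - start);
            madvise(_p, _len, advice);  // e.g. MADV_SEQUENTIAL or MADV_RANDOM
        }
        ~ScopedAdvise() {
            madvise(_p, _len, MADV_NORMAL);  // undo the hint on scope exit
        }

    private:
        void* _p;
        size_t _len;
    };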
- void* MemoryMappedFile::map(const char *filename, unsigned long long &length, int options) {
- // length may be updated by callee.
- setFilename(filename);
- FileAllocator::get()->allocateAsap( filename, length );
- len = length;
+void* MemoryMappedFile::map(const char* filename, unsigned long long& length, int options) {
+    // length may be updated by the callee.
+ setFilename(filename);
+ FileAllocator::get()->allocateAsap(filename, length);
+ len = length;
- massert( 10446 , str::stream() << "mmap: can't map area of size 0 file: " << filename, length > 0 );
+ massert(
+ 10446, str::stream() << "mmap: can't map area of size 0 file: " << filename, length > 0);
- fd = open(filename, O_RDWR | O_NOATIME);
- if ( fd <= 0 ) {
- log() << "couldn't open " << filename << ' ' << errnoWithDescription() << endl;
- fd = 0; // our sentinel for not opened
- return 0;
- }
+ fd = open(filename, O_RDWR | O_NOATIME);
+ if (fd <= 0) {
+ log() << "couldn't open " << filename << ' ' << errnoWithDescription() << endl;
+ fd = 0; // our sentinel for not opened
+ return 0;
+ }
- unsigned long long filelen = lseek(fd, 0, SEEK_END);
- uassert(10447, str::stream() << "map file alloc failed, wanted: " << length << " filelen: " << filelen << ' ' << sizeof(size_t), filelen == length );
- lseek( fd, 0, SEEK_SET );
-
- void * view = mmap(NULL, length, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
- if ( view == MAP_FAILED ) {
- error() << " mmap() failed for " << filename << " len:" << length << " " << errnoWithDescription() << endl;
- if ( errno == ENOMEM ) {
- if( sizeof(void*) == 4 )
- error() << "mmap failed with out of memory. You are using a 32-bit build and probably need to upgrade to 64" << endl;
- else
- error() << "mmap failed with out of memory. (64 bit build)" << endl;
- }
- return 0;
+ unsigned long long filelen = lseek(fd, 0, SEEK_END);
+ uassert(10447,
+ str::stream() << "map file alloc failed, wanted: " << length << " filelen: " << filelen
+ << ' ' << sizeof(size_t),
+ filelen == length);
+ lseek(fd, 0, SEEK_SET);
+
+ void* view = mmap(NULL, length, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ if (view == MAP_FAILED) {
+ error() << " mmap() failed for " << filename << " len:" << length << " "
+ << errnoWithDescription() << endl;
+ if (errno == ENOMEM) {
+ if (sizeof(void*) == 4)
+ error() << "mmap failed with out of memory. You are using a 32-bit build and "
+ "probably need to upgrade to 64" << endl;
+ else
+ error() << "mmap failed with out of memory. (64 bit build)" << endl;
}
+ return 0;
+ }
#if defined(__sun)
#warning madvise not supported on solaris yet
#else
- if ( options & SEQUENTIAL ) {
- if ( madvise( view , length , MADV_SEQUENTIAL ) ) {
- warning() << "map: madvise failed for " << filename << ' ' << errnoWithDescription() << endl;
- }
+ if (options & SEQUENTIAL) {
+ if (madvise(view, length, MADV_SEQUENTIAL)) {
+ warning() << "map: madvise failed for " << filename << ' ' << errnoWithDescription()
+ << endl;
}
+ }
#endif
- views.push_back( view );
+ views.push_back(view);
- return view;
- }
+ return view;
+}
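map() follows a fixed sequence: allocate and open the file, verify the on-disk length matches what was requested, then mmap the whole file MAP_SHARED. A reduced standalone sketch of that sequence (mapFile is an illustrative helper; the real code keeps fd open for later fsync, whereas a MAP_SHARED view here stays valid even after close):

    #include <fcntl.h>
    #include <sys/mman.h>
    #include <unistd.h>

    void* mapFile(const char* path, size_t want) {
        int fd = open(path, O_RDWR);
        if (fd < 0)
            return nullptr;
        // The file must already be exactly the expected size.
        off_t filelen = lseek(fd, 0, SEEK_END);
        if (filelen != static_cast<off_t>(want)) {
            close(fd);
            return nullptr;
        }
        void* view = mmap(nullptr, want, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        close(fd);  // the mapping itself keeps the file referenced
        return view == MAP_FAILED ? nullptr : view;
    }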
- void* MemoryMappedFile::createReadOnlyMap() {
- void * x = mmap( /*start*/0 , len , PROT_READ , MAP_SHARED , fd , 0 );
- if( x == MAP_FAILED ) {
- if ( errno == ENOMEM ) {
- if( sizeof(void*) == 4 )
- error() << "mmap ro failed with out of memory. You are using a 32-bit build and probably need to upgrade to 64" << endl;
- else
- error() << "mmap ro failed with out of memory. (64 bit build)" << endl;
- }
- return 0;
+void* MemoryMappedFile::createReadOnlyMap() {
+ void* x = mmap(/*start*/ 0, len, PROT_READ, MAP_SHARED, fd, 0);
+ if (x == MAP_FAILED) {
+ if (errno == ENOMEM) {
+ if (sizeof(void*) == 4)
+ error() << "mmap ro failed with out of memory. You are using a 32-bit build and "
+ "probably need to upgrade to 64" << endl;
+ else
+ error() << "mmap ro failed with out of memory. (64 bit build)" << endl;
}
- return x;
+ return 0;
}
+ return x;
+}
- void* MemoryMappedFile::createPrivateMap() {
- void * x = mmap( /*start*/0 , len , PROT_READ|PROT_WRITE , MAP_PRIVATE|MAP_NORESERVE , fd , 0 );
- if( x == MAP_FAILED ) {
- if ( errno == ENOMEM ) {
- if( sizeof(void*) == 4 ) {
- error() << "mmap private failed with out of memory. You are using a 32-bit build and probably need to upgrade to 64" << endl;
- }
- else {
- error() << "mmap private failed with out of memory. (64 bit build)" << endl;
- }
+void* MemoryMappedFile::createPrivateMap() {
+ void* x = mmap(/*start*/ 0, len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_NORESERVE, fd, 0);
+ if (x == MAP_FAILED) {
+ if (errno == ENOMEM) {
+ if (sizeof(void*) == 4) {
+ error() << "mmap private failed with out of memory. You are using a 32-bit build "
+ "and probably need to upgrade to 64" << endl;
+ } else {
+ error() << "mmap private failed with out of memory. (64 bit build)" << endl;
}
- else {
- error() << "mmap private failed " << errnoWithDescription() << endl;
- }
- return 0;
+ } else {
+ error() << "mmap private failed " << errnoWithDescription() << endl;
}
-
- views.push_back(x);
- return x;
+ return 0;
}
- void* MemoryMappedFile::remapPrivateView(void *oldPrivateAddr) {
-#if defined(__sun) // SERVER-8795
- LockMongoFilesExclusive lockMongoFiles;
+ views.push_back(x);
+ return x;
+}
+
+void* MemoryMappedFile::remapPrivateView(void* oldPrivateAddr) {
+#if defined(__sun) // SERVER-8795
+ LockMongoFilesExclusive lockMongoFiles;
#endif
- // don't unmap, just mmap over the old region
- void * x = mmap( oldPrivateAddr, len , PROT_READ|PROT_WRITE , MAP_PRIVATE|MAP_NORESERVE|MAP_FIXED , fd , 0 );
- if( x == MAP_FAILED ) {
- int err = errno;
- error() << "13601 Couldn't remap private view: " << errnoWithDescription(err) << endl;
- log() << "aborting" << endl;
- printMemInfo();
- abort();
- }
- verify( x == oldPrivateAddr );
- return x;
+ // don't unmap, just mmap over the old region
+ void* x = mmap(oldPrivateAddr,
+ len,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_NORESERVE | MAP_FIXED,
+ fd,
+ 0);
+ if (x == MAP_FAILED) {
+ int err = errno;
+ error() << "13601 Couldn't remap private view: " << errnoWithDescription(err) << endl;
+ log() << "aborting" << endl;
+ printMemInfo();
+ abort();
}
+ verify(x == oldPrivateAddr);
+ return x;
+}
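remapPrivateView relies on a property of MAP_FIXED: mapping at a fixed address atomically replaces any existing mapping in that range, so the dirty copy-on-write pages are discarded without the address range ever being unmapped. A minimal sketch of just that overlay (remapPrivate is an illustrative name):

    #include <sys/mman.h>
    #include <cstddef>

    void* remapPrivate(void* addr, size_t len, int fd) {
        // MAP_FIXED: replace whatever is mapped at addr in one step.
        void* x = mmap(addr, len, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_NORESERVE | MAP_FIXED, fd, 0);
        // On success, POSIX guarantees x == addr.
        return x == MAP_FAILED ? nullptr : x;
    }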
- void MemoryMappedFile::flush(bool sync) {
- if ( views.empty() || fd == 0 )
- return;
+void MemoryMappedFile::flush(bool sync) {
+ if (views.empty() || fd == 0)
+ return;
- bool useFsync = sync && !ProcessInfo::preferMsyncOverFSync();
+ bool useFsync = sync && !ProcessInfo::preferMsyncOverFSync();
- if ( useFsync ?
- fsync(fd) != 0 :
- msync(viewForFlushing(), len, sync ? MS_SYNC : MS_ASYNC) ) {
- // msync failed, this is very bad
- log() << (useFsync ? "fsync failed: " : "msync failed: ") << errnoWithDescription()
- << " file: " << filename() << endl;
- dataSyncFailedHandler();
- }
+ if (useFsync ? fsync(fd) != 0 : msync(viewForFlushing(), len, sync ? MS_SYNC : MS_ASYNC)) {
+        // msync/fsync failed; this is very bad
+ log() << (useFsync ? "fsync failed: " : "msync failed: ") << errnoWithDescription()
+ << " file: " << filename() << endl;
+ dataSyncFailedHandler();
}
+}
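flush() chooses between two durability paths: fsync(2) on the file descriptor, or msync(2) on the mapped range, where the sync flag selects MS_SYNC (block until written) versus MS_ASYNC (schedule the writeback). A reduced standalone sketch of that decision (flushView and preferFsync are illustrative names):

    #include <sys/mman.h>
    #include <unistd.h>
    #include <cstddef>

    bool flushView(bool preferFsync, int fd, void* view, size_t len, bool sync) {
        if (preferFsync)
            return fsync(fd) == 0;  // flush through the descriptor
        // Flush just the mapped range, synchronously or asynchronously.
        return msync(view, len, sync ? MS_SYNC : MS_ASYNC) == 0;
    }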
- class PosixFlushable : public MemoryMappedFile::Flushable {
- public:
- PosixFlushable( MemoryMappedFile* theFile, void* view , HANDLE fd , long len)
- : _theFile( theFile ), _view( view ), _fd(fd), _len(len), _id(_theFile->getUniqueId()) {
- }
-
- void flush() {
- if ( _view == NULL || _fd == 0 )
- return;
-
- if ( ProcessInfo::preferMsyncOverFSync() ?
- msync(_view, _len, MS_SYNC ) == 0 :
- fsync(_fd) == 0 ) {
- return;
- }
+class PosixFlushable : public MemoryMappedFile::Flushable {
+public:
+ PosixFlushable(MemoryMappedFile* theFile, void* view, HANDLE fd, long len)
+ : _theFile(theFile), _view(view), _fd(fd), _len(len), _id(_theFile->getUniqueId()) {}
- if ( errno == EBADF ) {
- // ok, we were unlocked, so this file was closed
- return;
- }
+ void flush() {
+ if (_view == NULL || _fd == 0)
+ return;
- // some error, lets see if we're supposed to exist
- LockMongoFilesShared mmfilesLock;
- std::set<MongoFile*> mmfs = MongoFile::getAllFiles();
- std::set<MongoFile*>::const_iterator it = mmfs.find(_theFile);
- if ( (it == mmfs.end()) || ((*it)->getUniqueId() != _id) ) {
- log() << "msync failed with: " << errnoWithDescription()
- << " but file doesn't exist anymore, so ignoring";
- // this was deleted while we were unlocked
- return;
- }
+ if (ProcessInfo::preferMsyncOverFSync() ? msync(_view, _len, MS_SYNC) == 0
+ : fsync(_fd) == 0) {
+ return;
+ }
- // we got an error, and we still exist, so this is bad, we fail
- log() << "msync " << errnoWithDescription() << endl;
- dataSyncFailedHandler();
+ if (errno == EBADF) {
+ // ok, we were unlocked, so this file was closed
+ return;
}
- MemoryMappedFile* _theFile;
- void * _view;
- HANDLE _fd;
- long _len;
- const uint64_t _id;
- };
+        // some error; let's see if we're supposed to exist
+ LockMongoFilesShared mmfilesLock;
+ std::set<MongoFile*> mmfs = MongoFile::getAllFiles();
+ std::set<MongoFile*>::const_iterator it = mmfs.find(_theFile);
+ if ((it == mmfs.end()) || ((*it)->getUniqueId() != _id)) {
+ log() << "msync failed with: " << errnoWithDescription()
+ << " but file doesn't exist anymore, so ignoring";
+ // this was deleted while we were unlocked
+ return;
+ }
- MemoryMappedFile::Flushable * MemoryMappedFile::prepareFlush() {
- return new PosixFlushable( this, viewForFlushing(), fd, len);
+        // we got an error and the file still exists, so this is bad; fail
+ log() << "msync " << errnoWithDescription() << endl;
+ dataSyncFailedHandler();
}
+ MemoryMappedFile* _theFile;
+ void* _view;
+ HANDLE _fd;
+ long _len;
+ const uint64_t _id;
+};
+
+MemoryMappedFile::Flushable* MemoryMappedFile::prepareFlush() {
+ return new PosixFlushable(this, viewForFlushing(), fd, len);
+}
-} // namespace mongo
+} // namespace mongo
diff --git a/src/mongo/db/storage/mmap_v1/mmap_v1_database_catalog_entry.cpp b/src/mongo/db/storage/mmap_v1/mmap_v1_database_catalog_entry.cpp
index 6cc9d9cef73..8cdbd4ad7a4 100644
--- a/src/mongo/db/storage/mmap_v1/mmap_v1_database_catalog_entry.cpp
+++ b/src/mongo/db/storage/mmap_v1/mmap_v1_database_catalog_entry.cpp
@@ -55,844 +55,782 @@
namespace mongo {
- using std::unique_ptr;
+using std::unique_ptr;
namespace {
- /**
- * Declaration for the "newCollectionsUsePowerOf2Sizes" server parameter, which is now
- * deprecated in 3.0.
- * Note that:
- * - setting to true performs a no-op.
- * - setting to false will fail.
- */
- class NewCollectionsUsePowerOf2SizesParameter : public ExportedServerParameter<bool> {
- public:
- NewCollectionsUsePowerOf2SizesParameter() :
- ExportedServerParameter<bool>(ServerParameterSet::getGlobal(),
- "newCollectionsUsePowerOf2Sizes",
- &newCollectionsUsePowerOf2SizesFlag,
- true,
- true),
- newCollectionsUsePowerOf2SizesFlag(true) {
-
+/**
+ * Declaration for the "newCollectionsUsePowerOf2Sizes" server parameter, which is now
+ * deprecated in 3.0.
+ * Note that:
+ * - setting to true performs a no-op.
+ * - setting to false will fail.
+ */
+class NewCollectionsUsePowerOf2SizesParameter : public ExportedServerParameter<bool> {
+public:
+ NewCollectionsUsePowerOf2SizesParameter()
+ : ExportedServerParameter<bool>(ServerParameterSet::getGlobal(),
+ "newCollectionsUsePowerOf2Sizes",
+ &newCollectionsUsePowerOf2SizesFlag,
+ true,
+ true),
+ newCollectionsUsePowerOf2SizesFlag(true) {}
+
+ virtual Status validate(const bool& potentialNewValue) {
+ if (!potentialNewValue) {
+ return Status(ErrorCodes::BadValue,
+ "newCollectionsUsePowerOf2Sizes cannot be set to false. "
+ "Use noPadding instead during createCollection.");
}
- virtual Status validate(const bool& potentialNewValue) {
- if (!potentialNewValue) {
- return Status(ErrorCodes::BadValue,
- "newCollectionsUsePowerOf2Sizes cannot be set to false. "
- "Use noPadding instead during createCollection.");
- }
+ return Status::OK();
+ }
- return Status::OK();
- }
+private:
+ // Unused, needed for server parameter.
+ bool newCollectionsUsePowerOf2SizesFlag;
- private:
- // Unused, needed for server parameter.
- bool newCollectionsUsePowerOf2SizesFlag;
+} exportedNewCollectionsUsePowerOf2SizesParameter;
- } exportedNewCollectionsUsePowerOf2SizesParameter;
+int _massageExtentSize(const ExtentManager* em, long long size) {
+ if (size < em->minSize())
+ return em->minSize();
+ if (size > em->maxSize())
+ return em->maxSize();
- int _massageExtentSize(const ExtentManager* em, long long size) {
- if (size < em->minSize())
- return em->minSize();
- if (size > em->maxSize())
- return em->maxSize();
+ return static_cast<int>(size);
+}
- return static_cast<int>(size);
- }
+} // namespace
-} // namespace
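_massageExtentSize simply clamps the requested size into the extent manager's [minSize, maxSize] window. A worked standalone example of the clamp (clampExtent and the bounds are illustrative):

    #include <algorithm>
    #include <cassert>

    long long clampExtent(long long size, long long minSize, long long maxSize) {
        return std::min(std::max(size, minSize), maxSize);
    }

    int main() {
        assert(clampExtent(1000, 4096, 1 << 22) == 4096);            // raised to min
        assert(clampExtent(1LL << 30, 4096, 1 << 22) == (1 << 22));  // capped at max
        assert(clampExtent(8192, 4096, 1 << 22) == 8192);            // in range
        return 0;
    }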
+/**
+ * Registers the insertion of a new entry in the _collections cache with the RecoveryUnit,
+ * allowing for rollback.
+ */
+class MMAPV1DatabaseCatalogEntry::EntryInsertion : public RecoveryUnit::Change {
+public:
+ EntryInsertion(StringData ns, MMAPV1DatabaseCatalogEntry* entry)
+ : _ns(ns.toString()), _entry(entry) {}
- /**
- * Registers the insertion of a new entry in the _collections cache with the RecoveryUnit,
- * allowing for rollback.
- */
- class MMAPV1DatabaseCatalogEntry::EntryInsertion : public RecoveryUnit::Change {
- public:
- EntryInsertion(StringData ns, MMAPV1DatabaseCatalogEntry* entry)
- : _ns(ns.toString()), _entry(entry) { }
+ void rollback() {
+ _entry->_removeFromCache(NULL, _ns);
+ }
- void rollback() {
- _entry->_removeFromCache(NULL, _ns);
- }
+ void commit() {}
- void commit() { }
- private:
- const std::string _ns;
- MMAPV1DatabaseCatalogEntry* const _entry;
- };
-
- /**
- * Registers the removal of an entry from the _collections cache with the RecoveryUnit,
- * delaying actual deletion of the information until the change is commited. This allows
- * for easy rollback.
- */
- class MMAPV1DatabaseCatalogEntry::EntryRemoval : public RecoveryUnit::Change {
- public:
- // Rollback removing the collection from the cache. Takes ownership of the cachedEntry,
- // and will delete it if removal is final.
- EntryRemoval(StringData ns,
- MMAPV1DatabaseCatalogEntry* catalogEntry,
- Entry *cachedEntry)
- : _ns(ns.toString()), _catalogEntry(catalogEntry), _cachedEntry(cachedEntry) { }
-
- void rollback() {
- _catalogEntry->_collections[_ns] = _cachedEntry;
- }
+private:
+ const std::string _ns;
+ MMAPV1DatabaseCatalogEntry* const _entry;
+};
- void commit() {
- delete _cachedEntry;
- }
+/**
+ * Registers the removal of an entry from the _collections cache with the RecoveryUnit,
+ * delaying actual deletion of the information until the change is committed. This allows
+ * for easy rollback.
+ */
+class MMAPV1DatabaseCatalogEntry::EntryRemoval : public RecoveryUnit::Change {
+public:
+ // Rollback removing the collection from the cache. Takes ownership of the cachedEntry,
+ // and will delete it if removal is final.
+ EntryRemoval(StringData ns, MMAPV1DatabaseCatalogEntry* catalogEntry, Entry* cachedEntry)
+ : _ns(ns.toString()), _catalogEntry(catalogEntry), _cachedEntry(cachedEntry) {}
+
+ void rollback() {
+ _catalogEntry->_collections[_ns] = _cachedEntry;
+ }
- private:
- const std::string _ns;
- MMAPV1DatabaseCatalogEntry* const _catalogEntry;
- Entry* const _cachedEntry;
- };
-
- MMAPV1DatabaseCatalogEntry::MMAPV1DatabaseCatalogEntry( OperationContext* txn,
- StringData name,
- StringData path,
- bool directoryPerDB,
- bool transient )
- : DatabaseCatalogEntry( name ),
- _path( path.toString() ),
- _namespaceIndex(_path, name.toString()),
- _extentManager(name, path, directoryPerDB) {
-
- invariant(txn->lockState()->isDbLockedForMode(name, MODE_X));
-
- try {
- // First init the .ns file. If this fails, we may leak the .ns file, but this is OK
- // because subsequent openDB will go through this code path again.
- _namespaceIndex.init(txn);
-
- // Initialize the extent manager. This will create the first data file (.0) if needed
- // and if this fails we would leak the .ns file above. Leaking the .ns or .0 file is
- // acceptable, because subsequent openDB calls will exercise the code path again.
- Status s = _extentManager.init(txn);
- if (!s.isOK()) {
- msgasserted(16966, str::stream() << "_extentManager.init failed: " << s.toString());
- }
+ void commit() {
+ delete _cachedEntry;
+ }
- // This is the actual loading of the on-disk structures into cache.
- _init( txn );
- }
- catch (const DBException& dbe) {
- warning() << "database " << path << " " << name
- << " could not be opened due to DBException " << dbe.getCode() << ": "
- << dbe.what();
- throw;
- }
- catch (const std::exception& e) {
- warning() << "database " << path << " " << name
- << " could not be opened " << e.what();
- throw;
- }
+private:
+ const std::string _ns;
+ MMAPV1DatabaseCatalogEntry* const _catalogEntry;
+ Entry* const _cachedEntry;
+};
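EntryInsertion and EntryRemoval both follow the RecoveryUnit::Change protocol: an object is registered when a change is made, and the unit later calls commit() on success or rollback() on abort. A minimal standalone sketch of that protocol (Change and UnitOfWork here are illustrative stand-ins, not the real RecoveryUnit API):

    #include <memory>
    #include <vector>

    struct Change {
        virtual ~Change() {}
        virtual void commit() = 0;    // make the side effect permanent
        virtual void rollback() = 0;  // undo the side effect
    };

    class UnitOfWork {
    public:
        void registerChange(Change* c) {
            _changes.emplace_back(c);  // takes ownership, like registerChange()
        }
        void commit() {
            for (auto& c : _changes)
                c->commit();
            _changes.clear();
        }
        void abort() {
            // Undo in reverse registration order.
            for (auto it = _changes.rbegin(); it != _changes.rend(); ++it)
                (*it)->rollback();
            _changes.clear();
        }

    private:
        std::vector<std::unique_ptr<Change>> _changes;
    };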
+
+MMAPV1DatabaseCatalogEntry::MMAPV1DatabaseCatalogEntry(
+ OperationContext* txn, StringData name, StringData path, bool directoryPerDB, bool transient)
+ : DatabaseCatalogEntry(name),
+ _path(path.toString()),
+ _namespaceIndex(_path, name.toString()),
+ _extentManager(name, path, directoryPerDB) {
+ invariant(txn->lockState()->isDbLockedForMode(name, MODE_X));
+
+ try {
+ // First init the .ns file. If this fails, we may leak the .ns file, but this is OK
+ // because subsequent openDB will go through this code path again.
+ _namespaceIndex.init(txn);
+
+ // Initialize the extent manager. This will create the first data file (.0) if needed
+ // and if this fails we would leak the .ns file above. Leaking the .ns or .0 file is
+ // acceptable, because subsequent openDB calls will exercise the code path again.
+ Status s = _extentManager.init(txn);
+ if (!s.isOK()) {
+ msgasserted(16966, str::stream() << "_extentManager.init failed: " << s.toString());
+ }
+
+ // This is the actual loading of the on-disk structures into cache.
+ _init(txn);
+ } catch (const DBException& dbe) {
+ warning() << "database " << path << " " << name
+ << " could not be opened due to DBException " << dbe.getCode() << ": "
+ << dbe.what();
+ throw;
+ } catch (const std::exception& e) {
+ warning() << "database " << path << " " << name << " could not be opened " << e.what();
+ throw;
}
+}
- MMAPV1DatabaseCatalogEntry::~MMAPV1DatabaseCatalogEntry() {
- for ( CollectionMap::const_iterator i = _collections.begin();
- i != _collections.end();
- ++i ) {
- delete i->second;
- }
- _collections.clear();
+MMAPV1DatabaseCatalogEntry::~MMAPV1DatabaseCatalogEntry() {
+ for (CollectionMap::const_iterator i = _collections.begin(); i != _collections.end(); ++i) {
+ delete i->second;
}
+ _collections.clear();
+}
- intmax_t dbSize( const string& database ); // from repair_database.cpp
+intmax_t dbSize(const string& database); // from repair_database.cpp
- int64_t MMAPV1DatabaseCatalogEntry::sizeOnDisk( OperationContext* opCtx ) const {
- return static_cast<int64_t>( dbSize( name() ) );
+int64_t MMAPV1DatabaseCatalogEntry::sizeOnDisk(OperationContext* opCtx) const {
+ return static_cast<int64_t>(dbSize(name()));
+}
+
+void MMAPV1DatabaseCatalogEntry::_removeFromCache(RecoveryUnit* ru, StringData ns) {
+ CollectionMap::iterator i = _collections.find(ns.toString());
+ if (i == _collections.end()) {
+ return;
}
- void MMAPV1DatabaseCatalogEntry::_removeFromCache(RecoveryUnit* ru,
- StringData ns) {
- CollectionMap::iterator i = _collections.find(ns.toString());
- if (i == _collections.end()) {
- return;
- }
+ // If there is an operation context, register a rollback to restore the cache entry
+ if (ru) {
+ ru->registerChange(new EntryRemoval(ns, this, i->second));
+ } else {
+ delete i->second;
+ }
+ _collections.erase(i);
+}
- // If there is an operation context, register a rollback to restore the cache entry
- if (ru) {
- ru->registerChange(new EntryRemoval(ns, this, i->second));
- }
- else {
- delete i->second;
- }
- _collections.erase(i);
+Status MMAPV1DatabaseCatalogEntry::dropCollection(OperationContext* txn, StringData ns) {
+ invariant(txn->lockState()->isCollectionLockedForMode(ns, MODE_X));
+ _removeFromCache(txn->recoveryUnit(), ns);
+
+ NamespaceDetails* details = _namespaceIndex.details(ns);
+
+ if (!details) {
+ return Status(ErrorCodes::NamespaceNotFound, str::stream() << "ns not found: " << ns);
}
- Status MMAPV1DatabaseCatalogEntry::dropCollection(OperationContext* txn, StringData ns) {
- invariant(txn->lockState()->isCollectionLockedForMode(ns, MODE_X));
- _removeFromCache(txn->recoveryUnit(), ns);
+ invariant(details->nIndexes == 0); // TODO: delete instead?
+ invariant(details->indexBuildsInProgress == 0); // TODO: delete instead?
- NamespaceDetails* details = _namespaceIndex.details( ns );
+ _removeNamespaceFromNamespaceCollection(txn, ns);
- if ( !details ) {
- return Status( ErrorCodes::NamespaceNotFound, str::stream() << "ns not found: " << ns );
- }
+ // free extents
+ if (!details->firstExtent.isNull()) {
+ _extentManager.freeExtents(txn, details->firstExtent, details->lastExtent);
+ *txn->recoveryUnit()->writing(&details->firstExtent) = DiskLoc().setInvalid();
+ *txn->recoveryUnit()->writing(&details->lastExtent) = DiskLoc().setInvalid();
+ }
- invariant( details->nIndexes == 0 ); // TODO: delete instead?
- invariant( details->indexBuildsInProgress == 0 ); // TODO: delete instead?
+ // remove from the catalog hashtable
+ _namespaceIndex.kill_ns(txn, ns);
- _removeNamespaceFromNamespaceCollection( txn, ns );
+ return Status::OK();
+}
- // free extents
- if( !details->firstExtent.isNull() ) {
- _extentManager.freeExtents(txn, details->firstExtent, details->lastExtent);
- *txn->recoveryUnit()->writing( &details->firstExtent ) = DiskLoc().setInvalid();
- *txn->recoveryUnit()->writing( &details->lastExtent ) = DiskLoc().setInvalid();
- }
- // remove from the catalog hashtable
- _namespaceIndex.kill_ns( txn, ns );
+Status MMAPV1DatabaseCatalogEntry::renameCollection(OperationContext* txn,
+ StringData fromNS,
+ StringData toNS,
+ bool stayTemp) {
+ Status s = _renameSingleNamespace(txn, fromNS, toNS, stayTemp);
+ if (!s.isOK())
+ return s;
- return Status::OK();
- }
+ NamespaceDetails* details = _namespaceIndex.details(toNS);
+ invariant(details);
+ RecordStoreV1Base* systemIndexRecordStore = _getIndexRecordStore();
+ auto cursor = systemIndexRecordStore->getCursor(txn);
+ while (auto record = cursor->next()) {
+ BSONObj oldIndexSpec = record->data.releaseToBson();
+ if (fromNS != oldIndexSpec["ns"].valuestrsafe())
+ continue;
- Status MMAPV1DatabaseCatalogEntry::renameCollection( OperationContext* txn,
- StringData fromNS,
- StringData toNS,
- bool stayTemp ) {
- Status s = _renameSingleNamespace( txn, fromNS, toNS, stayTemp );
- if ( !s.isOK() )
- return s;
-
- NamespaceDetails* details = _namespaceIndex.details( toNS );
- invariant( details );
-
- RecordStoreV1Base* systemIndexRecordStore = _getIndexRecordStore();
- auto cursor = systemIndexRecordStore->getCursor(txn);
- while (auto record = cursor->next()) {
- BSONObj oldIndexSpec = record->data.releaseToBson();
- if ( fromNS != oldIndexSpec["ns"].valuestrsafe() )
- continue;
-
- BSONObj newIndexSpec;
- {
- BSONObjBuilder b;
- BSONObjIterator i( oldIndexSpec );
- while( i.more() ) {
- BSONElement e = i.next();
- if ( strcmp( e.fieldName(), "ns" ) != 0 )
- b.append( e );
- else
- b << "ns" << toNS;
- }
- newIndexSpec = b.obj();
+ BSONObj newIndexSpec;
+ {
+ BSONObjBuilder b;
+ BSONObjIterator i(oldIndexSpec);
+ while (i.more()) {
+ BSONElement e = i.next();
+ if (strcmp(e.fieldName(), "ns") != 0)
+ b.append(e);
+ else
+ b << "ns" << toNS;
}
+ newIndexSpec = b.obj();
+ }
- StatusWith<RecordId> newIndexSpecLoc =
- systemIndexRecordStore->insertRecord( txn,
- newIndexSpec.objdata(),
- newIndexSpec.objsize(),
- false );
- if ( !newIndexSpecLoc.isOK() )
- return newIndexSpecLoc.getStatus();
-
- const string& indexName = oldIndexSpec.getStringField( "name" );
-
- {
- // fix IndexDetails pointer
- NamespaceDetailsCollectionCatalogEntry ce( toNS,
- details,
- _getNamespaceRecordStore(),
- systemIndexRecordStore,
- this );
- int indexI = ce._findIndexNumber( txn, indexName );
-
- IndexDetails& indexDetails = details->idx(indexI);
- *txn->recoveryUnit()->writing(&indexDetails.info) =
- DiskLoc::fromRecordId(newIndexSpecLoc.getValue());
- }
+ StatusWith<RecordId> newIndexSpecLoc = systemIndexRecordStore->insertRecord(
+ txn, newIndexSpec.objdata(), newIndexSpec.objsize(), false);
+ if (!newIndexSpecLoc.isOK())
+ return newIndexSpecLoc.getStatus();
- {
- // move underlying namespac
- string oldIndexNs = IndexDescriptor::makeIndexNamespace( fromNS, indexName );
- string newIndexNs = IndexDescriptor::makeIndexNamespace( toNS, indexName );
+ const string& indexName = oldIndexSpec.getStringField("name");
- Status s = _renameSingleNamespace( txn, oldIndexNs, newIndexNs, false );
- if ( !s.isOK() )
- return s;
- }
+ {
+ // fix IndexDetails pointer
+ NamespaceDetailsCollectionCatalogEntry ce(
+ toNS, details, _getNamespaceRecordStore(), systemIndexRecordStore, this);
+ int indexI = ce._findIndexNumber(txn, indexName);
- systemIndexRecordStore->deleteRecord( txn, record->id );
+ IndexDetails& indexDetails = details->idx(indexI);
+ *txn->recoveryUnit()->writing(&indexDetails.info) =
+ DiskLoc::fromRecordId(newIndexSpecLoc.getValue());
}
- return Status::OK();
+ {
+        // move the underlying namespace
+ string oldIndexNs = IndexDescriptor::makeIndexNamespace(fromNS, indexName);
+ string newIndexNs = IndexDescriptor::makeIndexNamespace(toNS, indexName);
+
+ Status s = _renameSingleNamespace(txn, oldIndexNs, newIndexNs, false);
+ if (!s.isOK())
+ return s;
+ }
+
+ systemIndexRecordStore->deleteRecord(txn, record->id);
}
- Status MMAPV1DatabaseCatalogEntry::_renameSingleNamespace( OperationContext* txn,
- StringData fromNS,
- StringData toNS,
- bool stayTemp ) {
- // some sanity checking
- NamespaceDetails* fromDetails = _namespaceIndex.details( fromNS );
- if ( !fromDetails )
- return Status( ErrorCodes::BadValue, "from namespace doesn't exist" );
+ return Status::OK();
+}
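The loop above rewrites each index spec so its "ns" field points at the renamed collection while every other field is copied verbatim. A condensed sketch of just that rewrite, assuming MongoDB's BSON builder headers (rewriteNs is an illustrative helper, not part of the codebase):

    BSONObj rewriteNs(const BSONObj& oldSpec, StringData toNS) {
        BSONObjBuilder b;
        BSONObjIterator it(oldSpec);
        while (it.more()) {
            BSONElement e = it.next();
            if (strcmp(e.fieldName(), "ns") != 0)
                b.append(e);        // keep all other fields as-is
            else
                b << "ns" << toNS;  // retarget the spec at the new namespace
        }
        return b.obj();
    }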
- if ( _namespaceIndex.details( toNS ) )
- return Status( ErrorCodes::BadValue, "to namespace already exists" );
+Status MMAPV1DatabaseCatalogEntry::_renameSingleNamespace(OperationContext* txn,
+ StringData fromNS,
+ StringData toNS,
+ bool stayTemp) {
+ // some sanity checking
+ NamespaceDetails* fromDetails = _namespaceIndex.details(fromNS);
+ if (!fromDetails)
+ return Status(ErrorCodes::BadValue, "from namespace doesn't exist");
- _removeFromCache(txn->recoveryUnit(), fromNS);
+ if (_namespaceIndex.details(toNS))
+ return Status(ErrorCodes::BadValue, "to namespace already exists");
- // at this point, we haven't done anything destructive yet
+ _removeFromCache(txn->recoveryUnit(), fromNS);
- // ----
- // actually start moving
- // ----
+ // at this point, we haven't done anything destructive yet
- // this could throw, but if it does we're ok
- _namespaceIndex.add_ns( txn, toNS, fromDetails );
- NamespaceDetails* toDetails = _namespaceIndex.details( toNS );
+ // ----
+ // actually start moving
+ // ----
- try {
- toDetails->copyingFrom(txn,
- toNS,
- _namespaceIndex,
- fromDetails); // fixes extraOffset
- }
- catch( DBException& ) {
- // could end up here if .ns is full - if so try to clean up / roll back a little
- _namespaceIndex.kill_ns( txn, toNS );
- throw;
- }
+ // this could throw, but if it does we're ok
+ _namespaceIndex.add_ns(txn, toNS, fromDetails);
+ NamespaceDetails* toDetails = _namespaceIndex.details(toNS);
- // at this point, code .ns stuff moved
+ try {
+ toDetails->copyingFrom(txn, toNS, _namespaceIndex, fromDetails); // fixes extraOffset
+ } catch (DBException&) {
+ // could end up here if .ns is full - if so try to clean up / roll back a little
+ _namespaceIndex.kill_ns(txn, toNS);
+ throw;
+ }
- _namespaceIndex.kill_ns( txn, fromNS );
- fromDetails = NULL;
+    // at this point, the .ns entry has been moved
- // fix system.namespaces
- BSONObj newSpec;
- RecordId oldSpecLocation;
- {
+ _namespaceIndex.kill_ns(txn, fromNS);
+ fromDetails = NULL;
- BSONObj oldSpec;
- {
- RecordStoreV1Base* rs = _getNamespaceRecordStore();
- auto cursor = rs->getCursor(txn);
- while (auto record = cursor->next()) {
- BSONObj entry = record->data.releaseToBson();
- if ( fromNS == entry["name"].String() ) {
- oldSpecLocation = record->id;
- oldSpec = entry.getOwned();
- break;
- }
+ // fix system.namespaces
+ BSONObj newSpec;
+ RecordId oldSpecLocation;
+ {
+ BSONObj oldSpec;
+ {
+ RecordStoreV1Base* rs = _getNamespaceRecordStore();
+ auto cursor = rs->getCursor(txn);
+ while (auto record = cursor->next()) {
+ BSONObj entry = record->data.releaseToBson();
+ if (fromNS == entry["name"].String()) {
+ oldSpecLocation = record->id;
+ oldSpec = entry.getOwned();
+ break;
}
}
- invariant( !oldSpec.isEmpty() );
- invariant( !oldSpecLocation.isNull() );
+ }
+ invariant(!oldSpec.isEmpty());
+ invariant(!oldSpecLocation.isNull());
- BSONObjBuilder b;
- BSONObjIterator i( oldSpec.getObjectField( "options" ) );
- while( i.more() ) {
- BSONElement e = i.next();
- if ( strcmp( e.fieldName(), "create" ) != 0 ) {
- if (stayTemp || (strcmp(e.fieldName(), "temp") != 0))
- b.append( e );
- }
- else {
- b << "create" << toNS;
- }
+ BSONObjBuilder b;
+ BSONObjIterator i(oldSpec.getObjectField("options"));
+ while (i.more()) {
+ BSONElement e = i.next();
+ if (strcmp(e.fieldName(), "create") != 0) {
+ if (stayTemp || (strcmp(e.fieldName(), "temp") != 0))
+ b.append(e);
+ } else {
+ b << "create" << toNS;
}
- newSpec = b.obj();
}
+ newSpec = b.obj();
+ }
- _addNamespaceToNamespaceCollection( txn, toNS, newSpec.isEmpty() ? 0 : &newSpec );
-
- _getNamespaceRecordStore()->deleteRecord( txn, oldSpecLocation );
+ _addNamespaceToNamespaceCollection(txn, toNS, newSpec.isEmpty() ? 0 : &newSpec);
- Entry*& entry = _collections[toNS.toString()];
- invariant( entry == NULL );
- txn->recoveryUnit()->registerChange(new EntryInsertion(toNS, this));
- entry = new Entry();
- _insertInCache(txn, toNS, entry);
+ _getNamespaceRecordStore()->deleteRecord(txn, oldSpecLocation);
- return Status::OK();
- }
+ Entry*& entry = _collections[toNS.toString()];
+ invariant(entry == NULL);
+ txn->recoveryUnit()->registerChange(new EntryInsertion(toNS, this));
+ entry = new Entry();
+ _insertInCache(txn, toNS, entry);
- void MMAPV1DatabaseCatalogEntry::appendExtraStats( OperationContext* opCtx,
- BSONObjBuilder* output,
- double scale ) const {
- if ( isEmpty() ) {
- output->appendNumber( "fileSize", 0 );
- }
- else {
- output->appendNumber( "fileSize", _extentManager.fileSize() / scale );
- output->appendNumber( "nsSizeMB", static_cast<int>( _namespaceIndex.fileLength() /
- ( 1024 * 1024 ) ) );
+ return Status::OK();
+}
- int freeListSize = 0;
- int64_t freeListSpace = 0;
- _extentManager.freeListStats(opCtx, &freeListSize, &freeListSpace);
+void MMAPV1DatabaseCatalogEntry::appendExtraStats(OperationContext* opCtx,
+ BSONObjBuilder* output,
+ double scale) const {
+ if (isEmpty()) {
+ output->appendNumber("fileSize", 0);
+ } else {
+ output->appendNumber("fileSize", _extentManager.fileSize() / scale);
+ output->appendNumber("nsSizeMB",
+ static_cast<int>(_namespaceIndex.fileLength() / (1024 * 1024)));
- BSONObjBuilder extentFreeList( output->subobjStart( "extentFreeList" ) );
- extentFreeList.append( "num", freeListSize );
- extentFreeList.appendNumber( "totalSize",
- static_cast<long long>( freeListSpace / scale ) );
- extentFreeList.done();
+ int freeListSize = 0;
+ int64_t freeListSpace = 0;
+ _extentManager.freeListStats(opCtx, &freeListSize, &freeListSpace);
- {
+ BSONObjBuilder extentFreeList(output->subobjStart("extentFreeList"));
+ extentFreeList.append("num", freeListSize);
+ extentFreeList.appendNumber("totalSize", static_cast<long long>(freeListSpace / scale));
+ extentFreeList.done();
- const DataFileVersion version = _extentManager.getFileFormat(opCtx);
+ {
+ const DataFileVersion version = _extentManager.getFileFormat(opCtx);
- BSONObjBuilder dataFileVersion( output->subobjStart( "dataFileVersion" ) );
- dataFileVersion.append( "major", version.majorRaw() );
- dataFileVersion.append( "minor", version.minorRaw() );
- dataFileVersion.done();
- }
+ BSONObjBuilder dataFileVersion(output->subobjStart("dataFileVersion"));
+ dataFileVersion.append("major", version.majorRaw());
+ dataFileVersion.append("minor", version.minorRaw());
+ dataFileVersion.done();
}
-
}
+}
- bool MMAPV1DatabaseCatalogEntry::isOlderThan24( OperationContext* opCtx ) const {
- if ( _extentManager.numFiles() == 0 )
- return false;
+bool MMAPV1DatabaseCatalogEntry::isOlderThan24(OperationContext* opCtx) const {
+ if (_extentManager.numFiles() == 0)
+ return false;
- const DataFileVersion version = _extentManager.getFileFormat(opCtx);
+ const DataFileVersion version = _extentManager.getFileFormat(opCtx);
- invariant(version.isCompatibleWithCurrentCode());
+ invariant(version.isCompatibleWithCurrentCode());
- return !version.is24IndexClean();
- }
+ return !version.is24IndexClean();
+}
- void MMAPV1DatabaseCatalogEntry::markIndexSafe24AndUp( OperationContext* opCtx ) {
- if ( _extentManager.numFiles() == 0 )
- return;
+void MMAPV1DatabaseCatalogEntry::markIndexSafe24AndUp(OperationContext* opCtx) {
+ if (_extentManager.numFiles() == 0)
+ return;
- DataFileVersion version = _extentManager.getFileFormat(opCtx);
+ DataFileVersion version = _extentManager.getFileFormat(opCtx);
- invariant(version.isCompatibleWithCurrentCode());
+ invariant(version.isCompatibleWithCurrentCode());
- if (version.is24IndexClean())
- return; // nothing to do
+ if (version.is24IndexClean())
+ return; // nothing to do
- version.setIs24IndexClean();
- _extentManager.setFileFormat(opCtx, version);
- }
+ version.setIs24IndexClean();
+ _extentManager.setFileFormat(opCtx, version);
+}
- bool MMAPV1DatabaseCatalogEntry::currentFilesCompatible( OperationContext* opCtx ) const {
- if ( _extentManager.numFiles() == 0 )
- return true;
+bool MMAPV1DatabaseCatalogEntry::currentFilesCompatible(OperationContext* opCtx) const {
+ if (_extentManager.numFiles() == 0)
+ return true;
- return _extentManager.getOpenFile( 0 )->getHeader()->version.isCompatibleWithCurrentCode();
- }
+ return _extentManager.getOpenFile(0)->getHeader()->version.isCompatibleWithCurrentCode();
+}
- void MMAPV1DatabaseCatalogEntry::getCollectionNamespaces( std::list<std::string>* tofill ) const {
- _namespaceIndex.getCollectionNamespaces( tofill );
- }
+void MMAPV1DatabaseCatalogEntry::getCollectionNamespaces(std::list<std::string>* tofill) const {
+ _namespaceIndex.getCollectionNamespaces(tofill);
+}
- void MMAPV1DatabaseCatalogEntry::_ensureSystemCollection(OperationContext* txn,
- StringData ns) {
-
- NamespaceDetails* details = _namespaceIndex.details(ns);
- if (details) {
- return;
- }
- _namespaceIndex.add_ns( txn, ns, DiskLoc(), false );
+void MMAPV1DatabaseCatalogEntry::_ensureSystemCollection(OperationContext* txn, StringData ns) {
+ NamespaceDetails* details = _namespaceIndex.details(ns);
+ if (details) {
+ return;
}
+ _namespaceIndex.add_ns(txn, ns, DiskLoc(), false);
+}
- void MMAPV1DatabaseCatalogEntry::_init(OperationContext* txn) {
- WriteUnitOfWork wunit(txn);
+void MMAPV1DatabaseCatalogEntry::_init(OperationContext* txn) {
+ WriteUnitOfWork wunit(txn);
- // Upgrade freelist
- const NamespaceString oldFreeList(name(), "$freelist");
- NamespaceDetails* freeListDetails = _namespaceIndex.details(oldFreeList.ns());
- if (freeListDetails) {
- if (!freeListDetails->firstExtent.isNull()) {
- _extentManager.freeExtents(txn,
- freeListDetails->firstExtent,
- freeListDetails->lastExtent);
- }
-
- _namespaceIndex.kill_ns(txn, oldFreeList.ns());
+ // Upgrade freelist
+ const NamespaceString oldFreeList(name(), "$freelist");
+ NamespaceDetails* freeListDetails = _namespaceIndex.details(oldFreeList.ns());
+ if (freeListDetails) {
+ if (!freeListDetails->firstExtent.isNull()) {
+ _extentManager.freeExtents(
+ txn, freeListDetails->firstExtent, freeListDetails->lastExtent);
}
- DataFileVersion version = _extentManager.getFileFormat(txn);
- if (version.isCompatibleWithCurrentCode() && !version.mayHave28Freelist()) {
- // Any DB that can be opened and written to gets this flag set.
- version.setMayHave28Freelist();
- _extentManager.setFileFormat(txn, version);
- }
+ _namespaceIndex.kill_ns(txn, oldFreeList.ns());
+ }
- const NamespaceString nsi(name(), "system.indexes");
- const NamespaceString nsn(name(), "system.namespaces");
+ DataFileVersion version = _extentManager.getFileFormat(txn);
+ if (version.isCompatibleWithCurrentCode() && !version.mayHave28Freelist()) {
+ // Any DB that can be opened and written to gets this flag set.
+ version.setMayHave28Freelist();
+ _extentManager.setFileFormat(txn, version);
+ }
- bool isSystemNamespacesGoingToBeNew = _namespaceIndex.details(nsn.toString()) == NULL;
- bool isSystemIndexesGoingToBeNew = _namespaceIndex.details(nsi.toString()) == NULL;
+ const NamespaceString nsi(name(), "system.indexes");
+ const NamespaceString nsn(name(), "system.namespaces");
- _ensureSystemCollection(txn, nsn.toString());
- _ensureSystemCollection(txn, nsi.toString());
+ bool isSystemNamespacesGoingToBeNew = _namespaceIndex.details(nsn.toString()) == NULL;
+ bool isSystemIndexesGoingToBeNew = _namespaceIndex.details(nsi.toString()) == NULL;
- if (isSystemNamespacesGoingToBeNew) {
- txn->recoveryUnit()->registerChange(new EntryInsertion(nsn.toString(), this));
- }
- if (isSystemIndexesGoingToBeNew) {
- txn->recoveryUnit()->registerChange(new EntryInsertion(nsi.toString(), this));
- }
+ _ensureSystemCollection(txn, nsn.toString());
+ _ensureSystemCollection(txn, nsi.toString());
- Entry*& indexEntry = _collections[nsi.toString()];
- Entry*& nsEntry = _collections[nsn.toString()];
+ if (isSystemNamespacesGoingToBeNew) {
+ txn->recoveryUnit()->registerChange(new EntryInsertion(nsn.toString(), this));
+ }
+ if (isSystemIndexesGoingToBeNew) {
+ txn->recoveryUnit()->registerChange(new EntryInsertion(nsi.toString(), this));
+ }
- NamespaceDetails* const indexDetails = _namespaceIndex.details(nsi.toString());
- NamespaceDetails* const nsDetails = _namespaceIndex.details(nsn.toString());
+ Entry*& indexEntry = _collections[nsi.toString()];
+ Entry*& nsEntry = _collections[nsn.toString()];
- // order has to be:
- // 1) ns rs
- // 2) i rs
- // 3) catalog entries
+ NamespaceDetails* const indexDetails = _namespaceIndex.details(nsi.toString());
+ NamespaceDetails* const nsDetails = _namespaceIndex.details(nsn.toString());
- if (!nsEntry) {
- nsEntry = new Entry();
+ // order has to be:
+ // 1) ns rs
+ // 2) i rs
+ // 3) catalog entries
- NamespaceDetailsRSV1MetaData* md = new NamespaceDetailsRSV1MetaData(nsn.toString(),
- nsDetails);
- nsEntry->recordStore.reset(new SimpleRecordStoreV1(txn,
- nsn.toString(),
- md,
- &_extentManager,
- false));
- }
+ if (!nsEntry) {
+ nsEntry = new Entry();
- if (!indexEntry) {
- indexEntry = new Entry();
+ NamespaceDetailsRSV1MetaData* md =
+ new NamespaceDetailsRSV1MetaData(nsn.toString(), nsDetails);
+ nsEntry->recordStore.reset(
+ new SimpleRecordStoreV1(txn, nsn.toString(), md, &_extentManager, false));
+ }
- NamespaceDetailsRSV1MetaData* md =
- new NamespaceDetailsRSV1MetaData(nsi.toString(), indexDetails);
+ if (!indexEntry) {
+ indexEntry = new Entry();
- indexEntry->recordStore.reset(new SimpleRecordStoreV1(txn,
- nsi.toString(),
- md,
- &_extentManager,
- true));
- }
+ NamespaceDetailsRSV1MetaData* md =
+ new NamespaceDetailsRSV1MetaData(nsi.toString(), indexDetails);
- if (isSystemIndexesGoingToBeNew) {
- _addNamespaceToNamespaceCollection(txn, nsi.toString(), NULL);
- }
+ indexEntry->recordStore.reset(
+ new SimpleRecordStoreV1(txn, nsi.toString(), md, &_extentManager, true));
+ }
- if (!nsEntry->catalogEntry) {
- nsEntry->catalogEntry.reset(
- new NamespaceDetailsCollectionCatalogEntry(nsn.toString(),
- nsDetails,
- nsEntry->recordStore.get(),
- indexEntry->recordStore.get(),
- this));
- }
+ if (isSystemIndexesGoingToBeNew) {
+ _addNamespaceToNamespaceCollection(txn, nsi.toString(), NULL);
+ }
- if (!indexEntry->catalogEntry) {
- indexEntry->catalogEntry.reset(
- new NamespaceDetailsCollectionCatalogEntry(nsi.toString(),
- indexDetails,
- nsEntry->recordStore.get(),
- indexEntry->recordStore.get(),
- this));
- }
+ if (!nsEntry->catalogEntry) {
+ nsEntry->catalogEntry.reset(
+ new NamespaceDetailsCollectionCatalogEntry(nsn.toString(),
+ nsDetails,
+ nsEntry->recordStore.get(),
+ indexEntry->recordStore.get(),
+ this));
+ }
- wunit.commit();
+ if (!indexEntry->catalogEntry) {
+ indexEntry->catalogEntry.reset(
+ new NamespaceDetailsCollectionCatalogEntry(nsi.toString(),
+ indexDetails,
+ nsEntry->recordStore.get(),
+ indexEntry->recordStore.get(),
+ this));
+ }
- // Now put everything in the cache of namespaces. None of the operations below do any
- // transactional operations.
- std::list<std::string> namespaces;
- _namespaceIndex.getCollectionNamespaces(&namespaces);
+ wunit.commit();
- for (std::list<std::string>::const_iterator i = namespaces.begin();
- i != namespaces.end(); // we add to the list in the loop so can't cache end().
- i++) {
+ // Now put everything in the cache of namespaces. None of the operations below do any
+ // transactional operations.
+ std::list<std::string> namespaces;
+ _namespaceIndex.getCollectionNamespaces(&namespaces);
- const std::string& ns = *i;
- Entry*& entry = _collections[ns];
+ for (std::list<std::string>::const_iterator i = namespaces.begin();
+ i != namespaces.end(); // we add to the list in the loop so can't cache end().
+ i++) {
+ const std::string& ns = *i;
+ Entry*& entry = _collections[ns];
- // The two cases where entry is not null is for system.indexes and system.namespaces,
- // which we manually instantiated above. It is OK to skip these two collections,
- // because they don't have indexes on them anyway.
- if (entry) {
- continue;
- }
+        // The two cases where entry is not null are system.indexes and system.namespaces,
+ // which we manually instantiated above. It is OK to skip these two collections,
+ // because they don't have indexes on them anyway.
+ if (entry) {
+ continue;
+ }
- entry = new Entry();
- _insertInCache(txn, ns, entry);
+ entry = new Entry();
+ _insertInCache(txn, ns, entry);
- // Add the indexes on this namespace to the list of namespaces to load.
- std::vector<std::string> indexNames;
- entry->catalogEntry->getAllIndexes(txn, &indexNames);
+ // Add the indexes on this namespace to the list of namespaces to load.
+ std::vector<std::string> indexNames;
+ entry->catalogEntry->getAllIndexes(txn, &indexNames);
- for (size_t i = 0; i < indexNames.size(); i++) {
- namespaces.push_back(IndexDescriptor::makeIndexNamespace(ns, indexNames[i]));
- }
+ for (size_t i = 0; i < indexNames.size(); i++) {
+ namespaces.push_back(IndexDescriptor::makeIndexNamespace(ns, indexNames[i]));
}
}
+}
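Note that the namespace-loading loop appends to the std::list it is iterating (indexes discovered on a collection are pushed onto the work list), which is safe because list insertion never invalidates existing iterators; this is why the loop deliberately re-evaluates end() on each pass. A tiny standalone sketch of the idiom:

    #include <list>
    #include <string>

    int main() {
        std::list<std::string> work = {"a", "b"};
        for (std::list<std::string>::const_iterator i = work.begin();
             i != work.end();  // re-evaluated: the list grows during the loop
             ++i) {
            if (*i == "a")
                work.push_back("a.idx");  // discovered while processing "a"
        }
        // The loop also visits "a.idx"; work now holds: a, b, a.idx.
        return 0;
    }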
- Status MMAPV1DatabaseCatalogEntry::createCollection( OperationContext* txn,
- StringData ns,
- const CollectionOptions& options,
- bool allocateDefaultSpace ) {
- if ( _namespaceIndex.details( ns ) ) {
- return Status( ErrorCodes::NamespaceExists,
- str::stream() << "namespace already exists: " << ns );
- }
+Status MMAPV1DatabaseCatalogEntry::createCollection(OperationContext* txn,
+ StringData ns,
+ const CollectionOptions& options,
+ bool allocateDefaultSpace) {
+ if (_namespaceIndex.details(ns)) {
+ return Status(ErrorCodes::NamespaceExists,
+ str::stream() << "namespace already exists: " << ns);
+ }
- BSONObj optionsAsBSON = options.toBSON();
- _addNamespaceToNamespaceCollection( txn, ns, &optionsAsBSON );
+ BSONObj optionsAsBSON = options.toBSON();
+ _addNamespaceToNamespaceCollection(txn, ns, &optionsAsBSON);
- _namespaceIndex.add_ns( txn, ns, DiskLoc(), options.capped );
- NamespaceDetails* details = _namespaceIndex.details(ns);
+ _namespaceIndex.add_ns(txn, ns, DiskLoc(), options.capped);
+ NamespaceDetails* details = _namespaceIndex.details(ns);
- // Set the flags.
- NamespaceDetailsRSV1MetaData(ns, details).replaceUserFlags(txn, options.flags);
+ // Set the flags.
+ NamespaceDetailsRSV1MetaData(ns, details).replaceUserFlags(txn, options.flags);
- if (options.capped && options.cappedMaxDocs > 0) {
- txn->recoveryUnit()->writingInt( details->maxDocsInCapped ) = options.cappedMaxDocs;
- }
-
- Entry*& entry = _collections[ns.toString()];
- invariant( !entry );
- txn->recoveryUnit()->registerChange(new EntryInsertion(ns, this));
- entry = new Entry();
- _insertInCache(txn, ns, entry);
+ if (options.capped && options.cappedMaxDocs > 0) {
+ txn->recoveryUnit()->writingInt(details->maxDocsInCapped) = options.cappedMaxDocs;
+ }
- if ( allocateDefaultSpace ) {
- RecordStoreV1Base* rs = _getRecordStore( ns );
- if ( options.initialNumExtents > 0 ) {
- int size = _massageExtentSize( &_extentManager, options.cappedSize );
- for ( int i = 0; i < options.initialNumExtents; i++ ) {
- rs->increaseStorageSize( txn, size, false );
- }
- }
- else if ( !options.initialExtentSizes.empty() ) {
- for ( size_t i = 0; i < options.initialExtentSizes.size(); i++ ) {
- int size = options.initialExtentSizes[i];
- size = _massageExtentSize( &_extentManager, size );
- rs->increaseStorageSize( txn, size, false );
- }
+ Entry*& entry = _collections[ns.toString()];
+ invariant(!entry);
+ txn->recoveryUnit()->registerChange(new EntryInsertion(ns, this));
+ entry = new Entry();
+ _insertInCache(txn, ns, entry);
+
+ if (allocateDefaultSpace) {
+ RecordStoreV1Base* rs = _getRecordStore(ns);
+ if (options.initialNumExtents > 0) {
+ int size = _massageExtentSize(&_extentManager, options.cappedSize);
+ for (int i = 0; i < options.initialNumExtents; i++) {
+ rs->increaseStorageSize(txn, size, false);
}
- else if ( options.capped ) {
- // normal
- do {
- // Must do this at least once, otherwise we leave the collection with no
- // extents, which is invalid.
- int sz = _massageExtentSize( &_extentManager,
- options.cappedSize - rs->storageSize(txn) );
- sz &= 0xffffff00;
- rs->increaseStorageSize( txn, sz, false );
- } while( rs->storageSize(txn) < options.cappedSize );
- }
- else {
- rs->increaseStorageSize( txn, _extentManager.initialSize( 128 ), false );
+ } else if (!options.initialExtentSizes.empty()) {
+ for (size_t i = 0; i < options.initialExtentSizes.size(); i++) {
+ int size = options.initialExtentSizes[i];
+ size = _massageExtentSize(&_extentManager, size);
+ rs->increaseStorageSize(txn, size, false);
}
+        } else if (options.capped) {
+            // capped: preallocate extents until the requested size is reached
+ do {
+ // Must do this at least once, otherwise we leave the collection with no
+ // extents, which is invalid.
+ int sz =
+ _massageExtentSize(&_extentManager, options.cappedSize - rs->storageSize(txn));
+ sz &= 0xffffff00;
+ rs->increaseStorageSize(txn, sz, false);
+ } while (rs->storageSize(txn) < options.cappedSize);
+ } else {
+ rs->increaseStorageSize(txn, _extentManager.initialSize(128), false);
}
-
- return Status::OK();
}
- void MMAPV1DatabaseCatalogEntry::createNamespaceForIndex(OperationContext* txn,
- StringData name) {
- // This is a simplified form of createCollection.
- invariant(!_namespaceIndex.details(name));
-
- _addNamespaceToNamespaceCollection(txn, name, NULL);
- _namespaceIndex.add_ns(txn, name, DiskLoc(), false);
+ return Status::OK();
+}
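In the capped branch above, each extent request is rounded down to a 256-byte boundary with sz &= 0xffffff00 before being allocated. A worked example of that rounding:

    #include <cassert>

    int main() {
        int sz = 100000;       // requested extent size
        sz &= 0xffffff00;      // clear the low 8 bits: round down to 256 bytes
        assert(sz == 99840);   // 100000 - (100000 % 256)
        assert(sz % 256 == 0);
        return 0;
    }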
- Entry*& entry = _collections[name.toString()];
- invariant( !entry );
- txn->recoveryUnit()->registerChange(new EntryInsertion(name, this));
- entry = new Entry();
- _insertInCache(txn, name, entry);
- }
+void MMAPV1DatabaseCatalogEntry::createNamespaceForIndex(OperationContext* txn, StringData name) {
+ // This is a simplified form of createCollection.
+ invariant(!_namespaceIndex.details(name));
- CollectionCatalogEntry* MMAPV1DatabaseCatalogEntry::getCollectionCatalogEntry(
- StringData ns ) const {
+ _addNamespaceToNamespaceCollection(txn, name, NULL);
+ _namespaceIndex.add_ns(txn, name, DiskLoc(), false);
- CollectionMap::const_iterator i = _collections.find( ns.toString() );
- if (i == _collections.end()) {
- return NULL;
- }
+ Entry*& entry = _collections[name.toString()];
+ invariant(!entry);
+ txn->recoveryUnit()->registerChange(new EntryInsertion(name, this));
+ entry = new Entry();
+ _insertInCache(txn, name, entry);
+}
- invariant( i->second->catalogEntry.get() );
- return i->second->catalogEntry.get();
+CollectionCatalogEntry* MMAPV1DatabaseCatalogEntry::getCollectionCatalogEntry(StringData ns) const {
+ CollectionMap::const_iterator i = _collections.find(ns.toString());
+ if (i == _collections.end()) {
+ return NULL;
}
- void MMAPV1DatabaseCatalogEntry::_insertInCache(OperationContext* txn,
- StringData ns,
- Entry* entry) {
+ invariant(i->second->catalogEntry.get());
+ return i->second->catalogEntry.get();
+}
- NamespaceDetails* details = _namespaceIndex.details(ns);
- invariant(details);
+void MMAPV1DatabaseCatalogEntry::_insertInCache(OperationContext* txn,
+ StringData ns,
+ Entry* entry) {
+ NamespaceDetails* details = _namespaceIndex.details(ns);
+ invariant(details);
- entry->catalogEntry.reset(
- new NamespaceDetailsCollectionCatalogEntry(ns,
- details,
- _getNamespaceRecordStore(),
- _getIndexRecordStore(),
- this));
+ entry->catalogEntry.reset(new NamespaceDetailsCollectionCatalogEntry(
+ ns, details, _getNamespaceRecordStore(), _getIndexRecordStore(), this));
- unique_ptr<NamespaceDetailsRSV1MetaData> md(new NamespaceDetailsRSV1MetaData(ns, details));
- const NamespaceString nss(ns);
+ unique_ptr<NamespaceDetailsRSV1MetaData> md(new NamespaceDetailsRSV1MetaData(ns, details));
+ const NamespaceString nss(ns);
- if (details->isCapped) {
- entry->recordStore.reset(new CappedRecordStoreV1(txn,
- NULL,
- ns,
- md.release(),
- &_extentManager,
- nss.coll() == "system.indexes"));
- }
- else {
- entry->recordStore.reset(new SimpleRecordStoreV1(txn,
- ns,
- md.release(),
- &_extentManager,
- nss.coll() == "system.indexes"));
- }
+ if (details->isCapped) {
+ entry->recordStore.reset(new CappedRecordStoreV1(
+ txn, NULL, ns, md.release(), &_extentManager, nss.coll() == "system.indexes"));
+ } else {
+ entry->recordStore.reset(new SimpleRecordStoreV1(
+ txn, ns, md.release(), &_extentManager, nss.coll() == "system.indexes"));
}
+}
- RecordStore* MMAPV1DatabaseCatalogEntry::getRecordStore( StringData ns ) const {
- return _getRecordStore( ns );
+RecordStore* MMAPV1DatabaseCatalogEntry::getRecordStore(StringData ns) const {
+ return _getRecordStore(ns);
+}
+
+RecordStoreV1Base* MMAPV1DatabaseCatalogEntry::_getRecordStore(StringData ns) const {
+ CollectionMap::const_iterator i = _collections.find(ns.toString());
+ if (i == _collections.end()) {
+ return NULL;
}
- RecordStoreV1Base* MMAPV1DatabaseCatalogEntry::_getRecordStore( StringData ns ) const {
- CollectionMap::const_iterator i = _collections.find( ns.toString() );
- if (i == _collections.end()) {
- return NULL;
- }
+ invariant(i->second->recordStore.get());
+ return i->second->recordStore.get();
+}
- invariant( i->second->recordStore.get() );
- return i->second->recordStore.get();
- }
+IndexAccessMethod* MMAPV1DatabaseCatalogEntry::getIndex(OperationContext* txn,
+ const CollectionCatalogEntry* collection,
+ IndexCatalogEntry* entry) {
+ const string& type = entry->descriptor()->getAccessMethodName();
- IndexAccessMethod* MMAPV1DatabaseCatalogEntry::getIndex( OperationContext* txn,
- const CollectionCatalogEntry* collection,
- IndexCatalogEntry* entry ) {
- const string& type = entry->descriptor()->getAccessMethodName();
+ string ns = collection->ns().ns();
- string ns = collection->ns().ns();
+ RecordStoreV1Base* rs = _getRecordStore(entry->descriptor()->indexNamespace());
+ invariant(rs);
- RecordStoreV1Base* rs = _getRecordStore(entry->descriptor()->indexNamespace());
- invariant(rs);
+ std::unique_ptr<SortedDataInterface> btree(
+ getMMAPV1Interface(entry->headManager(),
+ rs,
+ &rs->savedCursors,
+ entry->ordering(),
+ entry->descriptor()->indexNamespace(),
+ entry->descriptor()->version()));
- std::unique_ptr<SortedDataInterface> btree(
- getMMAPV1Interface(entry->headManager(),
- rs,
- &rs->savedCursors,
- entry->ordering(),
- entry->descriptor()->indexNamespace(),
- entry->descriptor()->version()));
+ if (IndexNames::HASHED == type)
+ return new HashAccessMethod(entry, btree.release());
- if (IndexNames::HASHED == type)
- return new HashAccessMethod( entry, btree.release() );
+ if (IndexNames::GEO_2DSPHERE == type)
+ return new S2AccessMethod(entry, btree.release());
- if (IndexNames::GEO_2DSPHERE == type)
- return new S2AccessMethod( entry, btree.release() );
+ if (IndexNames::TEXT == type)
+ return new FTSAccessMethod(entry, btree.release());
- if (IndexNames::TEXT == type)
- return new FTSAccessMethod( entry, btree.release() );
+ if (IndexNames::GEO_HAYSTACK == type)
+ return new HaystackAccessMethod(entry, btree.release());
- if (IndexNames::GEO_HAYSTACK == type)
- return new HaystackAccessMethod( entry, btree.release() );
+ if ("" == type)
+ return new BtreeAccessMethod(entry, btree.release());
- if ("" == type)
- return new BtreeAccessMethod( entry, btree.release() );
+ if (IndexNames::GEO_2D == type)
+ return new TwoDAccessMethod(entry, btree.release());
- if (IndexNames::GEO_2D == type)
- return new TwoDAccessMethod( entry, btree.release() );
+ log() << "Can't find index for keyPattern " << entry->descriptor()->keyPattern();
+ fassertFailed(17489);
+}
- log() << "Can't find index for keyPattern " << entry->descriptor()->keyPattern();
- fassertFailed(17489);
- }
+RecordStoreV1Base* MMAPV1DatabaseCatalogEntry::_getIndexRecordStore() {
+ const NamespaceString nss(name(), "system.indexes");
+ Entry* entry = _collections[nss.toString()];
+ invariant(entry);
- RecordStoreV1Base* MMAPV1DatabaseCatalogEntry::_getIndexRecordStore() {
- const NamespaceString nss(name(), "system.indexes");
- Entry* entry = _collections[nss.toString()];
- invariant( entry );
+ return entry->recordStore.get();
+}
- return entry->recordStore.get();
- }
+RecordStoreV1Base* MMAPV1DatabaseCatalogEntry::_getNamespaceRecordStore() const {
+ const NamespaceString nss(name(), "system.namespaces");
+ CollectionMap::const_iterator i = _collections.find(nss.toString());
+ invariant(i != _collections.end());
- RecordStoreV1Base* MMAPV1DatabaseCatalogEntry::_getNamespaceRecordStore() const {
- const NamespaceString nss( name(), "system.namespaces" );
- CollectionMap::const_iterator i = _collections.find( nss.toString() );
- invariant( i != _collections.end() );
+ return i->second->recordStore.get();
+}
- return i->second->recordStore.get();
+void MMAPV1DatabaseCatalogEntry::_addNamespaceToNamespaceCollection(OperationContext* txn,
+ StringData ns,
+ const BSONObj* options) {
+ if (nsToCollectionSubstring(ns) == "system.namespaces") {
+ // system.namespaces holds all the others, so it is not explicitly listed in the catalog.
+ return;
}
- void MMAPV1DatabaseCatalogEntry::_addNamespaceToNamespaceCollection(OperationContext* txn,
- StringData ns,
- const BSONObj* options) {
+ BSONObjBuilder b;
+ b.append("name", ns);
+ if (options && !options->isEmpty()) {
+ b.append("options", *options);
+ }
- if (nsToCollectionSubstring(ns) == "system.namespaces") {
- // system.namespaces holds all the others, so it is not explicitly listed in the catalog.
- return;
- }
+ const BSONObj obj = b.done();
- BSONObjBuilder b;
- b.append("name", ns);
- if (options && !options->isEmpty()) {
- b.append("options", *options);
- }
+ RecordStoreV1Base* rs = _getNamespaceRecordStore();
+ invariant(rs);
- const BSONObj obj = b.done();
+ StatusWith<RecordId> loc = rs->insertRecord(txn, obj.objdata(), obj.objsize(), false);
+ massertStatusOK(loc.getStatus());
+}
- RecordStoreV1Base* rs = _getNamespaceRecordStore();
- invariant( rs );
-
- StatusWith<RecordId> loc = rs->insertRecord( txn, obj.objdata(), obj.objsize(), false );
- massertStatusOK( loc.getStatus() );
+void MMAPV1DatabaseCatalogEntry::_removeNamespaceFromNamespaceCollection(OperationContext* txn,
+ StringData ns) {
+ if (nsToCollectionSubstring(ns) == "system.namespaces") {
+ // system.namespaces holds all the others, so it is not explicitly listed in the catalog.
+ return;
}
- void MMAPV1DatabaseCatalogEntry::_removeNamespaceFromNamespaceCollection(
- OperationContext* txn,
- StringData ns ) {
+ RecordStoreV1Base* rs = _getNamespaceRecordStore();
+ invariant(rs);
- if ( nsToCollectionSubstring( ns ) == "system.namespaces" ) {
- // system.namespaces holds all the others, so it is not explicitly listed in the catalog.
- return;
- }
-
- RecordStoreV1Base* rs = _getNamespaceRecordStore();
- invariant( rs );
-
- auto cursor = rs->getCursor(txn);
- while (auto record = cursor->next()) {
- BSONObj entry = record->data.releaseToBson();
- BSONElement name = entry["name"];
- if ( name.type() == String && name.String() == ns ) {
- rs->deleteRecord( txn, record->id );
- break;
- }
+ auto cursor = rs->getCursor(txn);
+ while (auto record = cursor->next()) {
+ BSONObj entry = record->data.releaseToBson();
+ BSONElement name = entry["name"];
+ if (name.type() == String && name.String() == ns) {
+ rs->deleteRecord(txn, record->id);
+ break;
}
}
+}
- CollectionOptions MMAPV1DatabaseCatalogEntry::getCollectionOptions( OperationContext* txn,
- StringData ns ) const {
- if ( nsToCollectionSubstring( ns ) == "system.namespaces" ) {
- return CollectionOptions();
- }
+CollectionOptions MMAPV1DatabaseCatalogEntry::getCollectionOptions(OperationContext* txn,
+ StringData ns) const {
+ if (nsToCollectionSubstring(ns) == "system.namespaces") {
+ return CollectionOptions();
+ }
- RecordStoreV1Base* rs = _getNamespaceRecordStore();
- invariant( rs );
-
- auto cursor = rs->getCursor(txn);
- while (auto record = cursor->next()) {
- BSONObj entry = record->data.releaseToBson();
- BSONElement name = entry["name"];
- if ( name.type() == String && name.String() == ns ) {
- CollectionOptions options;
- if ( entry["options"].isABSONObj() ) {
- Status status = options.parse( entry["options"].Obj() );
- fassert( 18523, status );
- }
- return options;
+ RecordStoreV1Base* rs = _getNamespaceRecordStore();
+ invariant(rs);
+
+ auto cursor = rs->getCursor(txn);
+ while (auto record = cursor->next()) {
+ BSONObj entry = record->data.releaseToBson();
+ BSONElement name = entry["name"];
+ if (name.type() == String && name.String() == ns) {
+ CollectionOptions options;
+ if (entry["options"].isABSONObj()) {
+ Status status = options.parse(entry["options"].Obj());
+ fassert(18523, status);
}
+ return options;
}
-
- return CollectionOptions();
}
-} // namespace mongo
+
+ return CollectionOptions();
+}
+} // namespace mongo
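
The helpers reformatted above all share one pattern: MMAPv1 keeps per-collection metadata as small BSON documents in the database's system.namespaces record store, and each lookup is a linear cursor scan matched on the "name" field. Below is a minimal, self-contained C++ sketch of that scan-and-match flow; NamespaceRecord and NamespaceCatalog are hypothetical stand-ins for the real RecordStoreV1Base cursors and BSON documents, not MongoDB API.

#include <algorithm>
#include <optional>
#include <string>
#include <vector>

// Hypothetical stand-in for one BSON record in system.namespaces.
struct NamespaceRecord {
    std::string name;     // e.g. "mydb.mycoll", like the "name" BSON field
    std::string options;  // serialized collection options, may be empty
};

class NamespaceCatalog {
public:
    // Mirrors _addNamespaceToNamespaceCollection: system.namespaces itself
    // is never listed in the catalog.
    void add(const std::string& ns, const std::string& options) {
        if (isSystemNamespaces(ns))
            return;
        _records.push_back({ns, options});
    }

    // Mirrors _removeNamespaceFromNamespaceCollection: scan, delete the
    // first record whose name matches, then stop.
    void remove(const std::string& ns) {
        if (isSystemNamespaces(ns))
            return;
        auto it = std::find_if(_records.begin(), _records.end(), [&](const NamespaceRecord& r) {
            return r.name == ns;
        });
        if (it != _records.end())
            _records.erase(it);
    }

    // Mirrors getCollectionOptions: empty result when no entry is found.
    std::optional<std::string> getOptions(const std::string& ns) const {
        for (const NamespaceRecord& r : _records)
            if (r.name == ns)
                return r.options;
        return std::nullopt;
    }

private:
    static bool isSystemNamespaces(const std::string& ns) {
        // Approximates nsToCollectionSubstring(ns) == "system.namespaces".
        static const std::string suffix = "system.namespaces";
        return ns.size() >= suffix.size() &&
            ns.compare(ns.size() - suffix.size(), suffix.size(), suffix) == 0;
    }

    std::vector<NamespaceRecord> _records;
};

The linear scan is tolerable here because system.namespaces holds one small record per namespace and these are metadata paths, not hot paths.
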
diff --git a/src/mongo/db/storage/mmap_v1/mmap_v1_database_catalog_entry.h b/src/mongo/db/storage/mmap_v1/mmap_v1_database_catalog_entry.h
index 1db5e8a1f87..2a922d3d89c 100644
--- a/src/mongo/db/storage/mmap_v1/mmap_v1_database_catalog_entry.h
+++ b/src/mongo/db/storage/mmap_v1/mmap_v1_database_catalog_entry.h
@@ -39,145 +39,150 @@
namespace mongo {
- class CollectionCatalogEntry;
- struct CollectionOptions;
- class IndexAccessMethod;
- class IndexCatalogEntry;
- class IndexDescriptor;
- class RecordStore;
- class RecordStoreV1Base;
- class RecoveryUnit;
- class OperationContext;
-
- class MMAPV1DatabaseCatalogEntry : public DatabaseCatalogEntry {
- public:
- MMAPV1DatabaseCatalogEntry( OperationContext* txn,
- StringData name,
- StringData path,
- bool directoryperdb,
- bool transient );
-
- virtual ~MMAPV1DatabaseCatalogEntry();
-
- // these two seem the same and yet different
- // TODO(ERH): consolidate into one ideally
- virtual bool exists() const { return _namespaceIndex.pathExists(); }
- virtual bool isEmpty() const { return !_namespaceIndex.allocated(); }
- virtual bool hasUserData() const {
- // The two collections which exist and can't be removed are:
- // system.indexes
- // system.namespaces
- return _collections.size() > 2;
- }
-
- virtual int64_t sizeOnDisk( OperationContext* opCtx ) const;
-
- virtual bool isOlderThan24( OperationContext* opCtx ) const;
- virtual void markIndexSafe24AndUp( OperationContext* opCtx );
-
- virtual bool currentFilesCompatible( OperationContext* opCtx ) const;
+class CollectionCatalogEntry;
+struct CollectionOptions;
+class IndexAccessMethod;
+class IndexCatalogEntry;
+class IndexDescriptor;
+class RecordStore;
+class RecordStoreV1Base;
+class RecoveryUnit;
+class OperationContext;
+
+class MMAPV1DatabaseCatalogEntry : public DatabaseCatalogEntry {
+public:
+ MMAPV1DatabaseCatalogEntry(OperationContext* txn,
+ StringData name,
+ StringData path,
+ bool directoryperdb,
+ bool transient);
+
+ virtual ~MMAPV1DatabaseCatalogEntry();
+
+ // these two seem the same and yet different
+ // TODO(ERH): consolidate into one ideally
+ virtual bool exists() const {
+ return _namespaceIndex.pathExists();
+ }
+ virtual bool isEmpty() const {
+ return !_namespaceIndex.allocated();
+ }
+ virtual bool hasUserData() const {
+ // The two collections which exist and can't be removed are:
+ // system.indexes
+ // system.namespaces
+ return _collections.size() > 2;
+ }
+
+ virtual int64_t sizeOnDisk(OperationContext* opCtx) const;
+
+ virtual bool isOlderThan24(OperationContext* opCtx) const;
+ virtual void markIndexSafe24AndUp(OperationContext* opCtx);
+
+ virtual bool currentFilesCompatible(OperationContext* opCtx) const;
+
+ virtual void appendExtraStats(OperationContext* opCtx, BSONObjBuilder* out, double scale) const;
+
+ Status createCollection(OperationContext* txn,
+ StringData ns,
+ const CollectionOptions& options,
+ bool allocateDefaultSpace);
+
+ Status dropCollection(OperationContext* txn, StringData ns);
+
+ Status renameCollection(OperationContext* txn,
+ StringData fromNS,
+ StringData toNS,
+ bool stayTemp);
+
+ void getCollectionNamespaces(std::list<std::string>* tofill) const;
+
+ /**
+ * will return NULL if ns does not exist
+ */
+ CollectionCatalogEntry* getCollectionCatalogEntry(StringData ns) const;
+
+ RecordStore* getRecordStore(StringData ns) const;
+
+ IndexAccessMethod* getIndex(OperationContext* txn,
+ const CollectionCatalogEntry* collection,
+ IndexCatalogEntry* index);
+
+ const MmapV1ExtentManager* getExtentManager() const {
+ return &_extentManager;
+ }
+ MmapV1ExtentManager* getExtentManager() {
+ return &_extentManager;
+ }
+
+ CollectionOptions getCollectionOptions(OperationContext* txn, StringData ns) const;
+
+ /**
+ * Creates a CollectionCatalogEntry in the form of an index rather than a collection.
+ * MMAPv1 puts both indexes and collections into CCEs. A namespace named 'name' must not
+ * exist.
+ */
+ void createNamespaceForIndex(OperationContext* txn, StringData name);
+
+private:
+ class EntryInsertion;
+ class EntryRemoval;
+
+ friend class NamespaceDetailsCollectionCatalogEntry;
+
+ // The _collections map is a cache for efficiently looking up namespace information. Access
+ // to the cache is protected by holding the appropriate DB lock. Regular operations
+ // (insert/update/delete/query) hold intent locks on the database and they access the cache
+    // directly. Metadata operations, such as create db/collection, etc., acquire an exclusive
+    // lock on the database, which protects against concurrent readers of the cache.
+ //
+ // Once initialized, the cache must remain consistent with the data in the memory-mapped
+ // database files through _removeFromCache and _insertInCache. These methods use the
+ // RecoveryUnit to ensure correct handling of rollback.
+
+ struct Entry {
+ std::unique_ptr<CollectionCatalogEntry> catalogEntry;
+ std::unique_ptr<RecordStoreV1Base> recordStore;
+ };
- virtual void appendExtraStats( OperationContext* opCtx,
- BSONObjBuilder* out,
- double scale ) const;
-
- Status createCollection( OperationContext* txn,
- StringData ns,
- const CollectionOptions& options,
- bool allocateDefaultSpace );
-
- Status dropCollection( OperationContext* txn, StringData ns );
-
- Status renameCollection( OperationContext* txn,
- StringData fromNS,
- StringData toNS,
- bool stayTemp );
-
- void getCollectionNamespaces( std::list<std::string>* tofill ) const;
-
- /**
- * will return NULL if ns does not exist
- */
- CollectionCatalogEntry* getCollectionCatalogEntry( StringData ns ) const;
-
- RecordStore* getRecordStore( StringData ns ) const;
-
- IndexAccessMethod* getIndex( OperationContext* txn,
- const CollectionCatalogEntry* collection,
- IndexCatalogEntry* index );
-
- const MmapV1ExtentManager* getExtentManager() const { return &_extentManager; }
- MmapV1ExtentManager* getExtentManager() { return &_extentManager; }
-
- CollectionOptions getCollectionOptions( OperationContext* txn,
- StringData ns ) const;
-
- /**
- * Creates a CollectionCatalogEntry in the form of an index rather than a collection.
- * MMAPv1 puts both indexes and collections into CCEs. A namespace named 'name' must not
- * exist.
- */
- void createNamespaceForIndex(OperationContext* txn, StringData name);
-
- private:
- class EntryInsertion;
- class EntryRemoval;
-
- friend class NamespaceDetailsCollectionCatalogEntry;
-
- // The _collections map is a cache for efficiently looking up namespace information. Access
- // to the cache is protected by holding the appropriate DB lock. Regular operations
- // (insert/update/delete/query) hold intent locks on the database and they access the cache
-        // directly. Metadata operations, such as create db/collection, etc., acquire an exclusive
-        // lock on the database, which protects against concurrent readers of the cache.
- //
- // Once initialized, the cache must remain consistent with the data in the memory-mapped
- // database files through _removeFromCache and _insertInCache. These methods use the
- // RecoveryUnit to ensure correct handling of rollback.
-
- struct Entry {
- std::unique_ptr<CollectionCatalogEntry> catalogEntry;
- std::unique_ptr<RecordStoreV1Base> recordStore;
- };
-
- typedef std::map<std::string, Entry*> CollectionMap;
+ typedef std::map<std::string, Entry*> CollectionMap;
- RecordStoreV1Base* _getIndexRecordStore();
- RecordStoreV1Base* _getNamespaceRecordStore() const;
- RecordStoreV1Base* _getRecordStore(StringData ns) const;
+ RecordStoreV1Base* _getIndexRecordStore();
+ RecordStoreV1Base* _getNamespaceRecordStore() const;
+ RecordStoreV1Base* _getRecordStore(StringData ns) const;
- void _addNamespaceToNamespaceCollection(OperationContext* txn,
- StringData ns,
- const BSONObj* options);
+ void _addNamespaceToNamespaceCollection(OperationContext* txn,
+ StringData ns,
+ const BSONObj* options);
- void _removeNamespaceFromNamespaceCollection(OperationContext* txn, StringData ns);
+ void _removeNamespaceFromNamespaceCollection(OperationContext* txn, StringData ns);
- Status _renameSingleNamespace( OperationContext* txn,
- StringData fromNS,
- StringData toNS,
- bool stayTemp );
+ Status _renameSingleNamespace(OperationContext* txn,
+ StringData fromNS,
+ StringData toNS,
+ bool stayTemp);
- void _ensureSystemCollection(OperationContext* txn, StringData ns);
+ void _ensureSystemCollection(OperationContext* txn, StringData ns);
- void _init( OperationContext* txn );
+ void _init(OperationContext* txn);
- /**
- * Populate the _collections cache.
- */
- void _insertInCache(OperationContext* opCtx, StringData ns, Entry* entry);
+ /**
+ * Populate the _collections cache.
+ */
+ void _insertInCache(OperationContext* opCtx, StringData ns, Entry* entry);
- /**
- * Drop cached information for specified namespace. If a RecoveryUnit is specified,
- * use it to allow rollback. When ru is null, removal is unconditional.
- */
- void _removeFromCache(RecoveryUnit* ru, StringData ns);
+ /**
+ * Drop cached information for specified namespace. If a RecoveryUnit is specified,
+ * use it to allow rollback. When ru is null, removal is unconditional.
+ */
+ void _removeFromCache(RecoveryUnit* ru, StringData ns);
- const std::string _path;
+ const std::string _path;
- NamespaceIndex _namespaceIndex;
- MmapV1ExtentManager _extentManager;
- CollectionMap _collections;
- };
+ NamespaceIndex _namespaceIndex;
+ MmapV1ExtentManager _extentManager;
+ CollectionMap _collections;
+};
}
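
The long comment in this header states the key invariant: the _collections cache must stay consistent with the memory-mapped files even when a unit of work aborts, which is why _insertInCache and _removeFromCache go through the RecoveryUnit. The following is a minimal sketch of that idea, assuming a toy recovery unit that simply collects undo callbacks; FakeRecoveryUnit and insertInCache are illustrative stand-ins, not the real RecoveryUnit/EntryInsertion machinery.

#include <functional>
#include <map>
#include <memory>
#include <string>
#include <vector>

struct Entry {};  // stands in for {catalogEntry, recordStore}

class FakeRecoveryUnit {
public:
    void onRollback(std::function<void()> undo) {
        _undo.push_back(std::move(undo));
    }
    void commit() {
        _undo.clear();  // changes survive; undo actions are dropped
    }
    void rollback() {
        // Undo in reverse registration order, like a real recovery unit.
        for (auto it = _undo.rbegin(); it != _undo.rend(); ++it)
            (*it)();
        _undo.clear();
    }

private:
    std::vector<std::function<void()>> _undo;
};

using CollectionMap = std::map<std::string, std::shared_ptr<Entry>>;

// Analogous to _insertInCache: the cache mutation registers its own undo so
// that an aborted unit of work leaves the cache matching the files on disk.
void insertInCache(FakeRecoveryUnit* ru,
                   CollectionMap* cache,
                   const std::string& ns,
                   std::shared_ptr<Entry> entry) {
    (*cache)[ns] = std::move(entry);
    ru->onRollback([cache, ns] { cache->erase(ns); });
}

On commit the undo list is discarded; on rollback it runs in reverse order, so the cache ends up exactly as consistent as the on-disk state.
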
diff --git a/src/mongo/db/storage/mmap_v1/mmap_v1_engine.cpp b/src/mongo/db/storage/mmap_v1/mmap_v1_engine.cpp
index b4550f135db..b1fd028a1d5 100644
--- a/src/mongo/db/storage/mmap_v1/mmap_v1_engine.cpp
+++ b/src/mongo/db/storage/mmap_v1/mmap_v1_engine.cpp
@@ -53,304 +53,300 @@
namespace mongo {
- using std::endl;
- using std::ifstream;
- using std::string;
- using std::stringstream;
- using std::vector;
+using std::endl;
+using std::ifstream;
+using std::string;
+using std::stringstream;
+using std::vector;
namespace {
#if !defined(__sun)
- // if doingRepair is true don't consider unclean shutdown an error
- void acquirePathLock(MMAPV1Engine* storageEngine,
- bool doingRepair,
- const StorageEngineLockFile& lockFile) {
- string name = lockFile.getFilespec();
- bool oldFile = lockFile.createdByUncleanShutdown();
-
- if ( oldFile ) {
- // we check this here because we want to see if we can get the lock
-            // if we can't, then it's probably just another mongod running
-
- string errmsg;
- if (doingRepair && dur::haveJournalFiles()) {
- errmsg = "************** \n"
- "You specified --repair but there are dirty journal files. Please\n"
- "restart without --repair to allow the journal files to be replayed.\n"
- "If you wish to repair all databases, please shutdown cleanly and\n"
- "run with --repair again.\n"
- "**************";
- }
- else if (storageGlobalParams.dur) {
- if (!dur::haveJournalFiles(/*anyFiles=*/true)) {
- // Passing anyFiles=true as we are trying to protect against starting in an
- // unclean state with the journal directory unmounted. If there are any files,
- // even prealloc files, then it means that it is mounted so we can continue.
- // Previously there was an issue (SERVER-5056) where we would fail to start up
- // if killed during prealloc.
-
- vector<string> dbnames;
- storageEngine->listDatabases( &dbnames );
-
- if ( dbnames.size() == 0 ) {
- // this means that mongod crashed
- // between initial startup and when journaling was initialized
- // it is safe to continue
- }
- else {
- errmsg = str::stream()
- << "************** \n"
- << "old lock file: " << name << ". probably means unclean shutdown,\n"
- << "but there are no journal files to recover.\n"
- << "this is likely human error or filesystem corruption.\n"
- << "please make sure that your journal directory is mounted.\n"
- << "found " << dbnames.size() << " dbs.\n"
- << "see: http://dochub.mongodb.org/core/repair for more information\n"
- << "*************";
- }
-
- }
- }
- else {
- if (!dur::haveJournalFiles() && !doingRepair) {
+// if doingRepair is true don't consider unclean shutdown an error
+void acquirePathLock(MMAPV1Engine* storageEngine,
+ bool doingRepair,
+ const StorageEngineLockFile& lockFile) {
+ string name = lockFile.getFilespec();
+ bool oldFile = lockFile.createdByUncleanShutdown();
+
+ if (oldFile) {
+ // we check this here because we want to see if we can get the lock
+        // if we can't, then it's probably just another mongod running
+
+ string errmsg;
+ if (doingRepair && dur::haveJournalFiles()) {
+ errmsg =
+ "************** \n"
+ "You specified --repair but there are dirty journal files. Please\n"
+ "restart without --repair to allow the journal files to be replayed.\n"
+ "If you wish to repair all databases, please shutdown cleanly and\n"
+ "run with --repair again.\n"
+ "**************";
+ } else if (storageGlobalParams.dur) {
+ if (!dur::haveJournalFiles(/*anyFiles=*/true)) {
+ // Passing anyFiles=true as we are trying to protect against starting in an
+ // unclean state with the journal directory unmounted. If there are any files,
+ // even prealloc files, then it means that it is mounted so we can continue.
+ // Previously there was an issue (SERVER-5056) where we would fail to start up
+ // if killed during prealloc.
+
+ vector<string> dbnames;
+ storageEngine->listDatabases(&dbnames);
+
+ if (dbnames.size() == 0) {
+ // this means that mongod crashed
+ // between initial startup and when journaling was initialized
+ // it is safe to continue
+ } else {
errmsg = str::stream()
- << "************** \n"
- << "Unclean shutdown detected.\n"
- << "Please visit http://dochub.mongodb.org/core/repair for recovery instructions.\n"
- << "*************";
+ << "************** \n"
+ << "old lock file: " << name << ". probably means unclean shutdown,\n"
+ << "but there are no journal files to recover.\n"
+ << "this is likely human error or filesystem corruption.\n"
+ << "please make sure that your journal directory is mounted.\n"
+ << "found " << dbnames.size() << " dbs.\n"
+ << "see: http://dochub.mongodb.org/core/repair for more information\n"
+ << "*************";
}
}
-
- if (!errmsg.empty()) {
- log() << errmsg << endl;
- uassert( 12596 , "old lock file" , 0 );
+ } else {
+ if (!dur::haveJournalFiles() && !doingRepair) {
+ errmsg = str::stream() << "************** \n"
+ << "Unclean shutdown detected.\n"
+ << "Please visit http://dochub.mongodb.org/core/repair for "
+ "recovery instructions.\n"
+ << "*************";
}
}
- // Not related to lock file, but this is where we handle unclean shutdown
- if (!storageGlobalParams.dur && dur::haveJournalFiles()) {
- log() << "**************" << endl;
- log() << "Error: journal files are present in journal directory, yet starting without journaling enabled." << endl;
- log() << "It is recommended that you start with journaling enabled so that recovery may occur." << endl;
- log() << "**************" << endl;
- uasserted(13597, "can't start without --journal enabled when journal/ files are present");
+ if (!errmsg.empty()) {
+ log() << errmsg << endl;
+ uassert(12596, "old lock file", 0);
}
}
+
+ // Not related to lock file, but this is where we handle unclean shutdown
+ if (!storageGlobalParams.dur && dur::haveJournalFiles()) {
+ log() << "**************" << endl;
+ log() << "Error: journal files are present in journal directory, yet starting without "
+ "journaling enabled." << endl;
+ log() << "It is recommended that you start with journaling enabled so that recovery may "
+ "occur." << endl;
+ log() << "**************" << endl;
+ uasserted(13597, "can't start without --journal enabled when journal/ files are present");
+ }
+}
#else
- void acquirePathLock(MMAPV1Engine* storageEngine,
- bool doingRepair,
- const StorageEngineLockFile& lockFile) {
-        // TODO - it is very bad that the code above is not running here.
-
- // Not related to lock file, but this is where we handle unclean shutdown
- if (!storageGlobalParams.dur && dur::haveJournalFiles()) {
- log() << "**************" << endl;
- log() << "Error: journal files are present in journal directory, yet starting without --journal enabled." << endl;
- log() << "It is recommended that you start with journaling enabled so that recovery may occur." << endl;
- log() << "Alternatively (not recommended), you can backup everything, then delete the journal files, and run --repair" << endl;
- log() << "**************" << endl;
- uasserted(13618, "can't start without --journal enabled when journal/ files are present");
- }
+void acquirePathLock(MMAPV1Engine* storageEngine,
+ bool doingRepair,
+ const StorageEngineLockFile& lockFile) {
+    // TODO - it is very bad that the code above is not running here.
+
+ // Not related to lock file, but this is where we handle unclean shutdown
+ if (!storageGlobalParams.dur && dur::haveJournalFiles()) {
+ log() << "**************" << endl;
+ log() << "Error: journal files are present in journal directory, yet starting without "
+ "--journal enabled." << endl;
+ log() << "It is recommended that you start with journaling enabled so that recovery may "
+ "occur." << endl;
+ log() << "Alternatively (not recommended), you can backup everything, then delete the "
+ "journal files, and run --repair" << endl;
+ log() << "**************" << endl;
+ uasserted(13618, "can't start without --journal enabled when journal/ files are present");
}
+}
#endif // !defined(__sun)
- /// warn if readahead > 256KB (gridfs chunk size)
- void checkReadAhead(const string& dir) {
+/// warn if readahead > 256KB (gridfs chunk size)
+void checkReadAhead(const string& dir) {
#ifdef __linux__
- try {
- const dev_t dev = getPartition(dir);
-
- // This path handles the case where the filesystem uses the whole device (including LVM)
- string path = str::stream() <<
- "/sys/dev/block/" << major(dev) << ':' << minor(dev) << "/queue/read_ahead_kb";
-
- if (!boost::filesystem::exists(path)){
- // This path handles the case where the filesystem is on a partition.
- path = str::stream()
- << "/sys/dev/block/" << major(dev) << ':' << minor(dev) // this is a symlink
- << "/.." // parent directory of a partition is for the whole device
- << "/queue/read_ahead_kb";
- }
+ try {
+ const dev_t dev = getPartition(dir);
+
+ // This path handles the case where the filesystem uses the whole device (including LVM)
+ string path = str::stream() << "/sys/dev/block/" << major(dev) << ':' << minor(dev)
+ << "/queue/read_ahead_kb";
+
+ if (!boost::filesystem::exists(path)) {
+ // This path handles the case where the filesystem is on a partition.
+ path =
+ str::stream() << "/sys/dev/block/" << major(dev) << ':'
+ << minor(dev) // this is a symlink
+ << "/.." // parent directory of a partition is for the whole device
+ << "/queue/read_ahead_kb";
+ }
- if (boost::filesystem::exists(path)) {
- ifstream file (path.c_str());
- if (file.is_open()) {
- int kb;
- file >> kb;
- if (kb > 256) {
- log() << startupWarningsLog;
+ if (boost::filesystem::exists(path)) {
+ ifstream file(path.c_str());
+ if (file.is_open()) {
+ int kb;
+ file >> kb;
+ if (kb > 256) {
+ log() << startupWarningsLog;
- log() << "** WARNING: Readahead for " << dir << " is set to " << kb << "KB"
- << startupWarningsLog;
+ log() << "** WARNING: Readahead for " << dir << " is set to " << kb << "KB"
+ << startupWarningsLog;
- log() << "** We suggest setting it to 256KB (512 sectors) or less"
- << startupWarningsLog;
+ log() << "** We suggest setting it to 256KB (512 sectors) or less"
+ << startupWarningsLog;
- log() << "** http://dochub.mongodb.org/core/readahead"
- << startupWarningsLog;
- }
+ log() << "** http://dochub.mongodb.org/core/readahead"
+ << startupWarningsLog;
}
}
}
- catch (const std::exception& e) {
- log() << "unable to validate readahead settings due to error: " << e.what()
- << startupWarningsLog;
- log() << "for more information, see http://dochub.mongodb.org/core/readahead"
- << startupWarningsLog;
- }
-#endif // __linux__
+ } catch (const std::exception& e) {
+ log() << "unable to validate readahead settings due to error: " << e.what()
+ << startupWarningsLog;
+ log() << "for more information, see http://dochub.mongodb.org/core/readahead"
+ << startupWarningsLog;
}
+#endif // __linux__
+}
- // This is unrelated to the _tmp directory in dbpath.
- void clearTmpFiles() {
- boost::filesystem::path path(storageGlobalParams.dbpath);
- for ( boost::filesystem::directory_iterator i( path );
- i != boost::filesystem::directory_iterator(); ++i ) {
- string fileName = boost::filesystem::path(*i).leaf().string();
- if ( boost::filesystem::is_directory( *i ) &&
- fileName.length() && fileName[ 0 ] == '$' )
- boost::filesystem::remove_all( *i );
- }
+// This is unrelated to the _tmp directory in dbpath.
+void clearTmpFiles() {
+ boost::filesystem::path path(storageGlobalParams.dbpath);
+ for (boost::filesystem::directory_iterator i(path);
+ i != boost::filesystem::directory_iterator();
+ ++i) {
+ string fileName = boost::filesystem::path(*i).leaf().string();
+ if (boost::filesystem::is_directory(*i) && fileName.length() && fileName[0] == '$')
+ boost::filesystem::remove_all(*i);
}
-} // namespace
+}
+} // namespace
- MMAPV1Engine::MMAPV1Engine(const StorageEngineLockFile& lockFile) {
- // TODO check non-journal subdirs if using directory-per-db
- checkReadAhead(storageGlobalParams.dbpath);
+MMAPV1Engine::MMAPV1Engine(const StorageEngineLockFile& lockFile) {
+ // TODO check non-journal subdirs if using directory-per-db
+ checkReadAhead(storageGlobalParams.dbpath);
- acquirePathLock(this, storageGlobalParams.repair, lockFile);
+ acquirePathLock(this, storageGlobalParams.repair, lockFile);
- FileAllocator::get()->start();
+ FileAllocator::get()->start();
- MONGO_ASSERT_ON_EXCEPTION_WITH_MSG( clearTmpFiles(), "clear tmp files" );
- }
+ MONGO_ASSERT_ON_EXCEPTION_WITH_MSG(clearTmpFiles(), "clear tmp files");
+}
- void MMAPV1Engine::finishInit() {
- dataFileSync.go();
+void MMAPV1Engine::finishInit() {
+ dataFileSync.go();
- // Replays the journal (if needed) and starts the background thread. This requires the
- // ability to create OperationContexts.
- dur::startup();
- }
+ // Replays the journal (if needed) and starts the background thread. This requires the
+ // ability to create OperationContexts.
+ dur::startup();
+}
- MMAPV1Engine::~MMAPV1Engine() {
- for ( EntryMap::const_iterator it = _entryMap.begin(); it != _entryMap.end(); ++it ) {
- delete it->second;
- }
- _entryMap.clear();
+MMAPV1Engine::~MMAPV1Engine() {
+ for (EntryMap::const_iterator it = _entryMap.begin(); it != _entryMap.end(); ++it) {
+ delete it->second;
}
+ _entryMap.clear();
+}
- RecoveryUnit* MMAPV1Engine::newRecoveryUnit() {
- return new DurRecoveryUnit();
- }
+RecoveryUnit* MMAPV1Engine::newRecoveryUnit() {
+ return new DurRecoveryUnit();
+}
- void MMAPV1Engine::listDatabases( std::vector<std::string>* out ) const {
- _listDatabases( storageGlobalParams.dbpath, out );
- }
+void MMAPV1Engine::listDatabases(std::vector<std::string>* out) const {
+ _listDatabases(storageGlobalParams.dbpath, out);
+}
- DatabaseCatalogEntry* MMAPV1Engine::getDatabaseCatalogEntry( OperationContext* opCtx,
- StringData db ) {
- {
- stdx::lock_guard<stdx::mutex> lk(_entryMapMutex);
- EntryMap::const_iterator iter = _entryMap.find(db.toString());
- if (iter != _entryMap.end()) {
- return iter->second;
- }
+DatabaseCatalogEntry* MMAPV1Engine::getDatabaseCatalogEntry(OperationContext* opCtx,
+ StringData db) {
+ {
+ stdx::lock_guard<stdx::mutex> lk(_entryMapMutex);
+ EntryMap::const_iterator iter = _entryMap.find(db.toString());
+ if (iter != _entryMap.end()) {
+ return iter->second;
}
+ }
- // This is an on-demand database create/open. At this point, we are locked under X lock for
- // the database (MMAPV1DatabaseCatalogEntry's constructor checks that) so no two threads
-        // can be creating the same database concurrently. We need to create the database outside of
- // the _entryMapMutex so we do not deadlock (see SERVER-15880).
- MMAPV1DatabaseCatalogEntry* entry =
- new MMAPV1DatabaseCatalogEntry(opCtx,
- db,
- storageGlobalParams.dbpath,
- storageGlobalParams.directoryperdb,
- false);
+ // This is an on-demand database create/open. At this point, we are locked under X lock for
+ // the database (MMAPV1DatabaseCatalogEntry's constructor checks that) so no two threads
+    // can be creating the same database concurrently. We need to create the database outside of
+ // the _entryMapMutex so we do not deadlock (see SERVER-15880).
+ MMAPV1DatabaseCatalogEntry* entry = new MMAPV1DatabaseCatalogEntry(
+ opCtx, db, storageGlobalParams.dbpath, storageGlobalParams.directoryperdb, false);
- stdx::lock_guard<stdx::mutex> lk(_entryMapMutex);
+ stdx::lock_guard<stdx::mutex> lk(_entryMapMutex);
- // Sanity check that we are not overwriting something
- invariant(_entryMap.insert(EntryMap::value_type(db.toString(), entry)).second);
+ // Sanity check that we are not overwriting something
+ invariant(_entryMap.insert(EntryMap::value_type(db.toString(), entry)).second);
- return entry;
- }
+ return entry;
+}
- Status MMAPV1Engine::closeDatabase( OperationContext* txn, StringData db ) {
- // Before the files are closed, flush any potentially outstanding changes, which might
- // reference this database. Otherwise we will assert when subsequent applications of the
- // global journal entries occur, which happen to have write intents for the removed files.
- getDur().syncDataAndTruncateJournal(txn);
-
- stdx::lock_guard<stdx::mutex> lk( _entryMapMutex );
- MMAPV1DatabaseCatalogEntry* entry = _entryMap[db.toString()];
- delete entry;
- _entryMap.erase( db.toString() );
- return Status::OK();
- }
+Status MMAPV1Engine::closeDatabase(OperationContext* txn, StringData db) {
+ // Before the files are closed, flush any potentially outstanding changes, which might
+ // reference this database. Otherwise we will assert when subsequent applications of the
+ // global journal entries occur, which happen to have write intents for the removed files.
+ getDur().syncDataAndTruncateJournal(txn);
+
+ stdx::lock_guard<stdx::mutex> lk(_entryMapMutex);
+ MMAPV1DatabaseCatalogEntry* entry = _entryMap[db.toString()];
+ delete entry;
+ _entryMap.erase(db.toString());
+ return Status::OK();
+}
- Status MMAPV1Engine::dropDatabase( OperationContext* txn, StringData db ) {
- Status status = closeDatabase( txn, db );
- if ( !status.isOK() )
- return status;
+Status MMAPV1Engine::dropDatabase(OperationContext* txn, StringData db) {
+ Status status = closeDatabase(txn, db);
+ if (!status.isOK())
+ return status;
- _deleteDataFiles( db.toString() );
+ _deleteDataFiles(db.toString());
- return Status::OK();
- }
+ return Status::OK();
+}
- void MMAPV1Engine::_listDatabases( const std::string& directory,
- std::vector<std::string>* out ) {
- boost::filesystem::path path( directory );
- for ( boost::filesystem::directory_iterator i( path );
- i != boost::filesystem::directory_iterator();
- ++i ) {
- if (storageGlobalParams.directoryperdb) {
- boost::filesystem::path p = *i;
- string dbName = p.leaf().string();
- p /= ( dbName + ".ns" );
- if ( exists( p ) )
- out->push_back( dbName );
- }
- else {
- string fileName = boost::filesystem::path(*i).leaf().string();
- if ( fileName.length() > 3 && fileName.substr( fileName.length() - 3, 3 ) == ".ns" )
- out->push_back( fileName.substr( 0, fileName.length() - 3 ) );
- }
+void MMAPV1Engine::_listDatabases(const std::string& directory, std::vector<std::string>* out) {
+ boost::filesystem::path path(directory);
+ for (boost::filesystem::directory_iterator i(path);
+ i != boost::filesystem::directory_iterator();
+ ++i) {
+ if (storageGlobalParams.directoryperdb) {
+ boost::filesystem::path p = *i;
+ string dbName = p.leaf().string();
+ p /= (dbName + ".ns");
+ if (exists(p))
+ out->push_back(dbName);
+ } else {
+ string fileName = boost::filesystem::path(*i).leaf().string();
+ if (fileName.length() > 3 && fileName.substr(fileName.length() - 3, 3) == ".ns")
+ out->push_back(fileName.substr(0, fileName.length() - 3));
}
}
+}
- int MMAPV1Engine::flushAllFiles( bool sync ) {
- return MongoFile::flushAll( sync );
- }
-
- bool MMAPV1Engine::isDurable() const {
- return getDur().isDurable();
- }
+int MMAPV1Engine::flushAllFiles(bool sync) {
+ return MongoFile::flushAll(sync);
+}
- RecordAccessTracker& MMAPV1Engine::getRecordAccessTracker() {
- return _recordAccessTracker;
- }
+bool MMAPV1Engine::isDurable() const {
+ return getDur().isDurable();
+}
- void MMAPV1Engine::cleanShutdown() {
- // wait until file preallocation finishes
- // we would only hang here if the file_allocator code generates a
- // synchronous signal, which we don't expect
- log() << "shutdown: waiting for fs preallocator..." << endl;
- FileAllocator::get()->waitUntilFinished();
+RecordAccessTracker& MMAPV1Engine::getRecordAccessTracker() {
+ return _recordAccessTracker;
+}
- if (storageGlobalParams.dur) {
- log() << "shutdown: final commit..." << endl;
+void MMAPV1Engine::cleanShutdown() {
+ // wait until file preallocation finishes
+ // we would only hang here if the file_allocator code generates a
+ // synchronous signal, which we don't expect
+ log() << "shutdown: waiting for fs preallocator..." << endl;
+ FileAllocator::get()->waitUntilFinished();
- getDur().commitAndStopDurThread();
- }
+ if (storageGlobalParams.dur) {
+ log() << "shutdown: final commit..." << endl;
- log() << "shutdown: closing all files..." << endl;
- stringstream ss3;
- MemoryMappedFile::closeAllFiles( ss3 );
- log() << ss3.str() << endl;
+ getDur().commitAndStopDurThread();
}
+
+ log() << "shutdown: closing all files..." << endl;
+ stringstream ss3;
+ MemoryMappedFile::closeAllFiles(ss3);
+ log() << ss3.str() << endl;
+}
}
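
getDatabaseCatalogEntry above is a lock-then-create-outside-the-lock pattern: look up under _entryMapMutex, construct the expensive entry with the mutex released to avoid the SERVER-15880 deadlock, then re-take the mutex and insert, relying on the exclusive database lock to rule out a concurrent creator. A compact sketch of the same shape follows, with illustrative names (Registry, DbEntry) rather than the real types.

#include <cassert>
#include <map>
#include <mutex>
#include <string>

struct DbEntry {
    std::string name;
};

class Registry {
public:
    DbEntry* getOrCreate(const std::string& db) {
        {
            std::lock_guard<std::mutex> lk(_mutex);
            auto it = _entries.find(db);
            if (it != _entries.end())
                return it->second;
        }

        // Construct outside the mutex: in the real engine this opens files and
        // takes other locks, which is why holding _mutex here could deadlock.
        DbEntry* entry = new DbEntry{db};  // ownership handling elided for brevity

        std::lock_guard<std::mutex> lk(_mutex);
        const bool inserted = _entries.emplace(db, entry).second;
        assert(inserted);  // a caller-held exclusive DB lock rules out a racing creator
        return entry;
    }

private:
    std::mutex _mutex;
    std::map<std::string, DbEntry*> _entries;
};

The assert encodes the external guarantee: without the exclusive database lock, two threads could both miss in the map, both construct, and the second emplace would fail.
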
diff --git a/src/mongo/db/storage/mmap_v1/mmap_v1_engine.h b/src/mongo/db/storage/mmap_v1/mmap_v1_engine.h
index 4141794c426..25c38500831 100644
--- a/src/mongo/db/storage/mmap_v1/mmap_v1_engine.h
+++ b/src/mongo/db/storage/mmap_v1/mmap_v1_engine.h
@@ -38,68 +38,70 @@
namespace mongo {
- class MMAPV1DatabaseCatalogEntry;
+class MMAPV1DatabaseCatalogEntry;
- class MMAPV1Engine : public StorageEngine {
- public:
- MMAPV1Engine(const StorageEngineLockFile& lockFile);
- virtual ~MMAPV1Engine();
+class MMAPV1Engine : public StorageEngine {
+public:
+ MMAPV1Engine(const StorageEngineLockFile& lockFile);
+ virtual ~MMAPV1Engine();
- void finishInit();
+ void finishInit();
- RecoveryUnit* newRecoveryUnit();
- void listDatabases( std::vector<std::string>* out ) const;
- int flushAllFiles( bool sync );
+ RecoveryUnit* newRecoveryUnit();
+ void listDatabases(std::vector<std::string>* out) const;
+ int flushAllFiles(bool sync);
- DatabaseCatalogEntry* getDatabaseCatalogEntry( OperationContext* opCtx,
- StringData db );
+ DatabaseCatalogEntry* getDatabaseCatalogEntry(OperationContext* opCtx, StringData db);
- virtual bool supportsDocLocking() const { return false; }
- virtual bool isMmapV1() const { return true; }
+ virtual bool supportsDocLocking() const {
+ return false;
+ }
+ virtual bool isMmapV1() const {
+ return true;
+ }
- virtual bool isDurable() const;
+ virtual bool isDurable() const;
- virtual Status closeDatabase(OperationContext* txn, StringData db);
+ virtual Status closeDatabase(OperationContext* txn, StringData db);
- virtual Status dropDatabase(OperationContext* txn, StringData db);
+ virtual Status dropDatabase(OperationContext* txn, StringData db);
- virtual void cleanShutdown();
+ virtual void cleanShutdown();
- // Callers should use repairDatabase instead.
- virtual Status repairRecordStore(OperationContext* txn, const std::string& ns) {
- return Status(ErrorCodes::InternalError, "MMAPv1 doesn't support repairRecordStore");
- }
+ // Callers should use repairDatabase instead.
+ virtual Status repairRecordStore(OperationContext* txn, const std::string& ns) {
+ return Status(ErrorCodes::InternalError, "MMAPv1 doesn't support repairRecordStore");
+ }
- // MMAPv1 specific (non-virtual)
- Status repairDatabase( OperationContext* txn,
- const std::string& dbName,
- bool preserveClonedFilesOnFailure,
- bool backupOriginalFiles );
+ // MMAPv1 specific (non-virtual)
+ Status repairDatabase(OperationContext* txn,
+ const std::string& dbName,
+ bool preserveClonedFilesOnFailure,
+ bool backupOriginalFiles);
- /**
- * Gets a reference to the abstraction used by MMAP v1 to track recently used memory
- * addresses.
- *
- * MMAPv1 specific (non-virtual). This is non-const because callers are allowed to use
- * the returned reference to modify the RecordAccessTracker.
- *
- * The RecordAccessTracker is thread-safe (it uses its own mutex internally).
- */
- RecordAccessTracker& getRecordAccessTracker();
+ /**
+ * Gets a reference to the abstraction used by MMAP v1 to track recently used memory
+ * addresses.
+ *
+ * MMAPv1 specific (non-virtual). This is non-const because callers are allowed to use
+ * the returned reference to modify the RecordAccessTracker.
+ *
+ * The RecordAccessTracker is thread-safe (it uses its own mutex internally).
+ */
+ RecordAccessTracker& getRecordAccessTracker();
- private:
- static void _listDatabases( const std::string& directory,
- std::vector<std::string>* out );
+private:
+ static void _listDatabases(const std::string& directory, std::vector<std::string>* out);
- stdx::mutex _entryMapMutex;
- typedef std::map<std::string,MMAPV1DatabaseCatalogEntry*> EntryMap;
- EntryMap _entryMap;
+ stdx::mutex _entryMapMutex;
+ typedef std::map<std::string, MMAPV1DatabaseCatalogEntry*> EntryMap;
+ EntryMap _entryMap;
- // A record access tracker is essentially a large table which tracks recently used
- // addresses. It is used when higher layers (e.g. the query system) need to ask
- // the storage engine whether data is likely in physical memory.
- RecordAccessTracker _recordAccessTracker;
- };
+ // A record access tracker is essentially a large table which tracks recently used
+ // addresses. It is used when higher layers (e.g. the query system) need to ask
+ // the storage engine whether data is likely in physical memory.
+ RecordAccessTracker _recordAccessTracker;
+};
- void _deleteDataFiles(const std::string& database);
+void _deleteDataFiles(const std::string& database);
}
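
The header above documents the RecordAccessTracker as "a large table which tracks recently used addresses" so higher layers can ask whether a record is likely already in physical memory; the extent manager diff below consumes it in recordNeedsFetch(). Here is a toy single-mutex sketch of that interface; the real tracker is a larger, internally synchronized structure, and TinyAccessTracker plus the 4KB-page assumption are illustrative only.

#include <cstdint>
#include <mutex>
#include <unordered_set>

class TinyAccessTracker {
public:
    void markAccessed(const void* addr) {
        std::lock_guard<std::mutex> lk(_mutex);
        _pages.insert(pageOf(addr));
    }

    // Marks the page and reports whether it had been touched before,
    // matching how recordNeedsFetch() uses checkAccessedAndMark().
    bool checkAccessedAndMark(const void* addr) {
        std::lock_guard<std::mutex> lk(_mutex);
        return !_pages.insert(pageOf(addr)).second;
    }

private:
    static std::uintptr_t pageOf(const void* addr) {
        return reinterpret_cast<std::uintptr_t>(addr) >> 12;  // assumes 4KB pages
    }

    std::mutex _mutex;
    std::unordered_set<std::uintptr_t> _pages;
};

When checkAccessedAndMark() returns false, the page was cold and the caller yields with a fetcher that touches the record outside the lock manager's locks.
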
diff --git a/src/mongo/db/storage/mmap_v1/mmap_v1_extent_manager.cpp b/src/mongo/db/storage/mmap_v1/mmap_v1_extent_manager.cpp
index ed4f160e1a9..69d80422e66 100644
--- a/src/mongo/db/storage/mmap_v1/mmap_v1_extent_manager.cpp
+++ b/src/mongo/db/storage/mmap_v1/mmap_v1_extent_manager.cpp
@@ -55,632 +55,612 @@
namespace mongo {
- using std::unique_ptr;
- using std::endl;
- using std::max;
- using std::string;
- using std::stringstream;
-
- // Turn on this failpoint to force the system to yield for a fetch. Setting to "alwaysOn"
- // will cause yields for fetching to occur on every 'kNeedsFetchFailFreq'th call to
- // recordNeedsFetch().
- static const int kNeedsFetchFailFreq = 2;
- static Counter64 needsFetchFailCounter;
- MONGO_FP_DECLARE(recordNeedsFetchFail);
-
- // Used to make sure the compiler doesn't get too smart on us when we're
- // trying to touch records.
- volatile int __record_touch_dummy = 1;
-
- class MmapV1RecordFetcher : public RecordFetcher {
- MONGO_DISALLOW_COPYING(MmapV1RecordFetcher);
- public:
- explicit MmapV1RecordFetcher(const MmapV1RecordHeader* record)
- : _record(record) { }
-
- virtual void setup() {
- invariant(!_filesLock.get());
- _filesLock.reset(new LockMongoFilesShared());
- }
+using std::unique_ptr;
+using std::endl;
+using std::max;
+using std::string;
+using std::stringstream;
+
+// Turn on this failpoint to force the system to yield for a fetch. Setting to "alwaysOn"
+// will cause yields for fetching to occur on every 'kNeedsFetchFailFreq'th call to
+// recordNeedsFetch().
+static const int kNeedsFetchFailFreq = 2;
+static Counter64 needsFetchFailCounter;
+MONGO_FP_DECLARE(recordNeedsFetchFail);
+
+// Used to make sure the compiler doesn't get too smart on us when we're
+// trying to touch records.
+volatile int __record_touch_dummy = 1;
+
+class MmapV1RecordFetcher : public RecordFetcher {
+ MONGO_DISALLOW_COPYING(MmapV1RecordFetcher);
+
+public:
+ explicit MmapV1RecordFetcher(const MmapV1RecordHeader* record) : _record(record) {}
+
+ virtual void setup() {
+ invariant(!_filesLock.get());
+ _filesLock.reset(new LockMongoFilesShared());
+ }
+
+ virtual void fetch() {
+ // It's only legal to touch the record while we're holding a lock on the data files.
+ invariant(_filesLock.get());
+
+ const char* recordChar = reinterpret_cast<const char*>(_record);
+
+        // Here's where we actually dereference a pointer into the record. This is where
+        // we expect a page fault to occur, so we should do this out of the lock.
+ __record_touch_dummy += *recordChar;
+
+ // We're not going to touch the record anymore, so we can give up our
+ // lock on mongo files. We do this here because we have to release the
+ // lock on mongo files prior to reacquiring lock mgr locks.
+ _filesLock.reset();
+ }
+
+private:
+ // The record which needs to be touched in order to page fault. Not owned by us.
+ const MmapV1RecordHeader* _record;
+
+ // This ensures that our MmapV1RecordHeader* does not drop out from under our feet before
+ // we dereference it.
+ std::unique_ptr<LockMongoFilesShared> _filesLock;
+};
+
+MmapV1ExtentManager::MmapV1ExtentManager(StringData dbname, StringData path, bool directoryPerDB)
+ : _dbname(dbname.toString()),
+ _path(path.toString()),
+ _directoryPerDB(directoryPerDB),
+ _rid(RESOURCE_METADATA, dbname) {
+ StorageEngine* engine = getGlobalServiceContext()->getGlobalStorageEngine();
+ invariant(engine->isMmapV1());
+ MMAPV1Engine* mmapEngine = static_cast<MMAPV1Engine*>(engine);
+ _recordAccessTracker = &mmapEngine->getRecordAccessTracker();
+}
- virtual void fetch() {
- // It's only legal to touch the record while we're holding a lock on the data files.
- invariant(_filesLock.get());
+boost::filesystem::path MmapV1ExtentManager::_fileName(int n) const {
+ stringstream ss;
+ ss << _dbname << '.' << n;
+ boost::filesystem::path fullName(_path);
+ if (_directoryPerDB)
+ fullName /= _dbname;
+ fullName /= ss.str();
+ return fullName;
+}
- const char* recordChar = reinterpret_cast<const char*>(_record);
-        // Here's where we actually dereference a pointer into the record. This is where
-        // we expect a page fault to occur, so we should do this out of the lock.
- __record_touch_dummy += *recordChar;
+Status MmapV1ExtentManager::init(OperationContext* txn) {
+ invariant(_files.empty());
- // We're not going to touch the record anymore, so we can give up our
- // lock on mongo files. We do this here because we have to release the
- // lock on mongo files prior to reacquiring lock mgr locks.
- _filesLock.reset();
+ for (int n = 0; n < DiskLoc::MaxFiles; n++) {
+ const boost::filesystem::path fullName = _fileName(n);
+ if (!boost::filesystem::exists(fullName)) {
+ break;
}
- private:
- // The record which needs to be touched in order to page fault. Not owned by us.
- const MmapV1RecordHeader* _record;
-
- // This ensures that our MmapV1RecordHeader* does not drop out from under our feet before
- // we dereference it.
- std::unique_ptr<LockMongoFilesShared> _filesLock;
- };
-
- MmapV1ExtentManager::MmapV1ExtentManager(StringData dbname,
- StringData path,
- bool directoryPerDB)
- : _dbname(dbname.toString()),
- _path(path.toString()),
- _directoryPerDB(directoryPerDB),
- _rid(RESOURCE_METADATA, dbname) {
- StorageEngine* engine = getGlobalServiceContext()->getGlobalStorageEngine();
- invariant(engine->isMmapV1());
- MMAPV1Engine* mmapEngine = static_cast<MMAPV1Engine*>(engine);
- _recordAccessTracker = &mmapEngine->getRecordAccessTracker();
- }
+ const std::string fullNameString = fullName.string();
- boost::filesystem::path MmapV1ExtentManager::_fileName(int n) const {
- stringstream ss;
- ss << _dbname << '.' << n;
- boost::filesystem::path fullName( _path );
- if ( _directoryPerDB )
- fullName /= _dbname;
- fullName /= ss.str();
- return fullName;
- }
-
-
- Status MmapV1ExtentManager::init(OperationContext* txn) {
- invariant(_files.empty());
-
- for (int n = 0; n < DiskLoc::MaxFiles; n++) {
- const boost::filesystem::path fullName = _fileName(n);
- if (!boost::filesystem::exists(fullName)) {
+ {
+ // If the file is uninitialized we exit the loop because it is just prealloced. We
+ // do this on a bare File object rather than using the DataFile because closing a
+ // DataFile triggers dur::closingFileNotification() which is fatal if there are any
+ // pending writes. Therefore we must only open files that we know we want to keep.
+ File preview;
+ preview.open(fullNameString.c_str(), /*readOnly*/ true);
+ invariant(preview.is_open());
+
+ // File can't be initialized if too small.
+ if (preview.len() < sizeof(DataFileHeader)) {
break;
}
- const std::string fullNameString = fullName.string();
-
- {
- // If the file is uninitialized we exit the loop because it is just prealloced. We
- // do this on a bare File object rather than using the DataFile because closing a
- // DataFile triggers dur::closingFileNotification() which is fatal if there are any
- // pending writes. Therefore we must only open files that we know we want to keep.
- File preview;
- preview.open(fullNameString.c_str(), /*readOnly*/ true);
- invariant(preview.is_open());
-
- // File can't be initialized if too small.
- if (preview.len() < sizeof(DataFileHeader)) {
- break;
- }
-
- // This is the equivalent of DataFileHeader::uninitialized().
- int version;
- preview.read(0, reinterpret_cast<char*>(&version), sizeof(version));
- invariant(!preview.bad());
- if (version == 0) {
- break;
- }
- }
-
- unique_ptr<DataFile> df(new DataFile(n));
-
- Status s = df->openExisting(fullNameString.c_str());
- if (!s.isOK()) {
- return s;
+ // This is the equivalent of DataFileHeader::uninitialized().
+ int version;
+ preview.read(0, reinterpret_cast<char*>(&version), sizeof(version));
+ invariant(!preview.bad());
+ if (version == 0) {
+ break;
}
+ }
- invariant(!df->getHeader()->uninitialized());
-
- // We only checkUpgrade on files that we are keeping, not preallocs.
- df->getHeader()->checkUpgrade(txn);
+ unique_ptr<DataFile> df(new DataFile(n));
- _files.push_back( df.release() );
+ Status s = df->openExisting(fullNameString.c_str());
+ if (!s.isOK()) {
+ return s;
}
- // If this is a new database being created, instantiate the first file and one extent so
- // we can have a coherent database.
- if (_files.empty()) {
- WriteUnitOfWork wuow(txn);
- _createExtent(txn, initialSize(128), false);
- wuow.commit();
+ invariant(!df->getHeader()->uninitialized());
- // Commit the journal and all changes to disk so that even if exceptions occur during
-            // subsequent initialization, we won't have uncommitted changes during file close.
- getDur().commitNow(txn);
- }
+ // We only checkUpgrade on files that we are keeping, not preallocs.
+ df->getHeader()->checkUpgrade(txn);
- return Status::OK();
+ _files.push_back(df.release());
}
- const DataFile* MmapV1ExtentManager::_getOpenFile(int fileId) const {
- if (fileId < 0 || fileId >= _files.size()) {
- log() << "_getOpenFile() invalid file index requested " << fileId;
- invariant(false);
- }
+ // If this is a new database being created, instantiate the first file and one extent so
+ // we can have a coherent database.
+ if (_files.empty()) {
+ WriteUnitOfWork wuow(txn);
+ _createExtent(txn, initialSize(128), false);
+ wuow.commit();
- return _files[fileId];
+ // Commit the journal and all changes to disk so that even if exceptions occur during
+        // subsequent initialization, we won't have uncommitted changes during file close.
+ getDur().commitNow(txn);
}
- DataFile* MmapV1ExtentManager::_getOpenFile(int fileId) {
- if (fileId < 0 || fileId >= _files.size()) {
- log() << "_getOpenFile() invalid file index requested " << fileId;
- invariant(false);
- }
+ return Status::OK();
+}
- return _files[fileId];
+const DataFile* MmapV1ExtentManager::_getOpenFile(int fileId) const {
+ if (fileId < 0 || fileId >= _files.size()) {
+ log() << "_getOpenFile() invalid file index requested " << fileId;
+ invariant(false);
}
- DataFile* MmapV1ExtentManager::_addAFile(OperationContext* txn,
- int sizeNeeded,
- bool preallocateNextFile) {
-
- // Database must be stable and we need to be in some sort of an update operation in order
- // to add a new file.
- invariant(txn->lockState()->isDbLockedForMode(_dbname, MODE_IX));
+ return _files[fileId];
+}
- const int allocFileId = _files.size();
+DataFile* MmapV1ExtentManager::_getOpenFile(int fileId) {
+ if (fileId < 0 || fileId >= _files.size()) {
+ log() << "_getOpenFile() invalid file index requested " << fileId;
+ invariant(false);
+ }
- int minSize = 0;
- if (allocFileId > 0) {
- // Make the next file at least as large as the previous
- minSize = _files[allocFileId - 1]->getHeader()->fileLength;
- }
+ return _files[fileId];
+}
- if (minSize < sizeNeeded + DataFileHeader::HeaderSize) {
- minSize = sizeNeeded + DataFileHeader::HeaderSize;
- }
+DataFile* MmapV1ExtentManager::_addAFile(OperationContext* txn,
+ int sizeNeeded,
+ bool preallocateNextFile) {
+ // Database must be stable and we need to be in some sort of an update operation in order
+ // to add a new file.
+ invariant(txn->lockState()->isDbLockedForMode(_dbname, MODE_IX));
- {
- unique_ptr<DataFile> allocFile(new DataFile(allocFileId));
- const string allocFileName = _fileName(allocFileId).string();
+ const int allocFileId = _files.size();
- Timer t;
+ int minSize = 0;
+ if (allocFileId > 0) {
+ // Make the next file at least as large as the previous
+ minSize = _files[allocFileId - 1]->getHeader()->fileLength;
+ }
- allocFile->open(txn, allocFileName.c_str(), minSize, false);
- if (t.seconds() > 1) {
- log() << "MmapV1ExtentManager took "
- << t.seconds()
- << " seconds to open: "
- << allocFileName;
- }
+ if (minSize < sizeNeeded + DataFileHeader::HeaderSize) {
+ minSize = sizeNeeded + DataFileHeader::HeaderSize;
+ }
- // It's all good
- _files.push_back(allocFile.release());
- }
+ {
+ unique_ptr<DataFile> allocFile(new DataFile(allocFileId));
+ const string allocFileName = _fileName(allocFileId).string();
- // Preallocate is asynchronous
- if (preallocateNextFile) {
- unique_ptr<DataFile> nextFile(new DataFile(allocFileId + 1));
- const string nextFileName = _fileName(allocFileId + 1).string();
+ Timer t;
- nextFile->open(txn, nextFileName.c_str(), minSize, false);
+ allocFile->open(txn, allocFileName.c_str(), minSize, false);
+ if (t.seconds() > 1) {
+ log() << "MmapV1ExtentManager took " << t.seconds()
+ << " seconds to open: " << allocFileName;
}
- // Returns the last file added
- return _files[allocFileId];
- }
-
- int MmapV1ExtentManager::numFiles() const {
- return _files.size();
+ // It's all good
+ _files.push_back(allocFile.release());
}
- long long MmapV1ExtentManager::fileSize() const {
- long long size = 0;
- for (int n = 0; boost::filesystem::exists(_fileName(n)); n++) {
- size += boost::filesystem::file_size(_fileName(n));
- }
+ // Preallocate is asynchronous
+ if (preallocateNextFile) {
+ unique_ptr<DataFile> nextFile(new DataFile(allocFileId + 1));
+ const string nextFileName = _fileName(allocFileId + 1).string();
- return size;
+ nextFile->open(txn, nextFileName.c_str(), minSize, false);
}
- MmapV1RecordHeader* MmapV1ExtentManager::_recordForV1( const DiskLoc& loc ) const {
- loc.assertOk();
- const DataFile* df = _getOpenFile( loc.a() );
+ // Returns the last file added
+ return _files[allocFileId];
+}
- int ofs = loc.getOfs();
- if ( ofs < DataFileHeader::HeaderSize ) {
- df->badOfs(ofs); // will msgassert - external call to keep out of the normal code path
- }
+int MmapV1ExtentManager::numFiles() const {
+ return _files.size();
+}
- return reinterpret_cast<MmapV1RecordHeader*>( df->p() + ofs );
+long long MmapV1ExtentManager::fileSize() const {
+ long long size = 0;
+ for (int n = 0; boost::filesystem::exists(_fileName(n)); n++) {
+ size += boost::filesystem::file_size(_fileName(n));
}
- MmapV1RecordHeader* MmapV1ExtentManager::recordForV1( const DiskLoc& loc ) const {
- MmapV1RecordHeader* record = _recordForV1( loc );
- _recordAccessTracker->markAccessed( record );
- return record;
- }
+ return size;
+}
- std::unique_ptr<RecordFetcher> MmapV1ExtentManager::recordNeedsFetch(const DiskLoc& loc) const {
- if (loc.isNull()) return {};
- MmapV1RecordHeader* record = _recordForV1( loc );
+MmapV1RecordHeader* MmapV1ExtentManager::_recordForV1(const DiskLoc& loc) const {
+ loc.assertOk();
+ const DataFile* df = _getOpenFile(loc.a());
-        // For testing: if the failpoint is enabled we periodically request fetches without
- // going to the RecordAccessTracker.
- if ( MONGO_FAIL_POINT( recordNeedsFetchFail ) ) {
- needsFetchFailCounter.increment();
- if ( ( needsFetchFailCounter.get() % kNeedsFetchFailFreq ) == 0 ) {
- return stdx::make_unique<MmapV1RecordFetcher>( record );
- }
- }
+ int ofs = loc.getOfs();
+ if (ofs < DataFileHeader::HeaderSize) {
+ df->badOfs(ofs); // will msgassert - external call to keep out of the normal code path
+ }
- if ( !_recordAccessTracker->checkAccessedAndMark( record ) ) {
- return stdx::make_unique<MmapV1RecordFetcher>( record );
- }
+ return reinterpret_cast<MmapV1RecordHeader*>(df->p() + ofs);
+}
+
+MmapV1RecordHeader* MmapV1ExtentManager::recordForV1(const DiskLoc& loc) const {
+ MmapV1RecordHeader* record = _recordForV1(loc);
+ _recordAccessTracker->markAccessed(record);
+ return record;
+}
+std::unique_ptr<RecordFetcher> MmapV1ExtentManager::recordNeedsFetch(const DiskLoc& loc) const {
+ if (loc.isNull())
return {};
+ MmapV1RecordHeader* record = _recordForV1(loc);
+
+    // For testing: if the failpoint is enabled we periodically request fetches without
+ // going to the RecordAccessTracker.
+ if (MONGO_FAIL_POINT(recordNeedsFetchFail)) {
+ needsFetchFailCounter.increment();
+ if ((needsFetchFailCounter.get() % kNeedsFetchFailFreq) == 0) {
+ return stdx::make_unique<MmapV1RecordFetcher>(record);
+ }
}
- DiskLoc MmapV1ExtentManager::extentLocForV1( const DiskLoc& loc ) const {
- MmapV1RecordHeader* record = recordForV1( loc );
- return DiskLoc( loc.a(), record->extentOfs() );
+ if (!_recordAccessTracker->checkAccessedAndMark(record)) {
+ return stdx::make_unique<MmapV1RecordFetcher>(record);
}
- Extent* MmapV1ExtentManager::extentForV1( const DiskLoc& loc ) const {
- DiskLoc extentLoc = extentLocForV1( loc );
- return getExtent( extentLoc );
- }
+ return {};
+}
- Extent* MmapV1ExtentManager::getExtent( const DiskLoc& loc, bool doSanityCheck ) const {
- loc.assertOk();
- Extent* e = reinterpret_cast<Extent*>( _getOpenFile( loc.a() )->p() + loc.getOfs() );
- if ( doSanityCheck )
- e->assertOk();
+DiskLoc MmapV1ExtentManager::extentLocForV1(const DiskLoc& loc) const {
+ MmapV1RecordHeader* record = recordForV1(loc);
+ return DiskLoc(loc.a(), record->extentOfs());
+}
- _recordAccessTracker->markAccessed( e );
+Extent* MmapV1ExtentManager::extentForV1(const DiskLoc& loc) const {
+ DiskLoc extentLoc = extentLocForV1(loc);
+ return getExtent(extentLoc);
+}
- return e;
- }
+Extent* MmapV1ExtentManager::getExtent(const DiskLoc& loc, bool doSanityCheck) const {
+ loc.assertOk();
+ Extent* e = reinterpret_cast<Extent*>(_getOpenFile(loc.a())->p() + loc.getOfs());
+ if (doSanityCheck)
+ e->assertOk();
- void _checkQuota( bool enforceQuota, int fileNo ) {
- if ( !enforceQuota )
- return;
+ _recordAccessTracker->markAccessed(e);
- if ( fileNo < mmapv1GlobalOptions.quotaFiles )
- return;
+ return e;
+}
- uasserted(12501, "quota exceeded");
- }
+void _checkQuota(bool enforceQuota, int fileNo) {
+ if (!enforceQuota)
+ return;
- int MmapV1ExtentManager::maxSize() const {
- return DataFile::maxSize() - DataFileHeader::HeaderSize - 16;
- }
+ if (fileNo < mmapv1GlobalOptions.quotaFiles)
+ return;
- DiskLoc MmapV1ExtentManager::_createExtentInFile( OperationContext* txn,
- int fileNo,
- DataFile* f,
- int size,
- bool enforceQuota ) {
+ uasserted(12501, "quota exceeded");
+}
- _checkQuota( enforceQuota, fileNo - 1 );
+int MmapV1ExtentManager::maxSize() const {
+ return DataFile::maxSize() - DataFileHeader::HeaderSize - 16;
+}
- massert( 10358, "bad new extent size", size >= minSize() && size <= maxSize() );
+DiskLoc MmapV1ExtentManager::_createExtentInFile(
+ OperationContext* txn, int fileNo, DataFile* f, int size, bool enforceQuota) {
+ _checkQuota(enforceQuota, fileNo - 1);
- DiskLoc loc = f->allocExtentArea( txn, size );
- loc.assertOk();
+ massert(10358, "bad new extent size", size >= minSize() && size <= maxSize());
- Extent *e = getExtent( loc, false );
- verify( e );
+ DiskLoc loc = f->allocExtentArea(txn, size);
+ loc.assertOk();
- *txn->recoveryUnit()->writing(&e->magic) = Extent::extentSignature;
- *txn->recoveryUnit()->writing(&e->myLoc) = loc;
- *txn->recoveryUnit()->writing(&e->length) = size;
+ Extent* e = getExtent(loc, false);
+ verify(e);
- return loc;
- }
+ *txn->recoveryUnit()->writing(&e->magic) = Extent::extentSignature;
+ *txn->recoveryUnit()->writing(&e->myLoc) = loc;
+ *txn->recoveryUnit()->writing(&e->length) = size;
+ return loc;
+}
- DiskLoc MmapV1ExtentManager::_createExtent( OperationContext* txn,
- int size,
- bool enforceQuota ) {
- size = quantizeExtentSize( size );
- if ( size > maxSize() )
- size = maxSize();
+DiskLoc MmapV1ExtentManager::_createExtent(OperationContext* txn, int size, bool enforceQuota) {
+ size = quantizeExtentSize(size);
- verify( size < DataFile::maxSize() );
+ if (size > maxSize())
+ size = maxSize();
- for ( int i = numFiles() - 1; i >= 0; i-- ) {
- DataFile* f = _getOpenFile(i);
- invariant(f);
+ verify(size < DataFile::maxSize());
- if ( f->getHeader()->unusedLength >= size ) {
- return _createExtentInFile( txn, i, f, size, enforceQuota );
- }
- }
+ for (int i = numFiles() - 1; i >= 0; i--) {
+ DataFile* f = _getOpenFile(i);
+ invariant(f);
- _checkQuota( enforceQuota, numFiles() );
+ if (f->getHeader()->unusedLength >= size) {
+ return _createExtentInFile(txn, i, f, size, enforceQuota);
+ }
+ }
- // no space in an existing file
- // allocate files until we either get one big enough or hit maxSize
- for ( int i = 0; i < 8; i++ ) {
- DataFile* f = _addAFile( txn, size, false );
+ _checkQuota(enforceQuota, numFiles());
- if ( f->getHeader()->unusedLength >= size ) {
- return _createExtentInFile( txn, numFiles() - 1, f, size, enforceQuota );
- }
+ // no space in an existing file
+ // allocate files until we either get one big enough or hit maxSize
+ for (int i = 0; i < 8; i++) {
+ DataFile* f = _addAFile(txn, size, false);
+ if (f->getHeader()->unusedLength >= size) {
+ return _createExtentInFile(txn, numFiles() - 1, f, size, enforceQuota);
}
-
- // callers don't check for null return code, so assert
- msgasserted(14810, "couldn't allocate space for a new extent" );
}
- DiskLoc MmapV1ExtentManager::_allocFromFreeList( OperationContext* txn,
- int approxSize,
- bool capped ) {
- // setup extent constraints
-
- int low, high;
- if ( capped ) {
- // be strict about the size
- low = approxSize;
- if ( low > 2048 ) low -= 256;
- high = (int) (approxSize * 1.05) + 256;
- }
- else {
- low = (int) (approxSize * 0.8);
- high = (int) (approxSize * 1.4);
- }
- if ( high <= 0 ) {
- // overflowed
- high = max(approxSize, maxSize());
- }
- if ( high <= minSize() ) {
- // the minimum extent size is 4097
- high = minSize() + 1;
- }
-
- // scan free list looking for something suitable
+ // callers don't check for null return code, so assert
+ msgasserted(14810, "couldn't allocate space for a new extent");
+}
- int n = 0;
- Extent *best = 0;
- int bestDiff = 0x7fffffff;
- {
- Timer t;
- DiskLoc L = _getFreeListStart();
- while( !L.isNull() ) {
- Extent* e = getExtent( L );
- if ( e->length >= low && e->length <= high ) {
- int diff = abs(e->length - approxSize);
- if ( diff < bestDiff ) {
- bestDiff = diff;
- best = e;
- if ( ((double) diff) / approxSize < 0.1 ) {
- // close enough
- break;
- }
- if ( t.seconds() >= 2 ) {
- // have spent lots of time in write lock, and we are in [low,high], so close enough
- // could come into play if extent freelist is very long
- break;
- }
+DiskLoc MmapV1ExtentManager::_allocFromFreeList(OperationContext* txn,
+ int approxSize,
+ bool capped) {
+    // set up the extent size constraints
+
+ int low, high;
+ if (capped) {
+ // be strict about the size
+ low = approxSize;
+ if (low > 2048)
+ low -= 256;
+ high = (int)(approxSize * 1.05) + 256;
+ } else {
+ low = (int)(approxSize * 0.8);
+ high = (int)(approxSize * 1.4);
+ }
+ if (high <= 0) {
+ // overflowed
+ high = max(approxSize, maxSize());
+ }
+ if (high <= minSize()) {
+ // the minimum extent size is 4097
+ high = minSize() + 1;
+ }
+
+ // scan free list looking for something suitable
+
+ int n = 0;
+ Extent* best = 0;
+ int bestDiff = 0x7fffffff;
+ {
+ Timer t;
+ DiskLoc L = _getFreeListStart();
+ while (!L.isNull()) {
+ Extent* e = getExtent(L);
+ if (e->length >= low && e->length <= high) {
+ int diff = abs(e->length - approxSize);
+ if (diff < bestDiff) {
+ bestDiff = diff;
+ best = e;
+ if (((double)diff) / approxSize < 0.1) {
+ // close enough
+ break;
}
- else {
- OCCASIONALLY {
- if ( high < 64 * 1024 && t.seconds() >= 2 ) {
- // be less picky if it is taking a long time
- high = 64 * 1024;
- }
+ if (t.seconds() >= 2) {
+                        // We have spent a long time in the write lock and this candidate
+                        // is already in [low, high], so it is close enough. This mainly
+                        // matters when the extent freelist is very long.
+ break;
+ }
+ } else {
+ OCCASIONALLY {
+ if (high < 64 * 1024 && t.seconds() >= 2) {
+ // be less picky if it is taking a long time
+ high = 64 * 1024;
}
}
}
- L = e->xnext;
- ++n;
- }
- if ( t.seconds() >= 10 ) {
- log() << "warning: slow scan in allocFromFreeList (in write lock)" << endl;
}
+ L = e->xnext;
+ ++n;
}
+ if (t.seconds() >= 10) {
+ log() << "warning: slow scan in allocFromFreeList (in write lock)" << endl;
+ }
+ }
- if ( n > 128 ) { LOG( n < 512 ? 1 : 0 ) << "warning: newExtent " << n << " scanned\n"; }
-
- if ( !best )
- return DiskLoc();
-
- // remove from the free list
- if ( !best->xprev.isNull() )
- *txn->recoveryUnit()->writing(&getExtent( best->xprev )->xnext) = best->xnext;
- if ( !best->xnext.isNull() )
- *txn->recoveryUnit()->writing(&getExtent( best->xnext )->xprev) = best->xprev;
- if ( _getFreeListStart() == best->myLoc )
- _setFreeListStart( txn, best->xnext );
- if ( _getFreeListEnd() == best->myLoc )
- _setFreeListEnd( txn, best->xprev );
-
- return best->myLoc;
+ if (n > 128) {
+ LOG(n < 512 ? 1 : 0) << "warning: newExtent " << n << " scanned\n";
}
- DiskLoc MmapV1ExtentManager::allocateExtent(OperationContext* txn,
- bool capped,
- int size,
- bool enforceQuota) {
- Lock::ResourceLock rlk(txn->lockState(), _rid, MODE_X);
- bool fromFreeList = true;
- DiskLoc eloc = _allocFromFreeList( txn, size, capped );
- if ( eloc.isNull() ) {
- fromFreeList = false;
- eloc = _createExtent( txn, size, enforceQuota );
- }
+ if (!best)
+ return DiskLoc();
- invariant( !eloc.isNull() );
- invariant( eloc.isValid() );
+ // remove from the free list
+ if (!best->xprev.isNull())
+ *txn->recoveryUnit()->writing(&getExtent(best->xprev)->xnext) = best->xnext;
+ if (!best->xnext.isNull())
+ *txn->recoveryUnit()->writing(&getExtent(best->xnext)->xprev) = best->xprev;
+ if (_getFreeListStart() == best->myLoc)
+ _setFreeListStart(txn, best->xnext);
+ if (_getFreeListEnd() == best->myLoc)
+ _setFreeListEnd(txn, best->xprev);
- LOG(1) << "MmapV1ExtentManager::allocateExtent"
- << " desiredSize:" << size
- << " fromFreeList: " << fromFreeList
- << " eloc: " << eloc;
+ return best->myLoc;
+}
- return eloc;
+DiskLoc MmapV1ExtentManager::allocateExtent(OperationContext* txn,
+ bool capped,
+ int size,
+ bool enforceQuota) {
+ Lock::ResourceLock rlk(txn->lockState(), _rid, MODE_X);
+ bool fromFreeList = true;
+ DiskLoc eloc = _allocFromFreeList(txn, size, capped);
+ if (eloc.isNull()) {
+ fromFreeList = false;
+ eloc = _createExtent(txn, size, enforceQuota);
}
- void MmapV1ExtentManager::freeExtent(OperationContext* txn, DiskLoc firstExt ) {
- Lock::ResourceLock rlk(txn->lockState(), _rid, MODE_X);
- Extent* e = getExtent( firstExt );
- txn->recoveryUnit()->writing( &e->xnext )->Null();
- txn->recoveryUnit()->writing( &e->xprev )->Null();
- txn->recoveryUnit()->writing( &e->firstRecord )->Null();
- txn->recoveryUnit()->writing( &e->lastRecord )->Null();
-
-
- if( _getFreeListStart().isNull() ) {
- _setFreeListStart( txn, firstExt );
- _setFreeListEnd( txn, firstExt );
- }
- else {
- DiskLoc a = _getFreeListStart();
- invariant( getExtent( a )->xprev.isNull() );
- *txn->recoveryUnit()->writing( &getExtent( a )->xprev ) = firstExt;
- *txn->recoveryUnit()->writing( &getExtent( firstExt )->xnext ) = a;
- _setFreeListStart( txn, firstExt );
- }
+ invariant(!eloc.isNull());
+ invariant(eloc.isValid());
- }
+ LOG(1) << "MmapV1ExtentManager::allocateExtent"
+ << " desiredSize:" << size << " fromFreeList: " << fromFreeList << " eloc: " << eloc;
- void MmapV1ExtentManager::freeExtents(OperationContext* txn, DiskLoc firstExt, DiskLoc lastExt) {
- Lock::ResourceLock rlk(txn->lockState(), _rid, MODE_X);
+ return eloc;
+}
- if ( firstExt.isNull() && lastExt.isNull() )
- return;
+void MmapV1ExtentManager::freeExtent(OperationContext* txn, DiskLoc firstExt) {
+ Lock::ResourceLock rlk(txn->lockState(), _rid, MODE_X);
+ Extent* e = getExtent(firstExt);
+ txn->recoveryUnit()->writing(&e->xnext)->Null();
+ txn->recoveryUnit()->writing(&e->xprev)->Null();
+ txn->recoveryUnit()->writing(&e->firstRecord)->Null();
+ txn->recoveryUnit()->writing(&e->lastRecord)->Null();
- {
- verify( !firstExt.isNull() && !lastExt.isNull() );
- Extent *f = getExtent( firstExt );
- Extent *l = getExtent( lastExt );
- verify( f->xprev.isNull() );
- verify( l->xnext.isNull() );
- verify( f==l || !f->xnext.isNull() );
- verify( f==l || !l->xprev.isNull() );
- }
- if( _getFreeListStart().isNull() ) {
- _setFreeListStart( txn, firstExt );
- _setFreeListEnd( txn, lastExt );
- }
- else {
- DiskLoc a = _getFreeListStart();
- invariant( getExtent( a )->xprev.isNull() );
- *txn->recoveryUnit()->writing( &getExtent( a )->xprev ) = lastExt;
- *txn->recoveryUnit()->writing( &getExtent( lastExt )->xnext ) = a;
- _setFreeListStart( txn, firstExt );
- }
+ if (_getFreeListStart().isNull()) {
+ _setFreeListStart(txn, firstExt);
+ _setFreeListEnd(txn, firstExt);
+ } else {
+ DiskLoc a = _getFreeListStart();
+ invariant(getExtent(a)->xprev.isNull());
+ *txn->recoveryUnit()->writing(&getExtent(a)->xprev) = firstExt;
+ *txn->recoveryUnit()->writing(&getExtent(firstExt)->xnext) = a;
+ _setFreeListStart(txn, firstExt);
}
+}
- DiskLoc MmapV1ExtentManager::_getFreeListStart() const {
- if ( _files.empty() )
- return DiskLoc();
- const DataFile* file = _getOpenFile(0);
- return file->header()->freeListStart;
- }
+void MmapV1ExtentManager::freeExtents(OperationContext* txn, DiskLoc firstExt, DiskLoc lastExt) {
+ Lock::ResourceLock rlk(txn->lockState(), _rid, MODE_X);
- DiskLoc MmapV1ExtentManager::_getFreeListEnd() const {
- if ( _files.empty() )
- return DiskLoc();
- const DataFile* file = _getOpenFile(0);
- return file->header()->freeListEnd;
- }
+ if (firstExt.isNull() && lastExt.isNull())
+ return;
- void MmapV1ExtentManager::_setFreeListStart( OperationContext* txn, DiskLoc loc ) {
- invariant( !_files.empty() );
- DataFile* file = _files[0];
- *txn->recoveryUnit()->writing( &file->header()->freeListStart ) = loc;
+ {
+ verify(!firstExt.isNull() && !lastExt.isNull());
+ Extent* f = getExtent(firstExt);
+ Extent* l = getExtent(lastExt);
+ verify(f->xprev.isNull());
+ verify(l->xnext.isNull());
+ verify(f == l || !f->xnext.isNull());
+ verify(f == l || !l->xprev.isNull());
}
- void MmapV1ExtentManager::_setFreeListEnd( OperationContext* txn, DiskLoc loc ) {
- invariant( !_files.empty() );
- DataFile* file = _files[0];
- *txn->recoveryUnit()->writing( &file->header()->freeListEnd ) = loc;
+ if (_getFreeListStart().isNull()) {
+ _setFreeListStart(txn, firstExt);
+ _setFreeListEnd(txn, lastExt);
+ } else {
+ DiskLoc a = _getFreeListStart();
+ invariant(getExtent(a)->xprev.isNull());
+ *txn->recoveryUnit()->writing(&getExtent(a)->xprev) = lastExt;
+ *txn->recoveryUnit()->writing(&getExtent(lastExt)->xnext) = a;
+ _setFreeListStart(txn, firstExt);
}
+}
- void MmapV1ExtentManager::freeListStats(OperationContext* txn,
- int* numExtents,
- int64_t* totalFreeSizeBytes) const {
- Lock::ResourceLock rlk(txn->lockState(), _rid, MODE_S);
+DiskLoc MmapV1ExtentManager::_getFreeListStart() const {
+ if (_files.empty())
+ return DiskLoc();
+ const DataFile* file = _getOpenFile(0);
+ return file->header()->freeListStart;
+}
- invariant(numExtents);
- invariant(totalFreeSizeBytes);
+DiskLoc MmapV1ExtentManager::_getFreeListEnd() const {
+ if (_files.empty())
+ return DiskLoc();
+ const DataFile* file = _getOpenFile(0);
+ return file->header()->freeListEnd;
+}
- *numExtents = 0;
- *totalFreeSizeBytes = 0;
+void MmapV1ExtentManager::_setFreeListStart(OperationContext* txn, DiskLoc loc) {
+ invariant(!_files.empty());
+ DataFile* file = _files[0];
+ *txn->recoveryUnit()->writing(&file->header()->freeListStart) = loc;
+}
- DiskLoc a = _getFreeListStart();
- while( !a.isNull() ) {
- Extent *e = getExtent( a );
- (*numExtents)++;
- (*totalFreeSizeBytes) += e->length;
- a = e->xnext;
- }
+void MmapV1ExtentManager::_setFreeListEnd(OperationContext* txn, DiskLoc loc) {
+ invariant(!_files.empty());
+ DataFile* file = _files[0];
+ *txn->recoveryUnit()->writing(&file->header()->freeListEnd) = loc;
+}
- }
+void MmapV1ExtentManager::freeListStats(OperationContext* txn,
+ int* numExtents,
+ int64_t* totalFreeSizeBytes) const {
+ Lock::ResourceLock rlk(txn->lockState(), _rid, MODE_S);
- void MmapV1ExtentManager::printFreeList() const {
- log() << "dump freelist " << _dbname << endl;
+ invariant(numExtents);
+ invariant(totalFreeSizeBytes);
- DiskLoc a = _getFreeListStart();
- while( !a.isNull() ) {
- Extent *e = getExtent( a );
- log() << " extent " << a.toString()
- << " len:" << e->length
- << " prev:" << e->xprev.toString() << endl;
- a = e->xnext;
- }
+ *numExtents = 0;
+ *totalFreeSizeBytes = 0;
- log() << "end freelist" << endl;
+ DiskLoc a = _getFreeListStart();
+ while (!a.isNull()) {
+ Extent* e = getExtent(a);
+ (*numExtents)++;
+ (*totalFreeSizeBytes) += e->length;
+ a = e->xnext;
}
+}
- namespace {
- class CacheHintMadvise : public ExtentManager::CacheHint {
- public:
- CacheHintMadvise(void *p, unsigned len, MAdvise::Advice a)
- : _advice( p, len, a ) {
- }
- private:
- MAdvise _advice;
- };
- }
+void MmapV1ExtentManager::printFreeList() const {
+ log() << "dump freelist " << _dbname << endl;
- ExtentManager::CacheHint* MmapV1ExtentManager::cacheHint( const DiskLoc& extentLoc,
- const ExtentManager::HintType& hint ) {
- invariant ( hint == Sequential );
- Extent* e = getExtent( extentLoc );
- return new CacheHintMadvise( reinterpret_cast<void*>( e ),
- e->length,
- MAdvise::Sequential );
+ DiskLoc a = _getFreeListStart();
+ while (!a.isNull()) {
+ Extent* e = getExtent(a);
+ log() << " extent " << a.toString() << " len:" << e->length
+ << " prev:" << e->xprev.toString() << endl;
+ a = e->xnext;
}
- MmapV1ExtentManager::FilesArray::~FilesArray() {
- for (int i = 0; i < size(); i++) {
- delete _files[i];
- }
- }
+ log() << "end freelist" << endl;
+}
- void MmapV1ExtentManager::FilesArray::push_back(DataFile* val) {
- stdx::lock_guard<stdx::mutex> lk(_writersMutex);
- const int n = _size.load();
- invariant(n < DiskLoc::MaxFiles);
- // Note ordering: _size update must come after updating the _files array
- _files[n] = val;
- _size.store(n + 1);
- }
+namespace {
+class CacheHintMadvise : public ExtentManager::CacheHint {
+public:
+ CacheHintMadvise(void* p, unsigned len, MAdvise::Advice a) : _advice(p, len, a) {}
+
+private:
+ MAdvise _advice;
+};
+}
- DataFileVersion MmapV1ExtentManager::getFileFormat(OperationContext* txn) const {
- if ( numFiles() == 0 )
- return DataFileVersion(0, 0);
+ExtentManager::CacheHint* MmapV1ExtentManager::cacheHint(const DiskLoc& extentLoc,
+ const ExtentManager::HintType& hint) {
+ invariant(hint == Sequential);
+ Extent* e = getExtent(extentLoc);
+ return new CacheHintMadvise(reinterpret_cast<void*>(e), e->length, MAdvise::Sequential);
+}
- // We explicitly only look at the first file.
- return _getOpenFile(0)->getHeader()->version;
+MmapV1ExtentManager::FilesArray::~FilesArray() {
+ for (int i = 0; i < size(); i++) {
+ delete _files[i];
}
+}
- void MmapV1ExtentManager::setFileFormat(OperationContext* txn, DataFileVersion newVersion) {
- invariant(numFiles() > 0);
+void MmapV1ExtentManager::FilesArray::push_back(DataFile* val) {
+ stdx::lock_guard<stdx::mutex> lk(_writersMutex);
+ const int n = _size.load();
+ invariant(n < DiskLoc::MaxFiles);
+ // Note ordering: _size update must come after updating the _files array
+ _files[n] = val;
+ _size.store(n + 1);
+}
- DataFile* df = _getOpenFile(0);
- invariant(df);
+DataFileVersion MmapV1ExtentManager::getFileFormat(OperationContext* txn) const {
+ if (numFiles() == 0)
+ return DataFileVersion(0, 0);
- *txn->recoveryUnit()->writing(&df->getHeader()->version) = newVersion;
- }
+ // We explicitly only look at the first file.
+ return _getOpenFile(0)->getHeader()->version;
+}
+
+void MmapV1ExtentManager::setFileFormat(OperationContext* txn, DataFileVersion newVersion) {
+ invariant(numFiles() > 0);
+
+ DataFile* df = _getOpenFile(0);
+ invariant(df);
+
+ *txn->recoveryUnit()->writing(&df->getHeader()->version) = newVersion;
+}
}
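For reference, the free-list logic reformatted above is a windowed best-fit scan: _allocFromFreeList only considers extents whose length falls in a band around the requested size ([0.8x, 1.4x] normally, a tight band for capped collections), tracks the closest match, and stops early once a candidate is within 10% of the request. Below is a minimal standalone sketch of that scan; FreeExtent and bestFit are illustrative stand-ins, and the time-based relaxations (widening 'high' and bailing out after two seconds in the write lock) are omitted.

    #include <climits>
    #include <cstdlib>

    // Illustrative stand-in for the on-disk Extent free-list node.
    struct FreeExtent {
        int length;
        FreeExtent* xnext;
    };

    // Windowed best-fit scan mirroring _allocFromFreeList above.
    FreeExtent* bestFit(FreeExtent* head, int approxSize, bool capped) {
        // Set up the acceptance window around the requested size.
        int low, high;
        if (capped) {
            low = approxSize > 2048 ? approxSize - 256 : approxSize;
            high = static_cast<int>(approxSize * 1.05) + 256;
        } else {
            low = static_cast<int>(approxSize * 0.8);
            high = static_cast<int>(approxSize * 1.4);
        }

        FreeExtent* best = nullptr;
        int bestDiff = INT_MAX;
        for (FreeExtent* e = head; e; e = e->xnext) {
            if (e->length < low || e->length > high)
                continue;  // outside the window; keep scanning
            int diff = std::abs(e->length - approxSize);
            if (diff < bestDiff) {
                bestDiff = diff;
                best = e;
                if (static_cast<double>(diff) / approxSize < 0.1)
                    break;  // within 10% of the request: close enough
            }
        }
        return best;  // may be null: the caller falls back to _createExtent
    }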
diff --git a/src/mongo/db/storage/mmap_v1/mmap_v1_extent_manager.h b/src/mongo/db/storage/mmap_v1/mmap_v1_extent_manager.h
index 8253d0f87a3..1f7a0963aa1 100644
--- a/src/mongo/db/storage/mmap_v1/mmap_v1_extent_manager.h
+++ b/src/mongo/db/storage/mmap_v1/mmap_v1_extent_manager.h
@@ -45,204 +45,198 @@
namespace mongo {
- class DataFile;
- class DataFileVersion;
- class MmapV1RecordHeader;
- class OperationContext;
+class DataFile;
+class DataFileVersion;
+class MmapV1RecordHeader;
+class OperationContext;
- struct Extent;
+struct Extent;
+/**
+ * ExtentManager basics
+ * - one per database
+ * - responsible for managing <db>.# files
+ * - NOT responsible for .ns file
+ * - gives out extents
+ * - responsible for figuring out how to get a new extent
+ * - can use any method it wants to do so
+ * - this structure is NOT stored on disk
+ * - this class is thread safe, except as indicated below
+ *
+ * Implementation:
+ * - ExtentManager holds a preallocated list of DataFile
+ * - files will not be removed from the EM, so _files access can be lock-free
+ * - extent size and loc are immutable
+ * - Any non-const public operation on an ExtentManager acquires a MODE_X lock on its
+ *   RESOURCE_MMAPv1_EXTENT_MANAGER resource from the lock-manager, which extends the
+ *   lock's lifetime through WriteUnitOfWorks that might need rollback. Private methods
+ *   are only called from public ones.
+ */
+class MmapV1ExtentManager : public ExtentManager {
+ MONGO_DISALLOW_COPYING(MmapV1ExtentManager);
+
+public:
/**
- * ExtentManager basics
- * - one per database
- * - responsible for managing <db>.# files
- * - NOT responsible for .ns file
- * - gives out extents
- * - responsible for figuring out how to get a new extent
- * - can use any method it wants to do so
- * - this structure is NOT stored on disk
- * - this class is thread safe, except as indicated below
- *
- * Implementation:
- * - ExtentManager holds a preallocated list of DataFile
- * - files will not be removed from the EM, so _files access can be lock-free
- * - extent size and loc are immutable
- * - Any non-const public operations on an ExtentManager will acquire an MODE_X lock on its
- * RESOURCE_MMAPv1_EXTENT_MANAGER resource from the lock-manager, which will extend life
- * to during WriteUnitOfWorks that might need rollback. Private methods will only
- * be called from public ones.
+     * Note: the free-list details are a reference into the .ns file.
+     * While a bit odd, this is not a layer violation, as extents
+     * are a peer to the .ns file, without any layering.
*/
- class MmapV1ExtentManager : public ExtentManager {
- MONGO_DISALLOW_COPYING( MmapV1ExtentManager );
- public:
- /**
- * @param freeListDetails this is a reference into the .ns file
- * while a bit odd, this is not a layer violation as extents
- * are a peer to the .ns file, without any layering
- */
- MmapV1ExtentManager(StringData dbname, StringData path,
- bool directoryPerDB);
+ MmapV1ExtentManager(StringData dbname, StringData path, bool directoryPerDB);
- /**
- * opens all current files, not thread safe
- */
- Status init(OperationContext* txn);
+ /**
+ * opens all current files, not thread safe
+ */
+ Status init(OperationContext* txn);
- int numFiles() const;
- long long fileSize() const;
+ int numFiles() const;
+ long long fileSize() const;
- // must call Extent::reuse on the returned extent
- DiskLoc allocateExtent( OperationContext* txn,
- bool capped,
- int size,
- bool enforceQuota );
+ // must call Extent::reuse on the returned extent
+ DiskLoc allocateExtent(OperationContext* txn, bool capped, int size, bool enforceQuota);
- /**
- * firstExt has to be == lastExt or a chain
- */
- void freeExtents( OperationContext* txn, DiskLoc firstExt, DiskLoc lastExt );
+ /**
+ * firstExt has to be == lastExt or a chain
+ */
+ void freeExtents(OperationContext* txn, DiskLoc firstExt, DiskLoc lastExt);
- /**
- * frees a single extent
- * ignores all fields in the Extent except: magic, myLoc, length
- */
- void freeExtent( OperationContext* txn, DiskLoc extent );
+ /**
+ * frees a single extent
+ * ignores all fields in the Extent except: magic, myLoc, length
+ */
+ void freeExtent(OperationContext* txn, DiskLoc extent);
- // For debug only: not thread safe
- void printFreeList() const;
+ // For debug only: not thread safe
+ void printFreeList() const;
- void freeListStats(OperationContext* txn,
- int* numExtents,
- int64_t* totalFreeSizeBytes) const;
+ void freeListStats(OperationContext* txn, int* numExtents, int64_t* totalFreeSizeBytes) const;
- /**
- * @param loc - has to be for a specific MmapV1RecordHeader
- * Note(erh): this sadly cannot be removed.
- * A MmapV1RecordHeader DiskLoc has an offset from a file, while a RecordStore really wants an offset
- * from an extent. This intrinsically links an original record store to the original extent
- * manager.
- */
- MmapV1RecordHeader* recordForV1( const DiskLoc& loc ) const;
+ /**
+ * @param loc - has to be for a specific MmapV1RecordHeader
+ * Note(erh): this sadly cannot be removed.
+     * A MmapV1RecordHeader DiskLoc has an offset from a file, while a RecordStore really
+     * wants an offset from an extent. This intrinsically links an original record store
+     * to the original extent manager.
+ */
+ MmapV1RecordHeader* recordForV1(const DiskLoc& loc) const;
- std::unique_ptr<RecordFetcher> recordNeedsFetch( const DiskLoc& loc ) const final;
+ std::unique_ptr<RecordFetcher> recordNeedsFetch(const DiskLoc& loc) const final;
- /**
- * @param loc - has to be for a specific MmapV1RecordHeader (not an Extent)
- * Note(erh) see comment on recordFor
- */
- Extent* extentForV1( const DiskLoc& loc ) const;
+ /**
+ * @param loc - has to be for a specific MmapV1RecordHeader (not an Extent)
+ * Note(erh) see comment on recordFor
+ */
+ Extent* extentForV1(const DiskLoc& loc) const;
- /**
- * @param loc - has to be for a specific MmapV1RecordHeader (not an Extent)
- * Note(erh) see comment on recordFor
- */
- DiskLoc extentLocForV1( const DiskLoc& loc ) const;
+ /**
+ * @param loc - has to be for a specific MmapV1RecordHeader (not an Extent)
+ * Note(erh) see comment on recordFor
+ */
+ DiskLoc extentLocForV1(const DiskLoc& loc) const;
- /**
- * @param loc - has to be for a specific Extent
- */
- Extent* getExtent( const DiskLoc& loc, bool doSanityCheck = true ) const;
+ /**
+ * @param loc - has to be for a specific Extent
+ */
+ Extent* getExtent(const DiskLoc& loc, bool doSanityCheck = true) const;
- /**
- * Not thread safe, requires a database exclusive lock
- */
- DataFileVersion getFileFormat(OperationContext* txn) const;
- void setFileFormat(OperationContext* txn, DataFileVersion newVersion);
+ /**
+ * Not thread safe, requires a database exclusive lock
+ */
+ DataFileVersion getFileFormat(OperationContext* txn) const;
+ void setFileFormat(OperationContext* txn, DataFileVersion newVersion);
- const DataFile* getOpenFile( int n ) const { return _getOpenFile( n ); }
+ const DataFile* getOpenFile(int n) const {
+ return _getOpenFile(n);
+ }
- virtual int maxSize() const;
+ virtual int maxSize() const;
- virtual CacheHint* cacheHint( const DiskLoc& extentLoc, const HintType& hint );
+ virtual CacheHint* cacheHint(const DiskLoc& extentLoc, const HintType& hint);
- private:
- /**
- * will return NULL if nothing suitable in free list
- */
- DiskLoc _allocFromFreeList( OperationContext* txn, int approxSize, bool capped );
+private:
+ /**
+ * will return NULL if nothing suitable in free list
+ */
+ DiskLoc _allocFromFreeList(OperationContext* txn, int approxSize, bool capped);
- /* allocate a new Extent, does not check free list
- */
- DiskLoc _createExtent( OperationContext* txn, int approxSize, bool enforceQuota );
+ /* allocate a new Extent, does not check free list
+ */
+ DiskLoc _createExtent(OperationContext* txn, int approxSize, bool enforceQuota);
- DataFile* _addAFile( OperationContext* txn, int sizeNeeded, bool preallocateNextFile );
+ DataFile* _addAFile(OperationContext* txn, int sizeNeeded, bool preallocateNextFile);
- /**
- * Shared record retrieval logic used by the public recordForV1() and likelyInPhysicalMem()
- * above.
- */
- MmapV1RecordHeader* _recordForV1( const DiskLoc& loc ) const;
+ /**
+     * Shared record retrieval logic used by the public record-retrieval methods
+     * above (e.g. recordForV1()).
+ */
+ MmapV1RecordHeader* _recordForV1(const DiskLoc& loc) const;
- DiskLoc _getFreeListStart() const;
- DiskLoc _getFreeListEnd() const;
- void _setFreeListStart( OperationContext* txn, DiskLoc loc );
- void _setFreeListEnd( OperationContext* txn, DiskLoc loc );
+ DiskLoc _getFreeListStart() const;
+ DiskLoc _getFreeListEnd() const;
+ void _setFreeListStart(OperationContext* txn, DiskLoc loc);
+ void _setFreeListEnd(OperationContext* txn, DiskLoc loc);
- const DataFile* _getOpenFile(int fileId) const;
- DataFile* _getOpenFile(int fileId);
+ const DataFile* _getOpenFile(int fileId) const;
+ DataFile* _getOpenFile(int fileId);
- DiskLoc _createExtentInFile( OperationContext* txn,
- int fileNo,
- DataFile* f,
- int size,
- bool enforceQuota );
+ DiskLoc _createExtentInFile(
+ OperationContext* txn, int fileNo, DataFile* f, int size, bool enforceQuota);
- boost::filesystem::path _fileName(int n) const;
+ boost::filesystem::path _fileName(int n) const;
-// -----
+ // -----
- const std::string _dbname; // i.e. "test"
- const std::string _path; // i.e. "/data/db"
- const bool _directoryPerDB;
- const ResourceId _rid;
+ const std::string _dbname; // i.e. "test"
+ const std::string _path; // i.e. "/data/db"
+ const bool _directoryPerDB;
+ const ResourceId _rid;
- // This reference points into the MMAPv1 engine and is only valid as long as the
- // engine is valid. Not owned here.
- RecordAccessTracker* _recordAccessTracker;
+ // This reference points into the MMAPv1 engine and is only valid as long as the
+ // engine is valid. Not owned here.
+ RecordAccessTracker* _recordAccessTracker;
+
+ /**
+ * Simple wrapper around an array object to allow append-only modification of the array,
+ * as well as concurrent read-accesses. This class has a minimal interface to keep
+ * implementation simple and easy to modify.
+ */
+ class FilesArray {
+ public:
+ FilesArray() : _size(0) {}
+ ~FilesArray();
+
+ /**
+         * Returns the file at location 'n' in the array, with 'n' less than the
+         * number of files added. Will always return the same pointer for a given file.
+ */
+ DataFile* operator[](int n) const {
+ invariant(n >= 0 && n < size());
+ return _files[n];
+ }
/**
- * Simple wrapper around an array object to allow append-only modification of the array,
- * as well as concurrent read-accesses. This class has a minimal interface to keep
- * implementation simple and easy to modify.
+ * Returns true iff no files were added
*/
- class FilesArray {
- public:
- FilesArray() : _size(0) { }
- ~FilesArray();
-
- /**
- * Returns file at location 'n' in the array, with 'n' less than number of files added.
- * Will always return the same pointer for a given file.
- */
- DataFile* operator[](int n) const {
- invariant(n >= 0 && n < size());
- return _files[n];
- }
-
- /**
- * Returns true iff no files were added
- */
- bool empty() const {
- return size() == 0;
- }
-
- /**
- * Returns number of files added to the array
- */
- int size() const {
- return _size.load();
- }
-
- // Appends val to the array, taking ownership of its pointer
- void push_back(DataFile* val);
-
- private:
- stdx::mutex _writersMutex;
- AtomicInt32 _size; // number of files in the array
- DataFile* _files[DiskLoc::MaxFiles];
- };
-
- FilesArray _files;
+ bool empty() const {
+ return size() == 0;
+ }
+
+ /**
+ * Returns number of files added to the array
+ */
+ int size() const {
+ return _size.load();
+ }
+
+ // Appends val to the array, taking ownership of its pointer
+ void push_back(DataFile* val);
+
+ private:
+ stdx::mutex _writersMutex;
+ AtomicInt32 _size; // number of files in the array
+ DataFile* _files[DiskLoc::MaxFiles];
};
+
+ FilesArray _files;
+};
}
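The FilesArray comment above ("_size update must come after updating the _files array") is the crux of its lock-free reads. Here is a minimal sketch of the same publish-then-expose pattern, with the memory ordering spelled out explicitly (an assumption of the sketch; the original relies on AtomicInt32's default ordering):

    #include <atomic>
    #include <mutex>

    // Append-only array sketch: readers index without locking, writers
    // serialize on a mutex. Storing the element before bumping the size
    // guarantees a reader that observes size() == n + 1 sees _files[n].
    template <typename T, int MaxFiles>
    class AppendOnlyArray {
    public:
        T* operator[](int n) const {
            return _files[n];  // caller guarantees 0 <= n < size()
        }
        int size() const {
            return _size.load(std::memory_order_acquire);
        }
        void push_back(T* val) {
            std::lock_guard<std::mutex> lk(_writersMutex);
            const int n = _size.load(std::memory_order_relaxed);
            _files[n] = val;                                // 1) publish the element
            _size.store(n + 1, std::memory_order_release);  // 2) then expose it
        }

    private:
        std::mutex _writersMutex;
        std::atomic<int> _size{0};
        T* _files[MaxFiles] = {};
    };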
diff --git a/src/mongo/db/storage/mmap_v1/mmap_v1_init.cpp b/src/mongo/db/storage/mmap_v1/mmap_v1_init.cpp
index 29fb1bc8c97..920a6d89182 100644
--- a/src/mongo/db/storage/mmap_v1/mmap_v1_init.cpp
+++ b/src/mongo/db/storage/mmap_v1/mmap_v1_init.cpp
@@ -38,46 +38,44 @@
namespace mongo {
- namespace {
+namespace {
- class MMAPV1Factory : public StorageEngine::Factory {
- public:
- virtual ~MMAPV1Factory() { }
- virtual StorageEngine* create(const StorageGlobalParams& params,
- const StorageEngineLockFile& lockFile) const {
- return new MMAPV1Engine(lockFile);
- }
-
- virtual StringData getCanonicalName() const {
- return "mmapv1";
- }
+class MMAPV1Factory : public StorageEngine::Factory {
+public:
+ virtual ~MMAPV1Factory() {}
+ virtual StorageEngine* create(const StorageGlobalParams& params,
+ const StorageEngineLockFile& lockFile) const {
+ return new MMAPV1Engine(lockFile);
+ }
- virtual Status validateMetadata(const StorageEngineMetadata& metadata,
- const StorageGlobalParams& params) const {
- Status status = metadata.validateStorageEngineOption(
- "directoryPerDB", params.directoryperdb);
- if (!status.isOK()) {
- return status;
- }
+ virtual StringData getCanonicalName() const {
+ return "mmapv1";
+ }
- return Status::OK();
- }
+ virtual Status validateMetadata(const StorageEngineMetadata& metadata,
+ const StorageGlobalParams& params) const {
+ Status status =
+ metadata.validateStorageEngineOption("directoryPerDB", params.directoryperdb);
+ if (!status.isOK()) {
+ return status;
+ }
- virtual BSONObj createMetadataOptions(const StorageGlobalParams& params) const {
- BSONObjBuilder builder;
- builder.appendBool("directoryPerDB", params.directoryperdb);
- return builder.obj();
- }
- };
+ return Status::OK();
+ }
- } // namespace
+ virtual BSONObj createMetadataOptions(const StorageGlobalParams& params) const {
+ BSONObjBuilder builder;
+ builder.appendBool("directoryPerDB", params.directoryperdb);
+ return builder.obj();
+ }
+};
- MONGO_INITIALIZER_WITH_PREREQUISITES(MMAPV1EngineInit,
- ("SetGlobalEnvironment"))
- (InitializerContext* context) {
+} // namespace
- getGlobalServiceContext()->registerStorageEngine("mmapv1", new MMAPV1Factory());
- return Status::OK();
- }
+MONGO_INITIALIZER_WITH_PREREQUISITES(MMAPV1EngineInit, ("SetGlobalEnvironment"))
+(InitializerContext* context) {
+ getGlobalServiceContext()->registerStorageEngine("mmapv1", new MMAPV1Factory());
+ return Status::OK();
+}
} // namespace mongo
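The initializer above follows the storage-engine registration pattern: each engine ships a StorageEngine::Factory, registers it under a canonical name at startup, and the server later instantiates the engine by name. A generic sketch of that pattern follows; Registry, Engine, and Factory here are illustrative stand-ins, not MongoDB types.

    #include <map>
    #include <string>

    struct Engine {};

    struct Factory {
        virtual ~Factory() {}
        virtual Engine* create() const = 0;
        virtual std::string canonicalName() const = 0;
    };

    // Name-to-factory registry: engines register at startup; the server
    // later creates the configured engine by its canonical name.
    class Registry {
    public:
        void registerFactory(Factory* f) {
            _factories[f->canonicalName()] = f;
        }
        Engine* create(const std::string& name) const {
            auto it = _factories.find(name);
            return it == _factories.end() ? nullptr : it->second->create();
        }

    private:
        std::map<std::string, Factory*> _factories;
    };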
diff --git a/src/mongo/db/storage/mmap_v1/mmap_v1_init_test.cpp b/src/mongo/db/storage/mmap_v1/mmap_v1_init_test.cpp
index 62ecdde5aa1..d5323f1b398 100644
--- a/src/mongo/db/storage/mmap_v1/mmap_v1_init_test.cpp
+++ b/src/mongo/db/storage/mmap_v1/mmap_v1_init_test.cpp
@@ -38,93 +38,89 @@
namespace {
- using namespace mongo;
-
- class MMAPV1FactoryTest : public mongo::unittest::Test {
- private:
- virtual void setUp() {
- ServiceContext* globalEnv = getGlobalServiceContext();
- ASSERT_TRUE(globalEnv);
- ASSERT_TRUE(getGlobalServiceContext()->isRegisteredStorageEngine("mmapv1"));
- std::unique_ptr<StorageFactoriesIterator> sfi(getGlobalServiceContext()->
- makeStorageFactoriesIterator());
- ASSERT_TRUE(sfi);
- bool found = false;
- while (sfi->more()) {
- const StorageEngine::Factory* currentFactory = sfi->next();
- if (currentFactory->getCanonicalName() == "mmapv1") {
- found = true;
- factory = currentFactory;
- break;
- }
+using namespace mongo;
+
+class MMAPV1FactoryTest : public mongo::unittest::Test {
+private:
+ virtual void setUp() {
+ ServiceContext* globalEnv = getGlobalServiceContext();
+ ASSERT_TRUE(globalEnv);
+ ASSERT_TRUE(getGlobalServiceContext()->isRegisteredStorageEngine("mmapv1"));
+ std::unique_ptr<StorageFactoriesIterator> sfi(
+ getGlobalServiceContext()->makeStorageFactoriesIterator());
+ ASSERT_TRUE(sfi);
+ bool found = false;
+ while (sfi->more()) {
+ const StorageEngine::Factory* currentFactory = sfi->next();
+ if (currentFactory->getCanonicalName() == "mmapv1") {
+ found = true;
+ factory = currentFactory;
+ break;
}
- ASSERT_TRUE(found);
- }
-
- virtual void tearDown() {
- factory = NULL;
- }
-
- protected:
- const StorageEngine::Factory* factory;
- };
-
- void _testValidateMetadata(const StorageEngine::Factory* factory,
- const BSONObj& metadataOptions,
- bool directoryPerDB,
- ErrorCodes::Error expectedCode) {
- // It is fine to specify an invalid data directory for the metadata
- // as long as we do not invoke read() or write().
- StorageEngineMetadata metadata("no_such_directory");
- metadata.setStorageEngineOptions(metadataOptions);
-
- StorageGlobalParams storageOptions;
- storageOptions.directoryperdb = directoryPerDB;
-
- Status status = factory->validateMetadata(metadata, storageOptions);
- if (expectedCode != status.code()) {
- FAIL(str::stream()
- << "Unexpected StorageEngine::Factory::validateMetadata result. Expected: "
- << ErrorCodes::errorString(expectedCode) << " but got "
- << status.toString()
- << " instead. metadataOptions: " << metadataOptions
- << "; directoryPerDB: " << directoryPerDB);
}
+ ASSERT_TRUE(found);
}
- // Do not validate fields that are not present in metadata.
- TEST_F(MMAPV1FactoryTest, ValidateMetadataEmptyOptions) {
- _testValidateMetadata(factory, BSONObj(), false, ErrorCodes::OK);
- _testValidateMetadata(factory, BSONObj(), true, ErrorCodes::OK);
- }
-
- TEST_F(MMAPV1FactoryTest, ValidateMetadataDirectoryPerDB) {
- _testValidateMetadata(factory, fromjson("{directoryPerDB: 123}"), false,
- ErrorCodes::FailedToParse);
- _testValidateMetadata(factory, fromjson("{directoryPerDB: false}"), false,
- ErrorCodes::OK);
- _testValidateMetadata(factory, fromjson("{directoryPerDB: false}"), true,
- ErrorCodes::InvalidOptions);
- _testValidateMetadata(factory, fromjson("{directoryPerDB: true}"), false,
- ErrorCodes::InvalidOptions);
- _testValidateMetadata(factory, fromjson("{directoryPerDB: true}"), true,
- ErrorCodes::OK);
- }
-
- void _testCreateMetadataOptions(const StorageEngine::Factory* factory,
- bool directoryPerDB) {
- StorageGlobalParams storageOptions;
- storageOptions.directoryperdb = directoryPerDB;
-
- BSONObj metadataOptions = factory->createMetadataOptions(storageOptions);
- BSONElement directoryPerDBElement = metadataOptions.getField("directoryPerDB");
- ASSERT_TRUE(directoryPerDBElement.isBoolean());
- ASSERT_EQUALS(directoryPerDB, directoryPerDBElement.boolean());
+ virtual void tearDown() {
+ factory = NULL;
}
- TEST_F(MMAPV1FactoryTest, CreateMetadataOptions) {
- _testCreateMetadataOptions(factory, false);
- _testCreateMetadataOptions(factory, true);
+protected:
+ const StorageEngine::Factory* factory;
+};
+
+void _testValidateMetadata(const StorageEngine::Factory* factory,
+ const BSONObj& metadataOptions,
+ bool directoryPerDB,
+ ErrorCodes::Error expectedCode) {
+ // It is fine to specify an invalid data directory for the metadata
+ // as long as we do not invoke read() or write().
+ StorageEngineMetadata metadata("no_such_directory");
+ metadata.setStorageEngineOptions(metadataOptions);
+
+ StorageGlobalParams storageOptions;
+ storageOptions.directoryperdb = directoryPerDB;
+
+ Status status = factory->validateMetadata(metadata, storageOptions);
+ if (expectedCode != status.code()) {
+ FAIL(str::stream()
+ << "Unexpected StorageEngine::Factory::validateMetadata result. Expected: "
+ << ErrorCodes::errorString(expectedCode) << " but got " << status.toString()
+ << " instead. metadataOptions: " << metadataOptions
+ << "; directoryPerDB: " << directoryPerDB);
}
+}
+
+// Do not validate fields that are not present in metadata.
+TEST_F(MMAPV1FactoryTest, ValidateMetadataEmptyOptions) {
+ _testValidateMetadata(factory, BSONObj(), false, ErrorCodes::OK);
+ _testValidateMetadata(factory, BSONObj(), true, ErrorCodes::OK);
+}
+
+TEST_F(MMAPV1FactoryTest, ValidateMetadataDirectoryPerDB) {
+ _testValidateMetadata(
+ factory, fromjson("{directoryPerDB: 123}"), false, ErrorCodes::FailedToParse);
+ _testValidateMetadata(factory, fromjson("{directoryPerDB: false}"), false, ErrorCodes::OK);
+ _testValidateMetadata(
+ factory, fromjson("{directoryPerDB: false}"), true, ErrorCodes::InvalidOptions);
+ _testValidateMetadata(
+ factory, fromjson("{directoryPerDB: true}"), false, ErrorCodes::InvalidOptions);
+ _testValidateMetadata(factory, fromjson("{directoryPerDB: true}"), true, ErrorCodes::OK);
+}
+
+void _testCreateMetadataOptions(const StorageEngine::Factory* factory, bool directoryPerDB) {
+ StorageGlobalParams storageOptions;
+ storageOptions.directoryperdb = directoryPerDB;
+
+ BSONObj metadataOptions = factory->createMetadataOptions(storageOptions);
+ BSONElement directoryPerDBElement = metadataOptions.getField("directoryPerDB");
+ ASSERT_TRUE(directoryPerDBElement.isBoolean());
+ ASSERT_EQUALS(directoryPerDB, directoryPerDBElement.boolean());
+}
+
+TEST_F(MMAPV1FactoryTest, CreateMetadataOptions) {
+ _testCreateMetadataOptions(factory, false);
+ _testCreateMetadataOptions(factory, true);
+}
} // namespace
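The ValidateMetadataDirectoryPerDB test above exercises a small decision matrix: a field absent from the stored metadata is not validated, a non-boolean value fails to parse, and a boolean value must match the startup flag. The same matrix as one function, for clarity (validateBoolOption is a hypothetical helper, not the StorageEngineMetadata API):

    enum class Code { OK, FailedToParse, InvalidOptions };

    // 'present' and 'isBool' model whether the metadata field exists and is
    // boolean; 'storedValue' is the persisted option, 'startupValue' the flag.
    Code validateBoolOption(bool present, bool isBool, bool storedValue, bool startupValue) {
        if (!present)
            return Code::OK;  // fields absent from metadata are not validated
        if (!isBool)
            return Code::FailedToParse;  // e.g. {directoryPerDB: 123}
        return storedValue == startupValue ? Code::OK : Code::InvalidOptions;
    }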
diff --git a/src/mongo/db/storage/mmap_v1/mmap_v1_options.cpp b/src/mongo/db/storage/mmap_v1/mmap_v1_options.cpp
index aa5168ea2c6..554a5eafe37 100644
--- a/src/mongo/db/storage/mmap_v1/mmap_v1_options.cpp
+++ b/src/mongo/db/storage/mmap_v1/mmap_v1_options.cpp
@@ -35,65 +35,65 @@
namespace mongo {
- MMAPV1Options mmapv1GlobalOptions;
+MMAPV1Options mmapv1GlobalOptions;
- /**
- * Specify an integer between 1 and 500 signifying the number of milliseconds (ms)
- * between journal commits.
- */
- class JournalCommitIntervalSetting : public ServerParameter {
- public:
- JournalCommitIntervalSetting() :
- ServerParameter(ServerParameterSet::getGlobal(), "journalCommitInterval",
- false, // allowedToChangeAtStartup
- true // allowedToChangeAtRuntime
- ) {}
+/**
+ * Specify an integer strictly between 1 and 500 (i.e., 2 to 499) signifying the
+ * number of milliseconds (ms) between journal commits.
+ */
+class JournalCommitIntervalSetting : public ServerParameter {
+public:
+ JournalCommitIntervalSetting()
+ : ServerParameter(ServerParameterSet::getGlobal(),
+ "journalCommitInterval",
+ false, // allowedToChangeAtStartup
+ true // allowedToChangeAtRuntime
+ ) {}
- virtual void append(OperationContext* txn, BSONObjBuilder& b, const std::string& name) {
- b << name << mmapv1GlobalOptions.journalCommitInterval;
- }
+ virtual void append(OperationContext* txn, BSONObjBuilder& b, const std::string& name) {
+ b << name << mmapv1GlobalOptions.journalCommitInterval;
+ }
- virtual Status set(const BSONElement& newValueElement) {
- long long newValue;
- if (!newValueElement.isNumber()) {
- StringBuilder sb;
- sb << "Expected number type for journalCommitInterval via setParameter command: "
- << newValueElement;
- return Status(ErrorCodes::BadValue, sb.str());
- }
- if (newValueElement.type() == NumberDouble &&
- (newValueElement.numberDouble() - newValueElement.numberLong()) > 0) {
- StringBuilder sb;
- sb << "journalCommitInterval must be a whole number: "
- << newValueElement;
- return Status(ErrorCodes::BadValue, sb.str());
- }
- newValue = newValueElement.numberLong();
- if (newValue <= 1 || newValue >= 500) {
- StringBuilder sb;
- sb << "journalCommitInterval must be between 1 and 500, but attempted to set to: "
- << newValue;
- return Status(ErrorCodes::BadValue, sb.str());
- }
- mmapv1GlobalOptions.journalCommitInterval = static_cast<unsigned>(newValue);
- return Status::OK();
+ virtual Status set(const BSONElement& newValueElement) {
+ long long newValue;
+ if (!newValueElement.isNumber()) {
+ StringBuilder sb;
+ sb << "Expected number type for journalCommitInterval via setParameter command: "
+ << newValueElement;
+ return Status(ErrorCodes::BadValue, sb.str());
+ }
+ if (newValueElement.type() == NumberDouble &&
+ (newValueElement.numberDouble() - newValueElement.numberLong()) > 0) {
+ StringBuilder sb;
+ sb << "journalCommitInterval must be a whole number: " << newValueElement;
+ return Status(ErrorCodes::BadValue, sb.str());
}
+ newValue = newValueElement.numberLong();
+ if (newValue <= 1 || newValue >= 500) {
+ StringBuilder sb;
+ sb << "journalCommitInterval must be between 1 and 500, but attempted to set to: "
+ << newValue;
+ return Status(ErrorCodes::BadValue, sb.str());
+ }
+ mmapv1GlobalOptions.journalCommitInterval = static_cast<unsigned>(newValue);
+ return Status::OK();
+ }
- virtual Status setFromString(const std::string& str) {
- unsigned newValue;
- Status status = parseNumberFromString(str, &newValue);
- if (!status.isOK()) {
- return status;
- }
- if (newValue <= 1 || newValue >= 500) {
- StringBuilder sb;
- sb << "journalCommitInterval must be between 1 and 500, but attempted to set to: "
- << newValue;
- return Status(ErrorCodes::BadValue, sb.str());
- }
- mmapv1GlobalOptions.journalCommitInterval = newValue;
- return Status::OK();
+ virtual Status setFromString(const std::string& str) {
+ unsigned newValue;
+ Status status = parseNumberFromString(str, &newValue);
+ if (!status.isOK()) {
+ return status;
+ }
+ if (newValue <= 1 || newValue >= 500) {
+ StringBuilder sb;
+ sb << "journalCommitInterval must be between 1 and 500, but attempted to set to: "
+ << newValue;
+ return Status(ErrorCodes::BadValue, sb.str());
}
- } journalCommitIntervalSetting;
+ mmapv1GlobalOptions.journalCommitInterval = newValue;
+ return Status::OK();
+ }
+} journalCommitIntervalSetting;
-} // namespace mongo
+} // namespace mongo
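JournalCommitIntervalSetting::set above performs three checks in order: the BSON element must be numeric, a double must have no fractional part, and the resulting integer must lie strictly between 1 and 500. The same validation on a plain double, as a standalone sketch (the Status plumbing is replaced with an out-parameter):

    #include <cmath>
    #include <string>

    // Mirrors the setter's checks: whole number, strictly between 1 and 500.
    bool validateJournalCommitInterval(double v, std::string* err) {
        if (v != std::floor(v)) {
            *err = "journalCommitInterval must be a whole number";
            return false;
        }
        const long long n = static_cast<long long>(v);
        if (n <= 1 || n >= 500) {
            *err = "journalCommitInterval must be between 1 and 500";
            return false;
        }
        return true;
    }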
diff --git a/src/mongo/db/storage/mmap_v1/mmap_v1_options.h b/src/mongo/db/storage/mmap_v1/mmap_v1_options.h
index f70dea73af7..d94d46c449e 100644
--- a/src/mongo/db/storage/mmap_v1/mmap_v1_options.h
+++ b/src/mongo/db/storage/mmap_v1/mmap_v1_options.h
@@ -37,60 +37,59 @@
namespace mongo {
- struct MMAPV1Options {
+struct MMAPV1Options {
+ MMAPV1Options()
+ : lenForNewNsFiles(16 * 1024 * 1024),
+ preallocj(true),
+ prealloc(false),
+ journalCommitInterval(0), // 0 means use default
+ quota(false),
+ quotaFiles(8) {}
- MMAPV1Options() :
- lenForNewNsFiles(16 * 1024 * 1024),
- preallocj(true),
- prealloc(false),
- journalCommitInterval(0), // 0 means use default
- quota(false),
- quotaFiles(8) {}
+ // --nssize
+ // Specifies the default size for namespace files, which are files that end in .ns.
+ // Each collection and index counts as a namespace.
+ unsigned lenForNewNsFiles;
- // --nssize
- // Specifies the default size for namespace files, which are files that end in .ns.
- // Each collection and index counts as a namespace.
- unsigned lenForNewNsFiles;
+ bool preallocj; // --nopreallocj no preallocation of journal files
+ bool prealloc; // --noprealloc no preallocation of data files
+ bool smallfiles; // --smallfiles allocate smaller data files
- bool preallocj; // --nopreallocj no preallocation of journal files
- bool prealloc; // --noprealloc no preallocation of data files
- bool smallfiles; // --smallfiles allocate smaller data files
+ // --journalCommitInterval
+ // The maximum amount of time the mongod process allows between journal operations.
+ // Values can range from 2 to 300 milliseconds. Lower values increase the durability
+ // of the journal, at the expense of disk performance.
+ unsigned journalCommitInterval; // group/batch commit interval ms
- // --journalCommitInterval
- // The maximum amount of time the mongod process allows between journal operations.
- // Values can range from 2 to 300 milliseconds. Lower values increase the durability
- // of the journal, at the expense of disk performance.
- unsigned journalCommitInterval; // group/batch commit interval ms
-
- // --journalOptions 7 dump journal and terminate without doing anything further
- // --journalOptions 4 recover and terminate without listening
- enum { // bits to be ORed
- JournalDumpJournal = 1, // dump diagnostics on the journal during recovery
- JournalScanOnly = 2, // don't do any real work, just scan and dump if dump
- // specified
- JournalRecoverOnly = 4, // terminate after recovery step
- JournalParanoid = 8, // paranoid mode enables extra checks
- JournalAlwaysCommit = 16, // do a group commit every time the writelock is released
- JournalAlwaysRemap = 32, // remap the private view after every group commit
- // (may lag to the next write lock acquisition,
- // but will do all files then)
- JournalNoCheckSpace = 64 // don't check that there is enough room for journal files
- // before startup (for diskfull tests)
- };
- int journalOptions; // --journalOptions <n> for debugging
+ // --journalOptions 7 dump journal and terminate without doing anything further
+ // --journalOptions 4 recover and terminate without listening
+ enum { // bits to be ORed
+ JournalDumpJournal = 1, // dump diagnostics on the journal during recovery
+ JournalScanOnly = 2, // don't do any real work, just scan and dump if dump
+ // specified
+ JournalRecoverOnly = 4, // terminate after recovery step
+ JournalParanoid = 8, // paranoid mode enables extra checks
+ JournalAlwaysCommit = 16, // do a group commit every time the writelock is released
+ JournalAlwaysRemap = 32, // remap the private view after every group commit
+ // (may lag to the next write lock acquisition,
+ // but will do all files then)
+ JournalNoCheckSpace = 64 // don't check that there is enough room for journal files
+ // before startup (for diskfull tests)
+ };
+ int journalOptions; // --journalOptions <n> for debugging
- // --quota
- // Enables a maximum limit for the number data files each database can have.
- // When running with the --quota option, MongoDB has a maximum of 8 data files
- // per database. Adjust the quota with --quotaFiles.
- bool quota;
+ // --quota
+ // Enables a maximum limit for the number data files each database can have.
+ // When running with the --quota option, MongoDB has a maximum of 8 data files
+ // per database. Adjust the quota with --quotaFiles.
+ bool quota;
- // --quotaFiles
- // Modifies the limit on the number of data files per database.
- // --quotaFiles option requires that you set --quota.
- int quotaFiles; // --quotaFiles
- };
+ // --quotaFiles
+ // Modifies the limit on the number of data files per database.
+ // --quotaFiles option requires that you set --quota.
+ int quotaFiles; // --quotaFiles
+};
- extern MMAPV1Options mmapv1GlobalOptions;
+extern MMAPV1Options mmapv1GlobalOptions;
-} // namespace mongo
+} // namespace mongo
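The journalOptions enum above is a bit set: the debugging flags are ORed together on the command line and tested individually. A short sketch of how the bits combine (only a subset of the flags is reproduced here):

    enum JournalBits {  // subset of the MMAPV1Options enum above
        JournalDumpJournal = 1,
        JournalScanOnly = 2,
        JournalRecoverOnly = 4,
        JournalParanoid = 8,
    };

    inline bool journalFlagSet(int journalOptions, int bit) {
        return (journalOptions & bit) != 0;
    }

    // --journalOptions 7 sets DumpJournal | ScanOnly | RecoverOnly: dump
    // diagnostics, scan without doing real work, terminate after recovery.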
diff --git a/src/mongo/db/storage/mmap_v1/mmap_v1_record_store_test.cpp b/src/mongo/db/storage/mmap_v1/mmap_v1_record_store_test.cpp
index 6e2e54d56c5..5a00a5a9a7f 100644
--- a/src/mongo/db/storage/mmap_v1/mmap_v1_record_store_test.cpp
+++ b/src/mongo/db/storage/mmap_v1/mmap_v1_record_store_test.cpp
@@ -41,58 +41,41 @@
namespace mongo {
- class MyHarnessHelper : public HarnessHelper {
- public:
- MyHarnessHelper() {
- }
+class MyHarnessHelper : public HarnessHelper {
+public:
+ MyHarnessHelper() {}
- virtual RecordStore* newNonCappedRecordStore() {
- OperationContextNoop txn;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData( false, 0 );
- md->setUserFlag( &txn, CollectionOptions::Flag_NoPadding );
- SimpleRecordStoreV1* rs = new SimpleRecordStoreV1( &txn,
- "a.b",
- md,
- &_em,
- false );
- return rs;
- }
-
- virtual RecordStore* newCappedRecordStore( int64_t cappedMaxSize,
- int64_t cappedMaxDocs ) {
- OperationContextNoop txn;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData( true, 0 );
- CappedRecordStoreV1* rs = new CappedRecordStoreV1( &txn,
- NULL,
- "a.b",
- md,
- &_em,
- false );
-
- LocAndSize records[] = {
- {}
- };
- LocAndSize drecs[] = {
- {DiskLoc(0, 1000), 1000},
- {}
- };
- md->setCapExtent(&txn, DiskLoc(0, 0));
- md->setCapFirstNewRecord(&txn, DiskLoc().setInvalid());
- initializeV1RS(&txn, records, drecs, NULL, &_em, md);
+ virtual RecordStore* newNonCappedRecordStore() {
+ OperationContextNoop txn;
+ DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(false, 0);
+ md->setUserFlag(&txn, CollectionOptions::Flag_NoPadding);
+ SimpleRecordStoreV1* rs = new SimpleRecordStoreV1(&txn, "a.b", md, &_em, false);
+ return rs;
+ }
- return rs;
- }
+ virtual RecordStore* newCappedRecordStore(int64_t cappedMaxSize, int64_t cappedMaxDocs) {
+ OperationContextNoop txn;
+ DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(true, 0);
+ CappedRecordStoreV1* rs = new CappedRecordStoreV1(&txn, NULL, "a.b", md, &_em, false);
- virtual RecoveryUnit* newRecoveryUnit() {
- return new RecoveryUnitNoop();
- }
+ LocAndSize records[] = {{}};
+ LocAndSize drecs[] = {{DiskLoc(0, 1000), 1000}, {}};
+ md->setCapExtent(&txn, DiskLoc(0, 0));
+ md->setCapFirstNewRecord(&txn, DiskLoc().setInvalid());
+ initializeV1RS(&txn, records, drecs, NULL, &_em, md);
- private:
- DummyExtentManager _em;
- };
+ return rs;
+ }
- HarnessHelper* newHarnessHelper() {
- return new MyHarnessHelper();
+ virtual RecoveryUnit* newRecoveryUnit() {
+ return new RecoveryUnitNoop();
}
+private:
+ DummyExtentManager _em;
+};
+
+HarnessHelper* newHarnessHelper() {
+ return new MyHarnessHelper();
+}
}
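The fixture data in newCappedRecordStore above uses sentinel-terminated arrays: the empty {} entry marks the end of each list, so the records list is empty and the single drecs entry describes one free (deleted) region of 1000 bytes at file 0, offset 1000. A sketch of that shape, with illustrative stand-ins for DiskLoc and LocAndSize:

    // Illustrative stand-ins; the real types live in the V1 record store code.
    struct Loc {
        int file;
        int ofs;
    };
    struct LocAndSize {
        Loc loc;
        int size;
    };

    // No live records; one 1000-byte deleted region at file 0, offset 1000.
    // The all-zero {} entry is the end-of-list sentinel.
    static const LocAndSize records[] = {{}};
    static const LocAndSize drecs[] = {{{0, 1000}, 1000}, {}};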
diff --git a/src/mongo/db/storage/mmap_v1/mmap_windows.cpp b/src/mongo/db/storage/mmap_v1/mmap_windows.cpp
index 2969028575a..88abedd9c77 100644
--- a/src/mongo/db/storage/mmap_v1/mmap_windows.cpp
+++ b/src/mongo/db/storage/mmap_v1/mmap_windows.cpp
@@ -43,498 +43,472 @@
namespace mongo {
- using std::endl;
- using std::string;
- using std::vector;
+using std::endl;
+using std::string;
+using std::vector;
- namespace {
- mongo::AtomicUInt64 mmfNextId(0);
- }
+namespace {
+mongo::AtomicUInt64 mmfNextId(0);
+}
- static size_t fetchMinOSPageSizeBytes() {
- SYSTEM_INFO si;
- GetSystemInfo(&si);
- size_t minOSPageSizeBytes = si.dwPageSize;
- minOSPageSizeBytesTest(minOSPageSizeBytes);
- return minOSPageSizeBytes;
- }
- const size_t g_minOSPageSizeBytes = fetchMinOSPageSizeBytes();
-
- // MapViewMutex
- //
- // Protects:
- // 1. Ensures all MapViewOfFile/UnMapViewOfFile operations are serialized to reduce chance of
- // "address in use" errors (error code 487)
- // - These errors can still occur if the memory is used for other purposes
- // (stack storage, heap)
- // 2. Prevents calls to VirtualProtect while we remapping files.
- // Lock Ordering:
- // - If taken, must be after previewViews._m to prevent deadlocks
- stdx::mutex mapViewMutex;
-
- MAdvise::MAdvise(void *,unsigned, Advice) { }
- MAdvise::~MAdvise() { }
-
- const unsigned long long memoryMappedFileLocationFloor = 256LL * 1024LL * 1024LL * 1024LL;
- static unsigned long long _nextMemoryMappedFileLocation = memoryMappedFileLocationFloor;
-
- // nextMemoryMappedFileLocationMutex
- //
- // Protects:
- // Windows 64-bit specific allocation of virtual memory regions for
- // placing memory mapped files in memory
- // Lock Ordering:
- // No restrictions
- static SimpleMutex _nextMemoryMappedFileLocationMutex;
-
- unsigned long long AlignNumber(unsigned long long number, unsigned long long granularity)
- {
- return (number + granularity - 1) & ~(granularity - 1);
+static size_t fetchMinOSPageSizeBytes() {
+ SYSTEM_INFO si;
+ GetSystemInfo(&si);
+ size_t minOSPageSizeBytes = si.dwPageSize;
+ minOSPageSizeBytesTest(minOSPageSizeBytes);
+ return minOSPageSizeBytes;
+}
+const size_t g_minOSPageSizeBytes = fetchMinOSPageSizeBytes();
+
+// MapViewMutex
+//
+// Protects:
+// 1. Ensures all MapViewOfFile/UnMapViewOfFile operations are serialized to reduce chance of
+// "address in use" errors (error code 487)
+// - These errors can still occur if the memory is used for other purposes
+// (stack storage, heap)
+//   2. Prevents calls to VirtualProtect while we are remapping files.
+// Lock Ordering:
+// - If taken, must be after previewViews._m to prevent deadlocks
+stdx::mutex mapViewMutex;
+
+MAdvise::MAdvise(void*, unsigned, Advice) {}
+MAdvise::~MAdvise() {}
+
+const unsigned long long memoryMappedFileLocationFloor = 256LL * 1024LL * 1024LL * 1024LL;
+static unsigned long long _nextMemoryMappedFileLocation = memoryMappedFileLocationFloor;
+
+// nextMemoryMappedFileLocationMutex
+//
+// Protects:
+// Windows 64-bit specific allocation of virtual memory regions for
+// placing memory mapped files in memory
+// Lock Ordering:
+// No restrictions
+static SimpleMutex _nextMemoryMappedFileLocationMutex;
+
+unsigned long long AlignNumber(unsigned long long number, unsigned long long granularity) {
+ return (number + granularity - 1) & ~(granularity - 1);
+}
+
+static void* getNextMemoryMappedFileLocation(unsigned long long mmfSize) {
+ if (4 == sizeof(void*)) {
+ return 0;
}
+ stdx::lock_guard<SimpleMutex> lk(_nextMemoryMappedFileLocationMutex);
- static void* getNextMemoryMappedFileLocation(unsigned long long mmfSize) {
- if (4 == sizeof(void*)) {
- return 0;
- }
- stdx::lock_guard<SimpleMutex> lk(_nextMemoryMappedFileLocationMutex);
+ static unsigned long long granularity = 0;
- static unsigned long long granularity = 0;
+ if (0 == granularity) {
+ SYSTEM_INFO systemInfo;
+ GetSystemInfo(&systemInfo);
+ granularity = static_cast<unsigned long long>(systemInfo.dwAllocationGranularity);
+ }
- if (0 == granularity) {
- SYSTEM_INFO systemInfo;
- GetSystemInfo(&systemInfo);
- granularity = static_cast<unsigned long long>(systemInfo.dwAllocationGranularity);
- }
+ unsigned long long thisMemoryMappedFileLocation = _nextMemoryMappedFileLocation;
- unsigned long long thisMemoryMappedFileLocation = _nextMemoryMappedFileLocation;
+ int current_retry = 1;
- int current_retry = 1;
+ while (true) {
+ MEMORY_BASIC_INFORMATION memInfo;
- while (true) {
- MEMORY_BASIC_INFORMATION memInfo;
-
- if (VirtualQuery(reinterpret_cast<LPCVOID>(thisMemoryMappedFileLocation),
- &memInfo, sizeof(memInfo)) == 0) {
- DWORD gle = GetLastError();
-
- // If we exceed the limits of Virtual Memory
- // - 8TB before Windows 8.1/2012 R2, 128 TB after
- // restart scanning from our memory mapped floor once more
- // This is a linear scan of regions, not of every VM page
- if (gle == ERROR_INVALID_PARAMETER && current_retry == 1) {
- thisMemoryMappedFileLocation = memoryMappedFileLocationFloor;
- ++current_retry;
- continue;
- }
+ if (VirtualQuery(reinterpret_cast<LPCVOID>(thisMemoryMappedFileLocation),
+ &memInfo,
+ sizeof(memInfo)) == 0) {
+ DWORD gle = GetLastError();
- log() << "VirtualQuery of " << thisMemoryMappedFileLocation
- << " failed with error " << errnoWithDescription(gle);
- fassertFailed(17484);
+            // If we exceed the limits of virtual memory
+            // (8 TB before Windows 8.1/2012 R2, 128 TB after),
+            // restart scanning from our memory-mapped floor once more.
+            // This is a linear scan of regions, not of every VM page.
+ if (gle == ERROR_INVALID_PARAMETER && current_retry == 1) {
+ thisMemoryMappedFileLocation = memoryMappedFileLocationFloor;
+ ++current_retry;
+ continue;
}
- // Free memory regions that we can use for memory map files
- // 1. Marked MEM_FREE, not MEM_RESERVE
- // 2. Marked as PAGE_NOACCESS, not anything else
- if (memInfo.Protect == PAGE_NOACCESS &&
- memInfo.State == MEM_FREE &&
- memInfo.RegionSize > mmfSize)
- break;
-
- thisMemoryMappedFileLocation = reinterpret_cast<unsigned long long>(memInfo.BaseAddress)
- + memInfo.RegionSize;
+ log() << "VirtualQuery of " << thisMemoryMappedFileLocation << " failed with error "
+ << errnoWithDescription(gle);
+ fassertFailed(17484);
}
- _nextMemoryMappedFileLocation = thisMemoryMappedFileLocation
- + AlignNumber(mmfSize, granularity);
+        // Free memory regions that we can use for memory-mapped files
+ // 1. Marked MEM_FREE, not MEM_RESERVE
+ // 2. Marked as PAGE_NOACCESS, not anything else
+ if (memInfo.Protect == PAGE_NOACCESS && memInfo.State == MEM_FREE &&
+ memInfo.RegionSize > mmfSize)
+ break;
- return reinterpret_cast<void*>(static_cast<uintptr_t>(thisMemoryMappedFileLocation));
+ thisMemoryMappedFileLocation =
+ reinterpret_cast<unsigned long long>(memInfo.BaseAddress) + memInfo.RegionSize;
}
- MemoryMappedFile::MemoryMappedFile()
- : _uniqueId(mmfNextId.fetchAndAdd(1)),
- fd(0),
- maphandle(0),
- len(0) {
+ _nextMemoryMappedFileLocation =
+ thisMemoryMappedFileLocation + AlignNumber(mmfSize, granularity);
- created();
- }
+ return reinterpret_cast<void*>(static_cast<uintptr_t>(thisMemoryMappedFileLocation));
+}
- void MemoryMappedFile::close() {
- LockMongoFilesShared::assertExclusivelyLocked();
+MemoryMappedFile::MemoryMappedFile()
+ : _uniqueId(mmfNextId.fetchAndAdd(1)), fd(0), maphandle(0), len(0) {
+ created();
+}
- // Prevent flush and close from concurrently running
- stdx::lock_guard<stdx::mutex> lk(_flushMutex);
+void MemoryMappedFile::close() {
+ LockMongoFilesShared::assertExclusivelyLocked();
- {
- stdx::lock_guard<stdx::mutex> lk(mapViewMutex);
+ // Prevent flush and close from concurrently running
+ stdx::lock_guard<stdx::mutex> lk(_flushMutex);
- for (vector<void*>::iterator i = views.begin(); i != views.end(); i++) {
- UnmapViewOfFile(*i);
- }
- }
+ {
+ stdx::lock_guard<stdx::mutex> lk(mapViewMutex);
- views.clear();
- if ( maphandle )
- CloseHandle(maphandle);
- maphandle = 0;
- if ( fd )
- CloseHandle(fd);
- fd = 0;
- destroyed(); // cleans up from the master list of mmaps
+ for (vector<void*>::iterator i = views.begin(); i != views.end(); i++) {
+ UnmapViewOfFile(*i);
+ }
}
- unsigned long long mapped = 0;
+ views.clear();
+ if (maphandle)
+ CloseHandle(maphandle);
+ maphandle = 0;
+ if (fd)
+ CloseHandle(fd);
+ fd = 0;
+ destroyed(); // cleans up from the master list of mmaps
+}
- void* MemoryMappedFile::createReadOnlyMap() {
- verify( maphandle );
+unsigned long long mapped = 0;
- stdx::lock_guard<stdx::mutex> lk(mapViewMutex);
+void* MemoryMappedFile::createReadOnlyMap() {
+ verify(maphandle);
- void* readOnlyMapAddress = NULL;
- int current_retry = 0;
+ stdx::lock_guard<stdx::mutex> lk(mapViewMutex);
- while (true) {
+ void* readOnlyMapAddress = NULL;
+ int current_retry = 0;
- LPVOID thisAddress = getNextMemoryMappedFileLocation(len);
+ while (true) {
+ LPVOID thisAddress = getNextMemoryMappedFileLocation(len);
- readOnlyMapAddress = MapViewOfFileEx(
- maphandle, // file mapping handle
- FILE_MAP_READ, // access
- 0, 0, // file offset, high and low
- 0, // bytes to map, 0 == all
- thisAddress); // address to place file
+ readOnlyMapAddress = MapViewOfFileEx(maphandle, // file mapping handle
+ FILE_MAP_READ, // access
+ 0,
+ 0, // file offset, high and low
+ 0, // bytes to map, 0 == all
+ thisAddress); // address to place file
- if (0 == readOnlyMapAddress) {
- DWORD dosError = GetLastError();
-
- ++current_retry;
-
- // If we failed to allocate a memory mapped file, try again in case we picked
- // an address that Windows is also trying to use for some other VM allocations
- if (dosError == ERROR_INVALID_ADDRESS && current_retry < 5) {
- continue;
- }
+ if (0 == readOnlyMapAddress) {
+ DWORD dosError = GetLastError();
- log() << "MapViewOfFileEx for " << filename()
- << " at address " << thisAddress
- << " failed with error " << errnoWithDescription(dosError)
- << " (file size is " << len << ")"
- << " in MemoryMappedFile::createReadOnlyMap"
- << endl;
+ ++current_retry;
- fassertFailed(16165);
+ // If we failed to allocate a memory mapped file, try again in case we picked
+ // an address that Windows is also trying to use for some other VM allocations
+ if (dosError == ERROR_INVALID_ADDRESS && current_retry < 5) {
+ continue;
}
- break;
+ log() << "MapViewOfFileEx for " << filename() << " at address " << thisAddress
+ << " failed with error " << errnoWithDescription(dosError) << " (file size is "
+ << len << ")"
+ << " in MemoryMappedFile::createReadOnlyMap" << endl;
+
+ fassertFailed(16165);
}
- views.push_back( readOnlyMapAddress );
- return readOnlyMapAddress;
+ break;
}
- void* MemoryMappedFile::map(const char *filenameIn, unsigned long long &length, int options) {
- verify( fd == 0 && len == 0 ); // can't open more than once
- setFilename(filenameIn);
- FileAllocator::get()->allocateAsap( filenameIn, length );
- /* big hack here: Babble uses db names with colons. doesn't seem to work on windows. temporary perhaps. */
- char filename[256];
- strncpy(filename, filenameIn, 255);
- filename[255] = 0;
- {
- size_t len = strlen( filename );
- for ( size_t i=len-1; i>=0; i-- ) {
- if ( filename[i] == '/' ||
- filename[i] == '\\' )
- break;
+ views.push_back(readOnlyMapAddress);
+ return readOnlyMapAddress;
+}
- if ( filename[i] == ':' )
- filename[i] = '_';
- }
+void* MemoryMappedFile::map(const char* filenameIn, unsigned long long& length, int options) {
+ verify(fd == 0 && len == 0); // can't open more than once
+ setFilename(filenameIn);
+ FileAllocator::get()->allocateAsap(filenameIn, length);
+    /* big hack here: Babble uses db names with colons, which don't work on Windows, so rewrite them. temporary perhaps. */
+ char filename[256];
+ strncpy(filename, filenameIn, 255);
+ filename[255] = 0;
+ {
+ size_t len = strlen(filename);
+        for (size_t i = len; i-- > 0;) {  // size_t is unsigned, so 'i >= 0' was always true
+ if (filename[i] == '/' || filename[i] == '\\')
+ break;
+
+ if (filename[i] == ':')
+ filename[i] = '_';
}
+ }
- updateLength( filename, length );
+ updateLength(filename, length);
- {
- DWORD createOptions = FILE_ATTRIBUTE_NORMAL;
- if ( options & SEQUENTIAL )
- createOptions |= FILE_FLAG_SEQUENTIAL_SCAN;
- DWORD rw = GENERIC_READ | GENERIC_WRITE;
- fd = CreateFileW(
- toWideString(filename).c_str(),
- rw, // desired access
- FILE_SHARE_WRITE | FILE_SHARE_READ, // share mode
- NULL, // security
- OPEN_ALWAYS, // create disposition
- createOptions , // flags
- NULL); // hTempl
- if ( fd == INVALID_HANDLE_VALUE ) {
- DWORD dosError = GetLastError();
- log() << "CreateFileW for " << filename
- << " failed with " << errnoWithDescription( dosError )
- << " (file size is " << length << ")"
- << " in MemoryMappedFile::map"
- << endl;
- return 0;
- }
+ {
+ DWORD createOptions = FILE_ATTRIBUTE_NORMAL;
+ if (options & SEQUENTIAL)
+ createOptions |= FILE_FLAG_SEQUENTIAL_SCAN;
+ DWORD rw = GENERIC_READ | GENERIC_WRITE;
+ fd = CreateFileW(toWideString(filename).c_str(),
+ rw, // desired access
+ FILE_SHARE_WRITE | FILE_SHARE_READ, // share mode
+ NULL, // security
+ OPEN_ALWAYS, // create disposition
+ createOptions, // flags
+ NULL); // hTempl
+ if (fd == INVALID_HANDLE_VALUE) {
+ DWORD dosError = GetLastError();
+ log() << "CreateFileW for " << filename << " failed with "
+ << errnoWithDescription(dosError) << " (file size is " << length << ")"
+ << " in MemoryMappedFile::map" << endl;
+ return 0;
}
+ }
- mapped += length;
+ mapped += length;
- {
- DWORD flProtect = PAGE_READWRITE; //(options & READONLY)?PAGE_READONLY:PAGE_READWRITE;
- maphandle = CreateFileMappingW(fd, NULL, flProtect,
- length >> 32 /*maxsizehigh*/,
- (unsigned) length /*maxsizelow*/,
- NULL/*lpName*/);
- if ( maphandle == NULL ) {
- DWORD dosError = GetLastError();
- log() << "CreateFileMappingW for " << filename
- << " failed with " << errnoWithDescription( dosError )
- << " (file size is " << length << ")"
- << " in MemoryMappedFile::map"
- << endl;
- close();
- fassertFailed( 16225 );
- }
+ {
+ DWORD flProtect = PAGE_READWRITE; //(options & READONLY)?PAGE_READONLY:PAGE_READWRITE;
+ maphandle = CreateFileMappingW(fd,
+ NULL,
+ flProtect,
+ length >> 32 /*maxsizehigh*/,
+ (unsigned)length /*maxsizelow*/,
+ NULL /*lpName*/);
+ if (maphandle == NULL) {
+ DWORD dosError = GetLastError();
+ log() << "CreateFileMappingW for " << filename << " failed with "
+ << errnoWithDescription(dosError) << " (file size is " << length << ")"
+ << " in MemoryMappedFile::map" << endl;
+ close();
+ fassertFailed(16225);
}
+ }
- void *view = 0;
- {
- stdx::lock_guard<stdx::mutex> lk(mapViewMutex);
- DWORD access = ( options & READONLY ) ? FILE_MAP_READ : FILE_MAP_ALL_ACCESS;
-
- int current_retry = 0;
- while (true) {
+ void* view = 0;
+ {
+ stdx::lock_guard<stdx::mutex> lk(mapViewMutex);
+ DWORD access = (options & READONLY) ? FILE_MAP_READ : FILE_MAP_ALL_ACCESS;
- LPVOID thisAddress = getNextMemoryMappedFileLocation(length);
+ int current_retry = 0;
+ while (true) {
+ LPVOID thisAddress = getNextMemoryMappedFileLocation(length);
- view = MapViewOfFileEx(
- maphandle, // file mapping handle
- access, // access
- 0, 0, // file offset, high and low
- 0, // bytes to map, 0 == all
- thisAddress); // address to place file
+ view = MapViewOfFileEx(maphandle, // file mapping handle
+ access, // access
+ 0,
+ 0, // file offset, high and low
+ 0, // bytes to map, 0 == all
+ thisAddress); // address to place file
- if (view == 0) {
- DWORD dosError = GetLastError();
+ if (view == 0) {
+ DWORD dosError = GetLastError();
- ++current_retry;
+ ++current_retry;
- // If we failed to allocate a memory mapped file, try again in case we picked
- // an address that Windows is also trying to use for some other VM allocations
- if (dosError == ERROR_INVALID_ADDRESS && current_retry < 5) {
- continue;
- }
+ // If we failed to allocate a memory mapped file, try again in case we picked
+ // an address that Windows is also trying to use for some other VM allocations
+ if (dosError == ERROR_INVALID_ADDRESS && current_retry < 5) {
+ continue;
+ }
#ifndef _WIN64
- // Warn user that if they are running a 32-bit app on 64-bit Windows
- if (dosError == ERROR_NOT_ENOUGH_MEMORY) {
- BOOL wow64Process;
- BOOL retWow64 = IsWow64Process(GetCurrentProcess(), &wow64Process);
- if (retWow64 && wow64Process) {
- log() << "This is a 32-bit MongoDB binary running on a 64-bit"
- " operating system that has run out of virtual memory for"
- " databases. Switch to a 64-bit build of MongoDB to open"
- " the databases.";
- }
+                // Warn the user if they are running a 32-bit app on 64-bit Windows
+ if (dosError == ERROR_NOT_ENOUGH_MEMORY) {
+ BOOL wow64Process;
+ BOOL retWow64 = IsWow64Process(GetCurrentProcess(), &wow64Process);
+ if (retWow64 && wow64Process) {
+ log() << "This is a 32-bit MongoDB binary running on a 64-bit"
+ " operating system that has run out of virtual memory for"
+ " databases. Switch to a 64-bit build of MongoDB to open"
+ " the databases.";
}
+ }
#endif
- log() << "MapViewOfFileEx for " << filename
- << " at address " << thisAddress
- << " failed with " << errnoWithDescription(dosError)
- << " (file size is " << length << ")"
- << " in MemoryMappedFile::map"
- << endl;
-
- close();
- fassertFailed(16166);
- }
+ log() << "MapViewOfFileEx for " << filename << " at address " << thisAddress
+ << " failed with " << errnoWithDescription(dosError) << " (file size is "
+ << length << ")"
+ << " in MemoryMappedFile::map" << endl;
- break;
+ close();
+ fassertFailed(16166);
}
- }
- views.push_back(view);
- len = length;
- return view;
+ break;
+ }
}
- extern stdx::mutex mapViewMutex;
-
- void* MemoryMappedFile::createPrivateMap() {
- verify( maphandle );
-
- stdx::lock_guard<stdx::mutex> lk(mapViewMutex);
+ views.push_back(view);
+ len = length;
+ return view;
+}
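
Stripped of the retry loop, the logging, and the colon rewriting, map() is the standard three-call Windows sequence: CreateFileW for a handle, CreateFileMappingW for a mapping object, MapViewOfFileEx for the view. A condensed sketch under those simplifying assumptions (mapWholeFile is a hypothetical name; the real code also chooses the base address itself):

    #include <windows.h>

    void* mapWholeFile(const wchar_t* path, unsigned long long length) {
        HANDLE file = CreateFileW(path, GENERIC_READ | GENERIC_WRITE,
                                  FILE_SHARE_WRITE | FILE_SHARE_READ, NULL,
                                  OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
        if (file == INVALID_HANDLE_VALUE)
            return nullptr;
        // The high/low halves of 'length' bound the size of the mapping object.
        HANDLE mapping = CreateFileMappingW(file, NULL, PAGE_READWRITE,
                                            static_cast<DWORD>(length >> 32),
                                            static_cast<DWORD>(length), NULL);
        if (mapping == NULL) {
            CloseHandle(file);
            return nullptr;
        }
        // Mapping 0 bytes means "the entire file"; a NULL base lets Windows
        // choose the address instead of getNextMemoryMappedFileLocation().
        void* view = MapViewOfFileEx(mapping, FILE_MAP_ALL_ACCESS, 0, 0, 0, NULL);
        CloseHandle(mapping);  // the view keeps the mapping object alive
        CloseHandle(file);
        return view;
    }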
- LPVOID thisAddress = getNextMemoryMappedFileLocation( len );
+extern stdx::mutex mapViewMutex;
- void* privateMapAddress = NULL;
- int current_retry = 0;
+void* MemoryMappedFile::createPrivateMap() {
+ verify(maphandle);
- while (true) {
+ stdx::lock_guard<stdx::mutex> lk(mapViewMutex);
- privateMapAddress = MapViewOfFileEx(
- maphandle, // file mapping handle
- FILE_MAP_READ, // access
- 0, 0, // file offset, high and low
- 0, // bytes to map, 0 == all
- thisAddress); // address to place file
+ LPVOID thisAddress = getNextMemoryMappedFileLocation(len);
- if (privateMapAddress == 0) {
- DWORD dosError = GetLastError();
+ void* privateMapAddress = NULL;
+ int current_retry = 0;
- ++current_retry;
+ while (true) {
+ privateMapAddress = MapViewOfFileEx(maphandle, // file mapping handle
+ FILE_MAP_READ, // access
+ 0,
+ 0, // file offset, high and low
+ 0, // bytes to map, 0 == all
+ thisAddress); // address to place file
- // If we failed to allocate a memory mapped file, try again in case we picked
- // an address that Windows is also trying to use for some other VM allocations
- if (dosError == ERROR_INVALID_ADDRESS && current_retry < 5) {
- continue;
- }
+ if (privateMapAddress == 0) {
+ DWORD dosError = GetLastError();
- log() << "MapViewOfFileEx for " << filename()
- << " failed with error " << errnoWithDescription(dosError)
- << " (file size is " << len << ")"
- << " in MemoryMappedFile::createPrivateMap"
- << endl;
+ ++current_retry;
- fassertFailed(16167);
+ // If we failed to allocate a memory mapped file, try again in case we picked
+ // an address that Windows is also trying to use for some other VM allocations
+ if (dosError == ERROR_INVALID_ADDRESS && current_retry < 5) {
+ continue;
}
- break;
+ log() << "MapViewOfFileEx for " << filename() << " failed with error "
+ << errnoWithDescription(dosError) << " (file size is " << len << ")"
+ << " in MemoryMappedFile::createPrivateMap" << endl;
+
+ fassertFailed(16167);
}
- views.push_back( privateMapAddress );
- return privateMapAddress;
+ break;
}
- void* MemoryMappedFile::remapPrivateView(void *oldPrivateAddr) {
- LockMongoFilesExclusive lockMongoFiles;
+ views.push_back(privateMapAddress);
+ return privateMapAddress;
+}
- privateViews.clearWritableBits(oldPrivateAddr, len);
+void* MemoryMappedFile::remapPrivateView(void* oldPrivateAddr) {
+ LockMongoFilesExclusive lockMongoFiles;
- stdx::lock_guard<stdx::mutex> lk(mapViewMutex);
+ privateViews.clearWritableBits(oldPrivateAddr, len);
- if( !UnmapViewOfFile(oldPrivateAddr) ) {
- DWORD dosError = GetLastError();
- log() << "UnMapViewOfFile for " << filename()
- << " failed with error " << errnoWithDescription( dosError )
- << " in MemoryMappedFile::remapPrivateView"
- << endl;
- fassertFailed( 16168 );
- }
+ stdx::lock_guard<stdx::mutex> lk(mapViewMutex);
- void* newPrivateView = MapViewOfFileEx(
- maphandle, // file mapping handle
- FILE_MAP_READ, // access
- 0, 0, // file offset, high and low
- 0, // bytes to map, 0 == all
- oldPrivateAddr ); // we want the same address we had before
- if ( 0 == newPrivateView ) {
- DWORD dosError = GetLastError();
- log() << "MapViewOfFileEx for " << filename()
- << " failed with error " << errnoWithDescription( dosError )
- << " (file size is " << len << ")"
- << " in MemoryMappedFile::remapPrivateView"
- << endl;
- }
- fassert( 16148, newPrivateView == oldPrivateAddr );
- return newPrivateView;
+ if (!UnmapViewOfFile(oldPrivateAddr)) {
+ DWORD dosError = GetLastError();
+ log() << "UnMapViewOfFile for " << filename() << " failed with error "
+ << errnoWithDescription(dosError) << " in MemoryMappedFile::remapPrivateView" << endl;
+ fassertFailed(16168);
}
- class WindowsFlushable : public MemoryMappedFile::Flushable {
- public:
- WindowsFlushable( MemoryMappedFile* theFile,
- void * view,
- HANDLE fd,
- const uint64_t id,
- const std::string& filename,
- stdx::mutex& flushMutex )
- : _theFile(theFile), _view(view), _fd(fd), _id(id), _filename(filename),
- _flushMutex(flushMutex)
- {}
-
- void flush() {
- if (!_view || !_fd)
- return;
+ void* newPrivateView =
+ MapViewOfFileEx(maphandle, // file mapping handle
+ FILE_MAP_READ, // access
+ 0,
+ 0, // file offset, high and low
+ 0, // bytes to map, 0 == all
+ oldPrivateAddr); // we want the same address we had before
+ if (0 == newPrivateView) {
+ DWORD dosError = GetLastError();
+ log() << "MapViewOfFileEx for " << filename() << " failed with error "
+ << errnoWithDescription(dosError) << " (file size is " << len << ")"
+ << " in MemoryMappedFile::remapPrivateView" << endl;
+ }
+ fassert(16148, newPrivateView == oldPrivateAddr);
+ return newPrivateView;
+}
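
remapPrivateView() is an unmap-then-remap-at-the-same-address idiom: tearing the private view down discards its dirtied copy-on-write pages, and requesting the old base address back keeps every outstanding pointer into the view valid. The idiom in isolation (remapClean is a hypothetical helper; the real code fasserts rather than returning null):

    #include <windows.h>

    void* remapClean(HANDLE mapping, void* oldAddr) {
        if (!UnmapViewOfFile(oldAddr))  // drops the privately dirtied pages
            return nullptr;
        // Ask for exactly the old base address; this can fail if another
        // allocation claimed the range between the two calls.
        return MapViewOfFileEx(mapping, FILE_MAP_READ, 0, 0, 0, oldAddr);
    }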
- {
- LockMongoFilesShared mmfilesLock;
+class WindowsFlushable : public MemoryMappedFile::Flushable {
+public:
+ WindowsFlushable(MemoryMappedFile* theFile,
+ void* view,
+ HANDLE fd,
+ const uint64_t id,
+ const std::string& filename,
+ stdx::mutex& flushMutex)
+ : _theFile(theFile),
+ _view(view),
+ _fd(fd),
+ _id(id),
+ _filename(filename),
+ _flushMutex(flushMutex) {}
+
+ void flush() {
+ if (!_view || !_fd)
+ return;
- std::set<MongoFile*> mmfs = MongoFile::getAllFiles();
- std::set<MongoFile*>::const_iterator it = mmfs.find(_theFile);
- if ( it == mmfs.end() || (*it)->getUniqueId() != _id ) {
- // this was deleted while we were unlocked
- return;
- }
+ {
+ LockMongoFilesShared mmfilesLock;
- // Hold the flush mutex to ensure the file is not closed during flush
- _flushMutex.lock();
+ std::set<MongoFile*> mmfs = MongoFile::getAllFiles();
+ std::set<MongoFile*>::const_iterator it = mmfs.find(_theFile);
+ if (it == mmfs.end() || (*it)->getUniqueId() != _id) {
+ // this was deleted while we were unlocked
+ return;
}
- stdx::lock_guard<stdx::mutex> lk(_flushMutex, stdx::adopt_lock);
-
- int loopCount = 0;
- bool success = false;
- bool timeout = false;
- int dosError = ERROR_SUCCESS;
- const int maximumTimeInSeconds = 60 * 15;
- Timer t;
- while ( !success && !timeout ) {
- ++loopCount;
- success = FALSE != FlushViewOfFile( _view, 0 );
- if ( !success ) {
- dosError = GetLastError();
- if ( dosError != ERROR_LOCK_VIOLATION ) {
- break;
- }
- timeout = t.seconds() > maximumTimeInSeconds;
- }
- }
- if ( success && loopCount > 1 ) {
- log() << "FlushViewOfFile for " << _filename
- << " succeeded after " << loopCount
- << " attempts taking " << t.millis()
- << "ms" << endl;
- }
- else if ( !success ) {
- log() << "FlushViewOfFile for " << _filename
- << " failed with error " << dosError
- << " after " << loopCount
- << " attempts taking " << t.millis()
- << "ms" << endl;
- // Abort here to avoid data corruption
- fassert(16387, false);
- }
+ // Hold the flush mutex to ensure the file is not closed during flush
+ _flushMutex.lock();
+ }
- success = FALSE != FlushFileBuffers(_fd);
+ stdx::lock_guard<stdx::mutex> lk(_flushMutex, stdx::adopt_lock);
+
+ int loopCount = 0;
+ bool success = false;
+ bool timeout = false;
+ int dosError = ERROR_SUCCESS;
+ const int maximumTimeInSeconds = 60 * 15;
+ Timer t;
+ while (!success && !timeout) {
+ ++loopCount;
+ success = FALSE != FlushViewOfFile(_view, 0);
if (!success) {
- int err = GetLastError();
- log() << "FlushFileBuffers failed: " << errnoWithDescription( err )
- << " file: " << _filename << endl;
- dataSyncFailedHandler();
+ dosError = GetLastError();
+ if (dosError != ERROR_LOCK_VIOLATION) {
+ break;
+ }
+ timeout = t.seconds() > maximumTimeInSeconds;
}
}
+ if (success && loopCount > 1) {
+ log() << "FlushViewOfFile for " << _filename << " succeeded after " << loopCount
+ << " attempts taking " << t.millis() << "ms" << endl;
+ } else if (!success) {
+ log() << "FlushViewOfFile for " << _filename << " failed with error " << dosError
+ << " after " << loopCount << " attempts taking " << t.millis() << "ms" << endl;
+ // Abort here to avoid data corruption
+ fassert(16387, false);
+ }
- MemoryMappedFile* _theFile; // this may be deleted while we are running
- void * _view;
- HANDLE _fd;
- const uint64_t _id;
- string _filename;
- stdx::mutex& _flushMutex;
- };
-
- void MemoryMappedFile::flush(bool sync) {
- uassert(13056, "Async flushing not supported on windows", sync);
- if( !views.empty() ) {
- WindowsFlushable f(this, viewForFlushing(), fd, _uniqueId, filename(), _flushMutex);
- f.flush();
+ success = FALSE != FlushFileBuffers(_fd);
+ if (!success) {
+ int err = GetLastError();
+ log() << "FlushFileBuffers failed: " << errnoWithDescription(err)
+ << " file: " << _filename << endl;
+ dataSyncFailedHandler();
}
}
- MemoryMappedFile::Flushable * MemoryMappedFile::prepareFlush() {
- return new WindowsFlushable(this, viewForFlushing(), fd, _uniqueId,
- filename(), _flushMutex);
+ MemoryMappedFile* _theFile; // this may be deleted while we are running
+ void* _view;
+ HANDLE _fd;
+ const uint64_t _id;
+ string _filename;
+ stdx::mutex& _flushMutex;
+};
+
+void MemoryMappedFile::flush(bool sync) {
+ uassert(13056, "Async flushing not supported on windows", sync);
+ if (!views.empty()) {
+ WindowsFlushable f(this, viewForFlushing(), fd, _uniqueId, filename(), _flushMutex);
+ f.flush();
}
+}
+MemoryMappedFile::Flushable* MemoryMappedFile::prepareFlush() {
+ return new WindowsFlushable(this, viewForFlushing(), fd, _uniqueId, filename(), _flushMutex);
+}
}
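
One detail worth pulling out of WindowsFlushable::flush() above: FlushViewOfFile is retried only while the failure is ERROR_LOCK_VIOLATION, a transient conflict with concurrent writers to the view, and the loop gives up after roughly fifteen minutes. The policy in isolation, as a sketch (flushWithRetry and maxSeconds are hypothetical):

    #include <windows.h>
    #include <ctime>

    bool flushWithRetry(void* view, int maxSeconds) {
        const time_t deadline = time(nullptr) + maxSeconds;
        for (;;) {
            if (FlushViewOfFile(view, 0))
                return true;  // dirty pages handed to the OS
            if (GetLastError() != ERROR_LOCK_VIOLATION)
                return false;  // a real error; the caller treats this as fatal
            if (time(nullptr) > deadline)
                return false;  // still contended after maxSeconds; give up
        }
    }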
diff --git a/src/mongo/db/storage/mmap_v1/record.h b/src/mongo/db/storage/mmap_v1/record.h
index 38c0cfd7085..0f3f9ebcdd4 100644
--- a/src/mongo/db/storage/mmap_v1/record.h
+++ b/src/mongo/db/storage/mmap_v1/record.h
@@ -37,98 +37,141 @@
namespace mongo {
- class DeletedRecord;
-
- /* MmapV1RecordHeader is a record in a datafile. DeletedRecord is similar but for deleted space.
-
- *11:03:20 AM) dm10gen: regarding extentOfs...
- (11:03:42 AM) dm10gen: an extent is a continugous disk area, which contains many Records and DeleteRecords
- (11:03:56 AM) dm10gen: a DiskLoc has two pieces, the fileno and ofs. (64 bit total)
- (11:04:16 AM) dm10gen: to keep the headesr small, instead of storing a 64 bit ptr to the full extent address, we keep just the offset
- (11:04:29 AM) dm10gen: we can do this as we know the record's address, and it has the same fileNo
- (11:04:33 AM) dm10gen: see class DiskLoc for more info
- (11:04:43 AM) dm10gen: so that is how MmapV1RecordHeader::myExtent() works
- (11:04:53 AM) dm10gen: on an alloc(), when we build a new MmapV1RecordHeader, we must populate its extentOfs then
- */
+class DeletedRecord;
+
+/* MmapV1RecordHeader is a record in a datafile. DeletedRecord is similar but for deleted space.
+
+*11:03:20 AM) dm10gen: regarding extentOfs...
+(11:03:42 AM) dm10gen: an extent is a contiguous disk area, which contains many Records and DeletedRecords
+(11:03:56 AM) dm10gen: a DiskLoc has two pieces, the fileno and ofs. (64 bit total)
+(11:04:16 AM) dm10gen: to keep the headers small, instead of storing a 64 bit ptr to the full extent address, we keep just the offset
+(11:04:29 AM) dm10gen: we can do this as we know the record's address, and it has the same fileNo
+(11:04:33 AM) dm10gen: see class DiskLoc for more info
+(11:04:43 AM) dm10gen: so that is how MmapV1RecordHeader::myExtent() works
+(11:04:53 AM) dm10gen: on an alloc(), when we build a new MmapV1RecordHeader, we must populate its extentOfs then
+*/
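
A worked example of the scheme the quote describes, with made-up numbers: a record living at DiskLoc(a=3, ofs=0x5000) whose extent begins at file offset 0x1000 stores only the 4-byte offset, and the full extent location is rebuilt from the record's own file number.

    DiskLoc recLoc(3, 0x5000);                 // where the record itself lives
    int extentOfs = 0x1000;                    // all that the 16-byte header stores
    DiskLoc extentLoc(recLoc.a(), extentOfs);  // == DiskLoc(3, 0x1000), what myExtentLoc() returns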
#pragma pack(1)
- class MmapV1RecordHeader {
- public:
- enum HeaderSizeValue { HeaderSize = 16 };
-
- int lengthWithHeaders() const { return _lengthWithHeaders; }
- int& lengthWithHeaders() { return _lengthWithHeaders; }
-
- int extentOfs() const { return _extentOfs; }
- int& extentOfs() { return _extentOfs; }
-
- int nextOfs() const { return _nextOfs; }
- int& nextOfs() { return _nextOfs; }
-
- int prevOfs() const { return _prevOfs; }
- int& prevOfs() { return _prevOfs; }
-
- const char* data() const { return _data; }
- char* data() { return _data; }
-
- // XXX remove
- const char* dataNoThrowing() const { return _data; }
- char* dataNoThrowing() { return _data; }
-
- int netLength() const { return _netLength(); }
-
- /* use this when a record is deleted. basically a union with next/prev fields */
- DeletedRecord& asDeleted() { return *((DeletedRecord*) this); }
-
- DiskLoc myExtentLoc(const DiskLoc& myLoc) const { return DiskLoc(myLoc.a(), extentOfs() ); }
-
- struct NP {
- int nextOfs;
- int prevOfs;
- };
-
- NP* np() { return (NP*) &_nextOfs; }
-
- RecordData toRecordData() const { return RecordData(_data, _netLength()); }
-
- private:
-
- int _netLength() const { return _lengthWithHeaders - HeaderSize; }
-
- int _lengthWithHeaders;
- int _extentOfs;
- int _nextOfs;
- int _prevOfs;
-
- /** be careful when referencing this that your write intent was correct */
- char _data[4];
-
- public:
- static bool MemoryTrackingEnabled;
-
+class MmapV1RecordHeader {
+public:
+ enum HeaderSizeValue { HeaderSize = 16 };
+
+ int lengthWithHeaders() const {
+ return _lengthWithHeaders;
+ }
+ int& lengthWithHeaders() {
+ return _lengthWithHeaders;
+ }
+
+ int extentOfs() const {
+ return _extentOfs;
+ }
+ int& extentOfs() {
+ return _extentOfs;
+ }
+
+ int nextOfs() const {
+ return _nextOfs;
+ }
+ int& nextOfs() {
+ return _nextOfs;
+ }
+
+ int prevOfs() const {
+ return _prevOfs;
+ }
+ int& prevOfs() {
+ return _prevOfs;
+ }
+
+ const char* data() const {
+ return _data;
+ }
+ char* data() {
+ return _data;
+ }
+
+ // XXX remove
+ const char* dataNoThrowing() const {
+ return _data;
+ }
+ char* dataNoThrowing() {
+ return _data;
+ }
+
+ int netLength() const {
+ return _netLength();
+ }
+
+ /* use this when a record is deleted. basically a union with next/prev fields */
+ DeletedRecord& asDeleted() {
+ return *((DeletedRecord*)this);
+ }
+
+ DiskLoc myExtentLoc(const DiskLoc& myLoc) const {
+ return DiskLoc(myLoc.a(), extentOfs());
+ }
+
+ struct NP {
+ int nextOfs;
+ int prevOfs;
};
-#pragma pack()
-
- // TODO: this probably moves to record_store.h
- class DeletedRecord {
- public:
- int lengthWithHeaders() const { return _lengthWithHeaders; }
- int& lengthWithHeaders() { return _lengthWithHeaders; }
+ NP* np() {
+ return (NP*)&_nextOfs;
+ }
- int extentOfs() const { return _extentOfs; }
- int& extentOfs() { return _extentOfs; }
+ RecordData toRecordData() const {
+ return RecordData(_data, _netLength());
+ }
- // TODO: we need to not const_cast here but problem is DiskLoc::writing
- DiskLoc& nextDeleted() const { return const_cast<DiskLoc&>(_nextDeleted); }
+private:
+ int _netLength() const {
+ return _lengthWithHeaders - HeaderSize;
+ }
- private:
- int _lengthWithHeaders;
+ int _lengthWithHeaders;
+ int _extentOfs;
+ int _nextOfs;
+ int _prevOfs;
- int _extentOfs;
+ /** be careful when referencing this that your write intent was correct */
+ char _data[4];
- DiskLoc _nextDeleted;
- };
+public:
+ static bool MemoryTrackingEnabled;
+};
+#pragma pack()
- BOOST_STATIC_ASSERT( 16 == sizeof(DeletedRecord) );
+// TODO: this probably moves to record_store.h
+class DeletedRecord {
+public:
+ int lengthWithHeaders() const {
+ return _lengthWithHeaders;
+ }
+ int& lengthWithHeaders() {
+ return _lengthWithHeaders;
+ }
+
+ int extentOfs() const {
+ return _extentOfs;
+ }
+ int& extentOfs() {
+ return _extentOfs;
+ }
+
+ // TODO: we need to not const_cast here but problem is DiskLoc::writing
+ DiskLoc& nextDeleted() const {
+ return const_cast<DiskLoc&>(_nextDeleted);
+ }
+
+private:
+ int _lengthWithHeaders;
+
+ int _extentOfs;
+
+ DiskLoc _nextDeleted;
+};
+
+BOOST_STATIC_ASSERT(16 == sizeof(DeletedRecord));
} // namespace mongo
diff --git a/src/mongo/db/storage/mmap_v1/record_access_tracker.cpp b/src/mongo/db/storage/mmap_v1/record_access_tracker.cpp
index ab77ad69b08..ee13b62d456 100644
--- a/src/mongo/db/storage/mmap_v1/record_access_tracker.cpp
+++ b/src/mongo/db/storage/mmap_v1/record_access_tracker.cpp
@@ -42,312 +42,305 @@
namespace mongo {
- namespace {
+namespace {
- static bool blockSupported = false;
+static bool blockSupported = false;
- MONGO_INITIALIZER_WITH_PREREQUISITES(RecordBlockSupported,
- ("SystemInfo"))(InitializerContext* cx) {
- blockSupported = ProcessInfo::blockCheckSupported();
- return Status::OK();
- }
+MONGO_INITIALIZER_WITH_PREREQUISITES(RecordBlockSupported, ("SystemInfo"))(InitializerContext* cx) {
+ blockSupported = ProcessInfo::blockCheckSupported();
+ return Status::OK();
+}
- int hash(size_t region) {
- return
- abs( ( ( 7 + (int)(region & 0xFFFF) )
- * ( 11 + (int)( ( region >> 16 ) & 0xFFFF ) )
+int hash(size_t region) {
+ return abs(((7 + (int)(region & 0xFFFF)) * (11 + (int)((region >> 16) & 0xFFFF))
#if defined(_WIN64) || defined(__amd64__)
- * ( 13 + (int)( ( region >> 32 ) & 0xFFFF ) )
- * ( 17 + (int)( ( region >> 48 ) & 0xFFFF ) )
+ *
+ (13 + (int)((region >> 32) & 0xFFFF)) * (17 + (int)((region >> 48) & 0xFFFF))
#endif
- ) % RecordAccessTracker::SliceSize );
- }
+ ) %
+ RecordAccessTracker::SliceSize);
+}
- int bigHash(size_t region) {
- return hash(region) % RecordAccessTracker::BigHashSize;
- }
+int bigHash(size_t region) {
+ return hash(region) % RecordAccessTracker::BigHashSize;
+}
- namespace PointerTable {
-
- /* A "superpage" is a group of 16 contiguous pages that differ
- * only in the low-order 16 bits. This means that there is
- * enough room in the low-order bits to store a bitmap for each
- * page in the superpage.
- */
- static const size_t superpageMask = ~0xffffLL;
- static const size_t superpageShift = 16;
- static const size_t pageSelectorMask = 0xf000LL; // selects a page in a superpage
- static const int pageSelectorShift = 12;
-
- // Tunables
- static const int capacity = 128; // in superpages
- static const int bucketSize = 4; // half cache line
- static const int buckets = capacity/bucketSize;
-
- struct Data {
- /** organized similar to a CPU cache
- * bucketSize-way set associative
- * least-recently-inserted replacement policy
- */
- size_t _table[buckets][bucketSize];
- long long _lastReset; // time in millis
- };
-
- void reset(Data* data) {
- memset(data->_table, 0, sizeof(data->_table));
- data->_lastReset = Listener::getElapsedTimeMillis();
- }
-
- inline void resetIfNeeded( Data* data ) {
- const long long now = Listener::getElapsedTimeMillis();
- if (MONGO_unlikely((now - data->_lastReset) >
- RecordAccessTracker::RotateTimeSecs*1000)) {
- reset(data);
- }
- }
-
- inline size_t pageBitOf(size_t ptr) {
- return 1LL << ((ptr & pageSelectorMask) >> pageSelectorShift);
- }
-
- inline size_t superpageOf(size_t ptr) {
- return ptr & superpageMask;
- }
-
- inline size_t bucketFor(size_t ptr) {
- return (ptr >> superpageShift) % buckets;
- }
-
- inline bool haveSeenPage(size_t superpage, size_t ptr) {
- return superpage & pageBitOf(ptr);
- }
-
- inline void markPageSeen(size_t& superpage, size_t ptr) {
- superpage |= pageBitOf(ptr);
- }
-
- /** call this to check a page has been seen yet. */
- inline bool seen(Data* data, size_t ptr) {
- resetIfNeeded(data);
-
- // A bucket contains 4 superpages each containing 16 contiguous pages
- // See above for a more detailed explanation of superpages
- size_t* bucket = data->_table[bucketFor(ptr)];
-
- for (int i = 0; i < bucketSize; i++) {
- if (superpageOf(ptr) == superpageOf(bucket[i])) {
- if (haveSeenPage(bucket[i], ptr))
- return true;
-
- markPageSeen(bucket[i], ptr);
- return false;
- }
- }
-
- // superpage isn't in thread-local cache
- // slide bucket forward and add new superpage at front
- for (int i = bucketSize-1; i > 0; i--)
- bucket[i] = bucket[i-1];
-
- bucket[0] = superpageOf(ptr);
- markPageSeen(bucket[0], ptr);
-
- return false;
- }
-
- Data* getData();
-
- }; // namespace PointerTable
-
- } // namespace
-
- //
- // Slice
- //
-
- RecordAccessTracker::Slice::Slice() {
- reset();
- }
+namespace PointerTable {
- void RecordAccessTracker::Slice::reset() {
- memset(_data, 0, sizeof(_data));
- _lastReset = time(0);
+/* A "superpage" is a group of 16 contiguous pages that differ
+ * only in the low-order 16 bits. This means that there is
+ * enough room in the low-order bits to store a bitmap for each
+ * page in the superpage.
+ */
+static const size_t superpageMask = ~0xffffLL;
+static const size_t superpageShift = 16;
+static const size_t pageSelectorMask = 0xf000LL; // selects a page in a superpage
+static const int pageSelectorShift = 12;
+
+// Tunables
+static const int capacity = 128; // in superpages
+static const int bucketSize = 4; // half cache line
+static const int buckets = capacity / bucketSize;
+
+struct Data {
+    /** organized similarly to a CPU cache
+ * bucketSize-way set associative
+ * least-recently-inserted replacement policy
+ */
+ size_t _table[buckets][bucketSize];
+ long long _lastReset; // time in millis
+};
+
+void reset(Data* data) {
+ memset(data->_table, 0, sizeof(data->_table));
+ data->_lastReset = Listener::getElapsedTimeMillis();
+}
+
+inline void resetIfNeeded(Data* data) {
+ const long long now = Listener::getElapsedTimeMillis();
+ if (MONGO_unlikely((now - data->_lastReset) > RecordAccessTracker::RotateTimeSecs * 1000)) {
+ reset(data);
}
+}
- RecordAccessTracker::State RecordAccessTracker::Slice::get(int regionHash,
- size_t region,
- short offset) {
- DEV verify(hash(region) == regionHash);
+inline size_t pageBitOf(size_t ptr) {
+ return 1LL << ((ptr & pageSelectorMask) >> pageSelectorShift);
+}
- Entry* e = _get(regionHash, region, false);
- if (!e)
- return Unk;
+inline size_t superpageOf(size_t ptr) {
+ return ptr & superpageMask;
+}
- return (e->value & ( 1ULL << offset ) ) ? In : Out;
- }
+inline size_t bucketFor(size_t ptr) {
+ return (ptr >> superpageShift) % buckets;
+}
- bool RecordAccessTracker::Slice::put(int regionHash, size_t region, short offset) {
- DEV verify(hash(region) == regionHash);
+inline bool haveSeenPage(size_t superpage, size_t ptr) {
+ return superpage & pageBitOf(ptr);
+}
- Entry* e = _get(regionHash, region, true);
- if (!e)
- return false;
+inline void markPageSeen(size_t& superpage, size_t ptr) {
+ superpage |= pageBitOf(ptr);
+}
- e->value |= 1ULL << offset;
- return true;
- }
+/** call this to check a page has been seen yet. */
+inline bool seen(Data* data, size_t ptr) {
+ resetIfNeeded(data);
+
+ // A bucket contains 4 superpages each containing 16 contiguous pages
+ // See above for a more detailed explanation of superpages
+ size_t* bucket = data->_table[bucketFor(ptr)];
+
+ for (int i = 0; i < bucketSize; i++) {
+ if (superpageOf(ptr) == superpageOf(bucket[i])) {
+ if (haveSeenPage(bucket[i], ptr))
+ return true;
- time_t RecordAccessTracker::Slice::lastReset() const {
- return _lastReset;
+ markPageSeen(bucket[i], ptr);
+ return false;
+ }
}
- RecordAccessTracker::Entry* RecordAccessTracker::Slice::_get(int start,
- size_t region,
- bool add) {
- for (int i = 0; i < MaxChain; i++) {
- int bucket = (start + i) % SliceSize;
+ // superpage isn't in thread-local cache
+ // slide bucket forward and add new superpage at front
+ for (int i = bucketSize - 1; i > 0; i--)
+ bucket[i] = bucket[i - 1];
- if (_data[bucket].region == 0) {
- if (!add)
- return NULL;
+ bucket[0] = superpageOf(ptr);
+ markPageSeen(bucket[0], ptr);
- _data[bucket].region = region;
- return &_data[bucket];
- }
+ return false;
+}
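
To make the bit-twiddling above concrete, here is the bookkeeping for a hypothetical pointer 0x2f35a8 (4KB pages, so sixteen pages per 64KB superpage):

    size_t ptr = 0x2f35a8;
    size_t sp = superpageOf(ptr);   // ptr & ~0xffff == 0x2f0000
    size_t bit = pageBitOf(ptr);    // page 3 of the superpage -> 1 << 3
    size_t slot = bucketFor(ptr);   // (ptr >> 16) % 32 == 0x2f % 32 == 15
    // seen() scans the 4 entries of bucket 15 for superpage 0x2f0000, then
    // tests and sets bit 3 in the matching entry's low-order page bitmap.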
- if (_data[bucket].region == region) {
- return &_data[bucket];
- }
- }
+Data* getData();
- return NULL;
- }
+}; // namespace PointerTable
- //
- // Rolling
- //
+} // namespace
- RecordAccessTracker::Rolling::Rolling() {
- _curSlice = 0;
- _lastRotate = Listener::getElapsedTimeMillis();
- }
+//
+// Slice
+//
- bool RecordAccessTracker::Rolling::access(size_t region, short offset, bool doHalf) {
- int regionHash = hash(region);
+RecordAccessTracker::Slice::Slice() {
+ reset();
+}
- stdx::lock_guard<SimpleMutex> lk(_lock);
+void RecordAccessTracker::Slice::reset() {
+ memset(_data, 0, sizeof(_data));
+ _lastReset = time(0);
+}
- static int rarelyCount = 0;
- if (rarelyCount++ % (2048 / BigHashSize) == 0) {
- long long now = Listener::getElapsedTimeMillis();
+RecordAccessTracker::State RecordAccessTracker::Slice::get(int regionHash,
+ size_t region,
+ short offset) {
+ DEV verify(hash(region) == regionHash);
- if (now - _lastRotate > (1000 * RotateTimeSecs)) {
- _rotate();
- }
- }
+ Entry* e = _get(regionHash, region, false);
+ if (!e)
+ return Unk;
- for (int i = 0; i < NumSlices / (doHalf ? 2 : 1); i++) {
- int pos = (_curSlice + i) % NumSlices;
- State s = _slices[pos].get(regionHash, region, offset);
+ return (e->value & (1ULL << offset)) ? In : Out;
+}
- if (s == In)
- return true;
+bool RecordAccessTracker::Slice::put(int regionHash, size_t region, short offset) {
+ DEV verify(hash(region) == regionHash);
+
+ Entry* e = _get(regionHash, region, true);
+ if (!e)
+ return false;
+
+ e->value |= 1ULL << offset;
+ return true;
+}
+
+time_t RecordAccessTracker::Slice::lastReset() const {
+ return _lastReset;
+}
+
+RecordAccessTracker::Entry* RecordAccessTracker::Slice::_get(int start, size_t region, bool add) {
+ for (int i = 0; i < MaxChain; i++) {
+ int bucket = (start + i) % SliceSize;
- if (s == Out) {
- _slices[pos].put(regionHash, region, offset);
- return false;
- }
+ if (_data[bucket].region == 0) {
+ if (!add)
+ return NULL;
+
+ _data[bucket].region = region;
+ return &_data[bucket];
+ }
+
+ if (_data[bucket].region == region) {
+ return &_data[bucket];
}
+ }
+
+ return NULL;
+}
+
+//
+// Rolling
+//
+
+RecordAccessTracker::Rolling::Rolling() {
+ _curSlice = 0;
+ _lastRotate = Listener::getElapsedTimeMillis();
+}
+
+bool RecordAccessTracker::Rolling::access(size_t region, short offset, bool doHalf) {
+ int regionHash = hash(region);
- // we weren't in any slice
- // so add to cur
- if (!_slices[_curSlice].put(regionHash, region, offset)) {
+ stdx::lock_guard<SimpleMutex> lk(_lock);
+
+ static int rarelyCount = 0;
+ if (rarelyCount++ % (2048 / BigHashSize) == 0) {
+ long long now = Listener::getElapsedTimeMillis();
+
+ if (now - _lastRotate > (1000 * RotateTimeSecs)) {
_rotate();
- _slices[_curSlice].put(regionHash, region, offset);
}
- return false;
}
- void RecordAccessTracker::Rolling::_rotate() {
- _curSlice = (_curSlice + 1) % NumSlices;
- _slices[_curSlice].reset();
- _lastRotate = Listener::getElapsedTimeMillis();
+ for (int i = 0; i < NumSlices / (doHalf ? 2 : 1); i++) {
+ int pos = (_curSlice + i) % NumSlices;
+ State s = _slices[pos].get(regionHash, region, offset);
+
+ if (s == In)
+ return true;
+
+ if (s == Out) {
+ _slices[pos].put(regionHash, region, offset);
+ return false;
+ }
+ }
+
+ // we weren't in any slice
+ // so add to cur
+ if (!_slices[_curSlice].put(regionHash, region, offset)) {
+ _rotate();
+ _slices[_curSlice].put(regionHash, region, offset);
}
+ return false;
+}
- // These need to be outside the ps namespace due to the way they are defined
+void RecordAccessTracker::Rolling::_rotate() {
+ _curSlice = (_curSlice + 1) % NumSlices;
+ _slices[_curSlice].reset();
+ _lastRotate = Listener::getElapsedTimeMillis();
+}
+
+// These need to be outside the ps namespace due to the way they are defined
#if defined(MONGO_CONFIG_HAVE___THREAD)
- __thread PointerTable::Data _pointerTableData;
- PointerTable::Data* PointerTable::getData() {
- return &_pointerTableData;
- }
+__thread PointerTable::Data _pointerTableData;
+PointerTable::Data* PointerTable::getData() {
+ return &_pointerTableData;
+}
#elif defined(MONGO_CONFIG_HAVE___DECLSPEC_THREAD)
- __declspec( thread ) PointerTable::Data _pointerTableData;
- PointerTable::Data* PointerTable::getData() {
- return &_pointerTableData;
- }
+__declspec(thread) PointerTable::Data _pointerTableData;
+PointerTable::Data* PointerTable::getData() {
+ return &_pointerTableData;
+}
#else
- TSP_DEFINE(PointerTable::Data, _pointerTableData);
- PointerTable::Data* PointerTable::getData() {
- return _pointerTableData.getMake();
- }
+TSP_DEFINE(PointerTable::Data, _pointerTableData);
+PointerTable::Data* PointerTable::getData() {
+ return _pointerTableData.getMake();
+}
#endif
- //
- // RecordAccessTracker
- //
+//
+// RecordAccessTracker
+//
- RecordAccessTracker::RecordAccessTracker()
- : _blockSupported(blockSupported) {
- reset();
- }
+RecordAccessTracker::RecordAccessTracker() : _blockSupported(blockSupported) {
+ reset();
+}
- void RecordAccessTracker::reset() {
- PointerTable::reset(PointerTable::getData());
- _rollingTable.reset(new Rolling[BigHashSize]);
- }
+void RecordAccessTracker::reset() {
+ PointerTable::reset(PointerTable::getData());
+ _rollingTable.reset(new Rolling[BigHashSize]);
+}
- void RecordAccessTracker::markAccessed(const void* record) {
- const size_t page = reinterpret_cast<size_t>(record) >> 12;
- const size_t region = page >> 6;
- const size_t offset = page & 0x3f;
+void RecordAccessTracker::markAccessed(const void* record) {
+ const size_t page = reinterpret_cast<size_t>(record) >> 12;
+ const size_t region = page >> 6;
+ const size_t offset = page & 0x3f;
- const bool seen = PointerTable::seen(PointerTable::getData(),
- reinterpret_cast<size_t>(record));
- if (!seen) {
- _rollingTable[bigHash(region)].access(region, offset , true);
- }
+ const bool seen = PointerTable::seen(PointerTable::getData(), reinterpret_cast<size_t>(record));
+ if (!seen) {
+ _rollingTable[bigHash(region)].access(region, offset, true);
}
+}
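
The shifts above decompose a record address into a 256KB region and a page bit within it; for a made-up address 0x7f123456:

    const size_t page = 0x7f123456 >> 12;  // 0x7f123 (4KB pages)
    const size_t region = page >> 6;       // 0x1fc4  (64 pages per region)
    const size_t offset = page & 0x3f;     // 0x23    (this page's bit in the region's 64-bit mask)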
- bool RecordAccessTracker::checkAccessedAndMark(const void* record) {
- const size_t page = reinterpret_cast<size_t>(record) >> 12;
- const size_t region = page >> 6;
- const size_t offset = page & 0x3f;
-
- // This is like the "L1 cache". If we're a miss then we fall through and check the
- // "L2 cache". If we're still a miss, then we defer to a system-specific system
- // call (or give up and return false if deferring to the system call is not enabled).
- if (PointerTable::seen(PointerTable::getData(), reinterpret_cast<size_t>(record))) {
- return true;
- }
-
- // We were a miss in the PointerTable. See if we can find 'record' in the Rolling table.
- if (_rollingTable[bigHash(region)].access(region, offset, false)) {
- return true;
- }
+bool RecordAccessTracker::checkAccessedAndMark(const void* record) {
+ const size_t page = reinterpret_cast<size_t>(record) >> 12;
+ const size_t region = page >> 6;
+ const size_t offset = page & 0x3f;
- if (!_blockSupported) {
- // This means we don't fall back to a system call. Instead we assume things aren't
- // in memory. This could mean that we yield too much, but this is much better
- // than the alternative of not yielding through a page fault.
- return false;
- }
+ // This is like the "L1 cache". If we're a miss then we fall through and check the
+ // "L2 cache". If we're still a miss, then we defer to a system-specific system
+ // call (or give up and return false if deferring to the system call is not enabled).
+ if (PointerTable::seen(PointerTable::getData(), reinterpret_cast<size_t>(record))) {
+ return true;
+ }
- return ProcessInfo::blockInMemory(const_cast<void*>(record));
+ // We were a miss in the PointerTable. See if we can find 'record' in the Rolling table.
+ if (_rollingTable[bigHash(region)].access(region, offset, false)) {
+ return true;
}
- void RecordAccessTracker::disableSystemBlockInMemCheck() {
- _blockSupported = false;
+ if (!_blockSupported) {
+ // This means we don't fall back to a system call. Instead we assume things aren't
+ // in memory. This could mean that we yield too much, but this is much better
+ // than the alternative of not yielding through a page fault.
+ return false;
}
-} // namespace mongo
+ return ProcessInfo::blockInMemory(const_cast<void*>(record));
+}
+
+void RecordAccessTracker::disableSystemBlockInMemCheck() {
+ _blockSupported = false;
+}
+
+} // namespace mongo
diff --git a/src/mongo/db/storage/mmap_v1/record_access_tracker.h b/src/mongo/db/storage/mmap_v1/record_access_tracker.h
index aa98e22230e..a1cb7ab2187 100644
--- a/src/mongo/db/storage/mmap_v1/record_access_tracker.h
+++ b/src/mongo/db/storage/mmap_v1/record_access_tracker.h
@@ -33,127 +33,126 @@
namespace mongo {
- class MmapV1RecordHeader;
+class MmapV1RecordHeader;
+
+/**
+ * Used to implement likelyInPhysicalMemory() for the MMAP v1 storage engine. Since
+ * MMAP v1 holds exclusive collection-level locks, it should yield the locks during a
+ * page fault. The RecordAccessTracker is used to guess at which records are in memory,
+ * so that a yield can be requested unless we're sure that the record has been
+ * recently accessed.
+ */
+class RecordAccessTracker {
+ MONGO_DISALLOW_COPYING(RecordAccessTracker);
+
+public:
+ RecordAccessTracker();
+
+ enum Constants {
+ SliceSize = 1024,
+ MaxChain = 20, // intentionally very low
+ NumSlices = 10,
+ RotateTimeSecs = 90,
+ BigHashSize = 128
+ };
/**
- * Used to implement likelyInPhysicalMemory() for the MMAP v1 storage engine. Since
- * MMAP v1 holds exclusive collection-level locks, it should yield the locks during a
- * page fault. The RecordAccessTracker is used to guess at which records are in memory,
- * so that a yield can be requested unless we're sure that the record has been
- * recently accessed.
+ * Informs this record access tracker that 'record' has been accessed.
*/
- class RecordAccessTracker {
- MONGO_DISALLOW_COPYING(RecordAccessTracker);
- public:
- RecordAccessTracker();
+ void markAccessed(const void* record);
- enum Constants {
- SliceSize = 1024,
- MaxChain = 20, // intentionally very low
- NumSlices = 10,
- RotateTimeSecs = 90,
- BigHashSize = 128
- };
+ /**
+ * @return whether or not 'record' has been marked as accessed recently. A return value
+ * of true means that 'record' is likely in physical memory.
+ *
+ * Also has the side effect of marking 'record' as accessed.
+ */
+ bool checkAccessedAndMark(const void* record);
- /**
- * Informs this record access tracker that 'record' has been accessed.
- */
- void markAccessed(const void* record);
+ /**
+ * Clears out any history of record accesses.
+ */
+ void reset();
- /**
- * @return whether or not 'record' has been marked as accessed recently. A return value
- * of true means that 'record' is likely in physical memory.
- *
- * Also has the side effect of marking 'record' as accessed.
- */
- bool checkAccessedAndMark(const void* record);
+ //
+ // For testing.
+ //
+
+ /**
+ * The accessedRecently() implementation falls back to making a system call if it
+ * appears that the record is not in physical memory. Use this method to disable
+ * the fallback for testing.
+ */
+ void disableSystemBlockInMemCheck();
+
+private:
+ enum State { In, Out, Unk };
+
+ struct Entry {
+ size_t region;
+ unsigned long long value;
+ };
+
+ /**
+ * simple hash map for region -> status
+ * this constitutes a single region of time
+ * it does chaining, but very short chains
+ */
+ class Slice {
+ public:
+ Slice();
- /**
- * Clears out any history of record accesses.
- */
void reset();
- //
- // For testing.
- //
+ State get(int regionHash, size_t region, short offset);
/**
- * The accessedRecently() implementation falls back to making a system call if it
- * appears that the record is not in physical memory. Use this method to disable
- * the fallback for testing.
+ * @return true if added, false if full
*/
- void disableSystemBlockInMemCheck();
+ bool put(int regionHash, size_t region, short offset);
+
+ time_t lastReset() const;
private:
- enum State {
- In, Out, Unk
- };
+ Entry* _get(int start, size_t region, bool add);
+
+ Entry _data[SliceSize];
+ time_t _lastReset;
+ };
- struct Entry {
- size_t region;
- unsigned long long value;
- };
+ /**
+     * this contains many slices of time;
+     * the idea is that you record memory status in the current time slice,
+     * and then after a certain period of time it rolls off, so we check again
+ */
+ class Rolling {
+ public:
+ Rolling();
/**
- * simple hash map for region -> status
- * this constitutes a single region of time
- * it does chaining, but very short chains
+ * After this call, we assume the page is in RAM.
+ *
+     * @param doHalf if this is a known good access, only consult the most recent half of the slices.
+ *
+ * @return whether we know the page is in RAM
*/
- class Slice {
- public:
- Slice();
-
- void reset();
-
- State get(int regionHash, size_t region, short offset);
+ bool access(size_t region, short offset, bool doHalf);
- /**
- * @return true if added, false if full
- */
- bool put(int regionHash, size_t region, short offset);
+ private:
+ void _rotate();
- time_t lastReset() const;
+ int _curSlice;
+ long long _lastRotate;
+ Slice _slices[NumSlices];
- private:
- Entry* _get(int start, size_t region, bool add);
+ SimpleMutex _lock;
+ };
- Entry _data[SliceSize];
- time_t _lastReset;
- };
+ // Should this record tracker fallback to making a system call?
+ bool _blockSupported;
- /**
- * this contains many slices of times
- * the idea you put mem status in the current time slice
- * and then after a certain period of time, it rolls off so we check again
- */
- class Rolling {
- public:
- Rolling();
-
- /**
- * After this call, we assume the page is in RAM.
- *
- * @param doHalf if this is a known good access, want to put in first half.
- *
- * @return whether we know the page is in RAM
- */
- bool access(size_t region, short offset, bool doHalf);
-
- private:
- void _rotate();
-
- int _curSlice;
- long long _lastRotate;
- Slice _slices[NumSlices];
-
- SimpleMutex _lock;
- };
-
- // Should this record tracker fallback to making a system call?
- bool _blockSupported;
-
- // An array of Rolling instances for tracking record accesses.
- std::unique_ptr<Rolling[]> _rollingTable;
- };
+ // An array of Rolling instances for tracking record accesses.
+ std::unique_ptr<Rolling[]> _rollingTable;
+};
-} // namespace
+} // namespace
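
A usage sketch for this class; the caller shown is hypothetical, but MMAP v1's query machinery performs the equivalent check when deciding whether to yield:

    // True when 'rec' was not recently seen, i.e. is probably not in physical
    // memory, so the caller should yield its locks before faulting the page
    // in rather than holding them through disk I/O.
    bool shouldYieldFor(RecordAccessTracker& tracker, const void* rec) {
        return !tracker.checkAccessedAndMark(rec);
    }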
diff --git a/src/mongo/db/storage/mmap_v1/record_access_tracker_test.cpp b/src/mongo/db/storage/mmap_v1/record_access_tracker_test.cpp
index 92147a24c55..7cc766f2b13 100644
--- a/src/mongo/db/storage/mmap_v1/record_access_tracker_test.cpp
+++ b/src/mongo/db/storage/mmap_v1/record_access_tracker_test.cpp
@@ -35,108 +35,108 @@ using namespace mongo;
namespace {
- const void* pointerOf(int data) {
- return reinterpret_cast<const void*>(data);
- }
-
- TEST(RecordAccessTrackerTest, TouchRecordTwice) {
- RecordAccessTracker tracker;
- tracker.disableSystemBlockInMemCheck();
-
- const void* record = pointerOf(0x10003);
-
- ASSERT_FALSE(tracker.checkAccessedAndMark(record));
- ASSERT_TRUE(tracker.checkAccessedAndMark(record));
- }
-
- TEST(RecordAccessTrackerTest, TouchPageTwice) {
- RecordAccessTracker tracker;
- tracker.disableSystemBlockInMemCheck();
-
- const void* firstRecord = pointerOf(0x10003);
- const void* secondRecord = pointerOf(0x10004);
-
- ASSERT_FALSE(tracker.checkAccessedAndMark(firstRecord));
- ASSERT_TRUE(tracker.checkAccessedAndMark(secondRecord));
- ASSERT_TRUE(tracker.checkAccessedAndMark(firstRecord));
- ASSERT_TRUE(tracker.checkAccessedAndMark(secondRecord));
- }
-
- TEST(RecordAccessTrackerTest, TouchTwoPagesTwice) {
- RecordAccessTracker tracker;
- tracker.disableSystemBlockInMemCheck();
-
- const void* firstRecordFirstPage = pointerOf(0x11000);
- const void* secondRecordFirstPage = pointerOf(0x11100);
-
- const void* firstRecordSecondPage = pointerOf(0x12000);
- const void* secondRecordSecondPage = pointerOf(0x12100);
-
- ASSERT_FALSE(tracker.checkAccessedAndMark(firstRecordFirstPage));
- ASSERT_FALSE(tracker.checkAccessedAndMark(firstRecordSecondPage));
- ASSERT_TRUE(tracker.checkAccessedAndMark(secondRecordFirstPage));
- ASSERT_TRUE(tracker.checkAccessedAndMark(secondRecordSecondPage));
- }
-
- // Tests RecordAccessTracker::reset().
- TEST(RecordAccessTrackerTest, TouchTwoPagesTwiceWithReset) {
- RecordAccessTracker tracker;
- tracker.disableSystemBlockInMemCheck();
-
- const void* firstRecordFirstPage = pointerOf(0x11000);
- const void* secondRecordFirstPage = pointerOf(0x11100);
-
- const void* firstRecordSecondPage = pointerOf(0x12000);
- const void* secondRecordSecondPage = pointerOf(0x12100);
-
- ASSERT_FALSE(tracker.checkAccessedAndMark(firstRecordFirstPage));
- ASSERT_FALSE(tracker.checkAccessedAndMark(firstRecordSecondPage));
- ASSERT_TRUE(tracker.checkAccessedAndMark(secondRecordFirstPage));
- ASSERT_TRUE(tracker.checkAccessedAndMark(secondRecordSecondPage));
-
- // Now reset and make sure things look as though we have a fresh RecordAccessTracker.
- tracker.reset();
- ASSERT_FALSE(tracker.checkAccessedAndMark(firstRecordFirstPage));
- ASSERT_FALSE(tracker.checkAccessedAndMark(firstRecordSecondPage));
- ASSERT_TRUE(tracker.checkAccessedAndMark(secondRecordFirstPage));
- ASSERT_TRUE(tracker.checkAccessedAndMark(secondRecordSecondPage));
+const void* pointerOf(int data) {
+ return reinterpret_cast<const void*>(data);
+}
+
+TEST(RecordAccessTrackerTest, TouchRecordTwice) {
+ RecordAccessTracker tracker;
+ tracker.disableSystemBlockInMemCheck();
+
+ const void* record = pointerOf(0x10003);
+
+ ASSERT_FALSE(tracker.checkAccessedAndMark(record));
+ ASSERT_TRUE(tracker.checkAccessedAndMark(record));
+}
+
+TEST(RecordAccessTrackerTest, TouchPageTwice) {
+ RecordAccessTracker tracker;
+ tracker.disableSystemBlockInMemCheck();
+
+ const void* firstRecord = pointerOf(0x10003);
+ const void* secondRecord = pointerOf(0x10004);
+
+ ASSERT_FALSE(tracker.checkAccessedAndMark(firstRecord));
+ ASSERT_TRUE(tracker.checkAccessedAndMark(secondRecord));
+ ASSERT_TRUE(tracker.checkAccessedAndMark(firstRecord));
+ ASSERT_TRUE(tracker.checkAccessedAndMark(secondRecord));
+}
+
+TEST(RecordAccessTrackerTest, TouchTwoPagesTwice) {
+ RecordAccessTracker tracker;
+ tracker.disableSystemBlockInMemCheck();
+
+ const void* firstRecordFirstPage = pointerOf(0x11000);
+ const void* secondRecordFirstPage = pointerOf(0x11100);
+
+ const void* firstRecordSecondPage = pointerOf(0x12000);
+ const void* secondRecordSecondPage = pointerOf(0x12100);
+
+ ASSERT_FALSE(tracker.checkAccessedAndMark(firstRecordFirstPage));
+ ASSERT_FALSE(tracker.checkAccessedAndMark(firstRecordSecondPage));
+ ASSERT_TRUE(tracker.checkAccessedAndMark(secondRecordFirstPage));
+ ASSERT_TRUE(tracker.checkAccessedAndMark(secondRecordSecondPage));
+}
+
+// Tests RecordAccessTracker::reset().
+TEST(RecordAccessTrackerTest, TouchTwoPagesTwiceWithReset) {
+ RecordAccessTracker tracker;
+ tracker.disableSystemBlockInMemCheck();
+
+ const void* firstRecordFirstPage = pointerOf(0x11000);
+ const void* secondRecordFirstPage = pointerOf(0x11100);
+
+ const void* firstRecordSecondPage = pointerOf(0x12000);
+ const void* secondRecordSecondPage = pointerOf(0x12100);
+
+ ASSERT_FALSE(tracker.checkAccessedAndMark(firstRecordFirstPage));
+ ASSERT_FALSE(tracker.checkAccessedAndMark(firstRecordSecondPage));
+ ASSERT_TRUE(tracker.checkAccessedAndMark(secondRecordFirstPage));
+ ASSERT_TRUE(tracker.checkAccessedAndMark(secondRecordSecondPage));
+
+ // Now reset and make sure things look as though we have a fresh RecordAccessTracker.
+ tracker.reset();
+ ASSERT_FALSE(tracker.checkAccessedAndMark(firstRecordFirstPage));
+ ASSERT_FALSE(tracker.checkAccessedAndMark(firstRecordSecondPage));
+ ASSERT_TRUE(tracker.checkAccessedAndMark(secondRecordFirstPage));
+ ASSERT_TRUE(tracker.checkAccessedAndMark(secondRecordSecondPage));
+}
+
+// Tests RecordAccessTracker::markAccessed().
+TEST(RecordAccessTrackerTest, AccessTest) {
+ RecordAccessTracker tracker;
+ tracker.disableSystemBlockInMemCheck();
+
+ // Mark the first page in superpage 3 as accessed.
+ const void* record = pointerOf(0x30000);
+ tracker.markAccessed(record);
+
+ // Test that all remaining addresses in the page give true when asked whether they are
+ // recently accessed.
+ for (int i = 0x30001; i < 0x31000; i++) {
+ const void* touchedPageRecord = pointerOf(i);
+ ASSERT_TRUE(tracker.checkAccessedAndMark(touchedPageRecord));
}
-
- // Tests RecordAccessTracker::markAccessed().
- TEST(RecordAccessTrackerTest, AccessTest) {
- RecordAccessTracker tracker;
- tracker.disableSystemBlockInMemCheck();
-
- // Mark the first page in superpage 3 as accessed.
- const void* record = pointerOf(0x30000);
- tracker.markAccessed(record);
-
- // Test that all remaining addresses in the page give true when asked whether they are
- // recently accessed.
- for (int i = 0x30001; i < 0x31000; i++) {
- const void* touchedPageRecord = pointerOf(i);
- ASSERT_TRUE(tracker.checkAccessedAndMark(touchedPageRecord));
- }
+}
+
+// Touch pages in 128 separate superpages, and make sure that they all are reported as
+// recently accessed.
+TEST(RecordAccessTrackerTest, Access128Superpages) {
+ RecordAccessTracker tracker;
+ tracker.disableSystemBlockInMemCheck();
+
+ // Touch the pages.
+ for (int i = 0x00000; i < 0x800000; i += 0x10000) {
+ const void* touchedPageRecord = pointerOf(i);
+ tracker.markAccessed(touchedPageRecord);
}
- // Touch pages in 128 separate superpages, and make sure that they all are reported as
- // recently accessed.
- TEST(RecordAccessTrackerTest, Access128Superpages) {
- RecordAccessTracker tracker;
- tracker.disableSystemBlockInMemCheck();
-
- // Touch the pages.
- for (int i = 0x00000; i < 0x800000; i += 0x10000) {
- const void* touchedPageRecord = pointerOf(i);
- tracker.markAccessed(touchedPageRecord);
- }
-
- // Ensure we know that the pages have all been touched.
- for (int i = 0x00000; i < 0x800000; i += 0x10000) {
- // It should be fine if there is an offset of, say, 0xA, into the page.
- const void* touchedPageRecord = pointerOf(i + 0xA);
- ASSERT_TRUE(tracker.checkAccessedAndMark(touchedPageRecord));
- }
+ // Ensure we know that the pages have all been touched.
+ for (int i = 0x00000; i < 0x800000; i += 0x10000) {
+ // It should be fine if there is an offset of, say, 0xA, into the page.
+ const void* touchedPageRecord = pointerOf(i + 0xA);
+ ASSERT_TRUE(tracker.checkAccessedAndMark(touchedPageRecord));
}
+}
} // namespace
diff --git a/src/mongo/db/storage/mmap_v1/record_store_v1_base.cpp b/src/mongo/db/storage/mmap_v1/record_store_v1_base.cpp
index 5862a44a144..cc8cf582ffe 100644
--- a/src/mongo/db/storage/mmap_v1/record_store_v1_base.cpp
+++ b/src/mongo/db/storage/mmap_v1/record_store_v1_base.cpp
@@ -48,935 +48,920 @@
namespace mongo {
- using std::unique_ptr;
- using std::set;
- using std::string;
-
- /* Deleted list buckets are used to quickly locate free space based on size. Each bucket
- contains records up to that size (meaning a record with a size exactly equal to
- bucketSizes[n] would go into bucket n+1).
- */
- const int RecordStoreV1Base::bucketSizes[] = {
- 0x20, 0x40, 0x80, 0x100, // 32, 64, 128, 256
- 0x200, 0x400, 0x800, 0x1000, // 512, 1K, 2K, 4K
- 0x2000, 0x4000, 0x8000, 0x10000, // 8K, 16K, 32K, 64K
- 0x20000, 0x40000, 0x80000, 0x100000, // 128K, 256K, 512K, 1M
- 0x200000, 0x400000, 0x600000, 0x800000, // 2M, 4M, 6M, 8M
- 0xA00000, 0xC00000, 0xE00000, // 10M, 12M, 14M,
- MaxAllowedAllocation, // 16.5M
- MaxAllowedAllocation + 1, // Only MaxAllowedAllocation sized records go here.
- INT_MAX, // "oversized" bucket for unused parts of extents.
- };
-
- // If this fails, it means that bucketSizes doesn't have the correct number of entries.
- BOOST_STATIC_ASSERT(sizeof(RecordStoreV1Base::bucketSizes)
- / sizeof(RecordStoreV1Base::bucketSizes[0])
- == RecordStoreV1Base::Buckets);
-
- SavedCursorRegistry::~SavedCursorRegistry() {
- for (SavedCursorSet::iterator it = _cursors.begin(); it != _cursors.end(); it++) {
- (*it)->_registry = NULL; // prevent SavedCursor destructor from accessing this
- }
+using std::unique_ptr;
+using std::set;
+using std::string;
+
+/* Deleted list buckets are used to quickly locate free space based on size. Each bucket
+ contains records up to that size (meaning a record with a size exactly equal to
+ bucketSizes[n] would go into bucket n+1).
+*/
+const int RecordStoreV1Base::bucketSizes[] = {
+ 0x20,
+ 0x40,
+ 0x80,
+ 0x100, // 32, 64, 128, 256
+ 0x200,
+ 0x400,
+ 0x800,
+ 0x1000, // 512, 1K, 2K, 4K
+ 0x2000,
+ 0x4000,
+ 0x8000,
+ 0x10000, // 8K, 16K, 32K, 64K
+ 0x20000,
+ 0x40000,
+ 0x80000,
+ 0x100000, // 128K, 256K, 512K, 1M
+ 0x200000,
+ 0x400000,
+ 0x600000,
+ 0x800000, // 2M, 4M, 6M, 8M
+ 0xA00000,
+ 0xC00000,
+ 0xE00000, // 10M, 12M, 14M,
+ MaxAllowedAllocation, // 16.5M
+ MaxAllowedAllocation + 1, // Only MaxAllowedAllocation sized records go here.
+ INT_MAX, // "oversized" bucket for unused parts of extents.
+};
+
+// If this fails, it means that bucketSizes doesn't have the correct number of entries.
+BOOST_STATIC_ASSERT(sizeof(RecordStoreV1Base::bucketSizes) /
+ sizeof(RecordStoreV1Base::bucketSizes[0]) ==
+ RecordStoreV1Base::Buckets);
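
An illustrative lookup against this table (bucketOf is a hypothetical helper, not part of the patch), following the rule quoted above that a record exactly bucketSizes[n] bytes long lands in bucket n+1:

    int bucketOf(int lenWithHeaders) {
        for (int i = 0; i < RecordStoreV1Base::Buckets; i++) {
            if (lenWithHeaders < RecordStoreV1Base::bucketSizes[i])
                return i;  // strictly-less-than pushes exact matches up one bucket
        }
        return RecordStoreV1Base::Buckets - 1;  // the INT_MAX "oversized" bucket
    }
    // bucketOf(0x1f) == 0, bucketOf(0x20) == 1, bucketOf(100) == 2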
+
+SavedCursorRegistry::~SavedCursorRegistry() {
+ for (SavedCursorSet::iterator it = _cursors.begin(); it != _cursors.end(); it++) {
+ (*it)->_registry = NULL; // prevent SavedCursor destructor from accessing this
}
+}
- void SavedCursorRegistry::registerCursor(SavedCursor* cursor) {
- invariant(!cursor->_registry);
- cursor->_registry = this;
- scoped_spinlock lock(_mutex);
- _cursors.insert(cursor);
- }
+void SavedCursorRegistry::registerCursor(SavedCursor* cursor) {
+ invariant(!cursor->_registry);
+ cursor->_registry = this;
+ scoped_spinlock lock(_mutex);
+ _cursors.insert(cursor);
+}
- bool SavedCursorRegistry::unregisterCursor(SavedCursor* cursor) {
- if (!cursor->_registry) {
- return false;
- }
- invariant(cursor->_registry == this);
- cursor->_registry = NULL;
- scoped_spinlock lock(_mutex);
- invariant(_cursors.erase(cursor));
- return true;
- }
-
- void SavedCursorRegistry::invalidateCursorsForBucket(DiskLoc bucket) {
- // While this is not strictly necessary as an exclusive collection lock will be held,
- // it's cleaner to just make the SavedCursorRegistry thread-safe. Spinlock is OK here.
- scoped_spinlock lock(_mutex);
- for (SavedCursorSet::iterator it = _cursors.begin(); it != _cursors.end();) {
- if ((*it)->bucket == bucket) {
- (*it)->_registry = NULL; // prevent ~SavedCursor from trying to unregister
- _cursors.erase(it++);
- }
- else {
- it++;
- }
- }
+bool SavedCursorRegistry::unregisterCursor(SavedCursor* cursor) {
+ if (!cursor->_registry) {
+ return false;
}
+ invariant(cursor->_registry == this);
+ cursor->_registry = NULL;
+ scoped_spinlock lock(_mutex);
+ invariant(_cursors.erase(cursor));
+ return true;
+}
- RecordStoreV1Base::RecordStoreV1Base( StringData ns,
- RecordStoreV1MetaData* details,
- ExtentManager* em,
- bool isSystemIndexes )
- : RecordStore( ns ),
- _details( details ),
- _extentManager( em ),
- _isSystemIndexes( isSystemIndexes ) {
+void SavedCursorRegistry::invalidateCursorsForBucket(DiskLoc bucket) {
+ // While this is not strictly necessary as an exclusive collection lock will be held,
+ // it's cleaner to just make the SavedCursorRegistry thread-safe. Spinlock is OK here.
+ scoped_spinlock lock(_mutex);
+ for (SavedCursorSet::iterator it = _cursors.begin(); it != _cursors.end();) {
+ if ((*it)->bucket == bucket) {
+ (*it)->_registry = NULL; // prevent ~SavedCursor from trying to unregister
+ _cursors.erase(it++);
+ } else {
+ it++;
+ }
}
+}
- RecordStoreV1Base::~RecordStoreV1Base() {
- }
+RecordStoreV1Base::RecordStoreV1Base(StringData ns,
+ RecordStoreV1MetaData* details,
+ ExtentManager* em,
+ bool isSystemIndexes)
+ : RecordStore(ns), _details(details), _extentManager(em), _isSystemIndexes(isSystemIndexes) {}
+RecordStoreV1Base::~RecordStoreV1Base() {}
- int64_t RecordStoreV1Base::storageSize( OperationContext* txn,
- BSONObjBuilder* extraInfo,
- int level ) const {
- BSONArrayBuilder extentInfo;
- int64_t total = 0;
- int n = 0;
+int64_t RecordStoreV1Base::storageSize(OperationContext* txn,
+ BSONObjBuilder* extraInfo,
+ int level) const {
+ BSONArrayBuilder extentInfo;
- DiskLoc cur = _details->firstExtent(txn);
+ int64_t total = 0;
+ int n = 0;
- while ( !cur.isNull() ) {
- Extent* e = _extentManager->getExtent( cur );
+ DiskLoc cur = _details->firstExtent(txn);
- total += e->length;
- n++;
+ while (!cur.isNull()) {
+ Extent* e = _extentManager->getExtent(cur);
- if ( extraInfo && level > 0 ) {
- extentInfo.append( BSON( "len" << e->length << "loc: " << e->myLoc.toBSONObj() ) );
- }
- cur = e->xnext;
- }
+ total += e->length;
+ n++;
- if ( extraInfo ) {
- extraInfo->append( "numExtents", n );
- if ( level > 0 )
- extraInfo->append( "extents", extentInfo.arr() );
+ if (extraInfo && level > 0) {
+ extentInfo.append(BSON("len" << e->length << "loc: " << e->myLoc.toBSONObj()));
}
-
- return total;
+ cur = e->xnext;
}
- RecordData RecordStoreV1Base::dataFor( OperationContext* txn, const RecordId& loc ) const {
- return recordFor(DiskLoc::fromRecordId(loc))->toRecordData();
+ if (extraInfo) {
+ extraInfo->append("numExtents", n);
+ if (level > 0)
+ extraInfo->append("extents", extentInfo.arr());
}
- bool RecordStoreV1Base::findRecord( OperationContext* txn,
- const RecordId& loc, RecordData* rd ) const {
-        // This is a bit odd: the storage engine API implies the record _has_ to exist,
-        // but we can't actually check that here, so we assume the best.
- MmapV1RecordHeader* rec = recordFor(DiskLoc::fromRecordId(loc));
- if ( !rec ) {
- return false;
- }
- *rd = rec->toRecordData();
- return true;
- }
+ return total;
+}
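// [Editor's note — illustrative only.] With extraInfo set and level > 0, the
// loop above records one { len, loc } entry per extent, so the caller ends up
// with output shaped roughly like:
//
//     { numExtents: 2, extents: [ { len: 8192, loc: <DiskLoc> }, ... ] }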
- MmapV1RecordHeader* RecordStoreV1Base::recordFor( const DiskLoc& loc ) const {
- return _extentManager->recordForV1( loc );
- }
+RecordData RecordStoreV1Base::dataFor(OperationContext* txn, const RecordId& loc) const {
+ return recordFor(DiskLoc::fromRecordId(loc))->toRecordData();
+}
- const DeletedRecord* RecordStoreV1Base::deletedRecordFor( const DiskLoc& loc ) const {
- invariant( loc.a() != -1 );
- return reinterpret_cast<const DeletedRecord*>( recordFor( loc ) );
- }
+bool RecordStoreV1Base::findRecord(OperationContext* txn,
+ const RecordId& loc,
+ RecordData* rd) const {
+    // This is a bit odd: the storage engine API implies the record _has_ to exist,
+    // but we can't actually check that here, so we assume the best.
+ MmapV1RecordHeader* rec = recordFor(DiskLoc::fromRecordId(loc));
+ if (!rec) {
+ return false;
+ }
+ *rd = rec->toRecordData();
+ return true;
+}
- DeletedRecord* RecordStoreV1Base::drec( const DiskLoc& loc ) const {
- invariant( loc.a() != -1 );
- return reinterpret_cast<DeletedRecord*>( recordFor( loc ) );
- }
+MmapV1RecordHeader* RecordStoreV1Base::recordFor(const DiskLoc& loc) const {
+ return _extentManager->recordForV1(loc);
+}
- Extent* RecordStoreV1Base::_getExtent( OperationContext* txn, const DiskLoc& loc ) const {
- return _extentManager->getExtent( loc );
- }
+const DeletedRecord* RecordStoreV1Base::deletedRecordFor(const DiskLoc& loc) const {
+ invariant(loc.a() != -1);
+ return reinterpret_cast<const DeletedRecord*>(recordFor(loc));
+}
- DiskLoc RecordStoreV1Base::_getExtentLocForRecord( OperationContext* txn, const DiskLoc& loc ) const {
- return _extentManager->extentLocForV1( loc );
- }
+DeletedRecord* RecordStoreV1Base::drec(const DiskLoc& loc) const {
+ invariant(loc.a() != -1);
+ return reinterpret_cast<DeletedRecord*>(recordFor(loc));
+}
+Extent* RecordStoreV1Base::_getExtent(OperationContext* txn, const DiskLoc& loc) const {
+ return _extentManager->getExtent(loc);
+}
- DiskLoc RecordStoreV1Base::getNextRecord( OperationContext* txn, const DiskLoc& loc ) const {
- DiskLoc next = getNextRecordInExtent( txn, loc );
- if ( !next.isNull() ) {
- return next;
- }
+DiskLoc RecordStoreV1Base::_getExtentLocForRecord(OperationContext* txn, const DiskLoc& loc) const {
+ return _extentManager->extentLocForV1(loc);
+}
- // now traverse extents
- Extent* e = _getExtent( txn, _getExtentLocForRecord(txn, loc) );
- while ( 1 ) {
- if ( e->xnext.isNull() )
- return DiskLoc(); // end of collection
- e = _getExtent( txn, e->xnext );
- if ( !e->firstRecord.isNull() )
- break;
- // entire extent could be empty, keep looking
- }
- return e->firstRecord;
+DiskLoc RecordStoreV1Base::getNextRecord(OperationContext* txn, const DiskLoc& loc) const {
+ DiskLoc next = getNextRecordInExtent(txn, loc);
+ if (!next.isNull()) {
+ return next;
}
- DiskLoc RecordStoreV1Base::getPrevRecord( OperationContext* txn, const DiskLoc& loc ) const {
- DiskLoc prev = getPrevRecordInExtent( txn, loc );
- if ( !prev.isNull() ) {
- return prev;
- }
+ // now traverse extents
- // now traverse extents
+ Extent* e = _getExtent(txn, _getExtentLocForRecord(txn, loc));
+ while (1) {
+ if (e->xnext.isNull())
+ return DiskLoc(); // end of collection
+ e = _getExtent(txn, e->xnext);
+ if (!e->firstRecord.isNull())
+ break;
+ // entire extent could be empty, keep looking
+ }
+ return e->firstRecord;
+}
- Extent *e = _getExtent(txn, _getExtentLocForRecord(txn, loc));
- while ( 1 ) {
- if ( e->xprev.isNull() )
- return DiskLoc(); // end of collection
- e = _getExtent( txn, e->xprev );
- if ( !e->firstRecord.isNull() )
- break;
- // entire extent could be empty, keep looking
- }
- return e->lastRecord;
-
- }
-
- DiskLoc RecordStoreV1Base::_findFirstSpot( OperationContext* txn,
- const DiskLoc& extDiskLoc,
- Extent* e ) {
- DiskLoc emptyLoc = extDiskLoc;
- emptyLoc.inc( Extent::HeaderSize() );
- int delRecLength = e->length - Extent::HeaderSize();
- if ( delRecLength >= 32*1024 && _ns.find('$') != string::npos && !isCapped() ) {
-            // probably an index, so skip forward to keep its records page-aligned
- int& ofs = emptyLoc.GETOFS();
- int newOfs = (ofs + 0xfff) & ~0xfff;
- delRecLength -= (newOfs-ofs);
- dassert( delRecLength > 0 );
- ofs = newOfs;
- }
+DiskLoc RecordStoreV1Base::getPrevRecord(OperationContext* txn, const DiskLoc& loc) const {
+ DiskLoc prev = getPrevRecordInExtent(txn, loc);
+ if (!prev.isNull()) {
+ return prev;
+ }
- DeletedRecord* empty = txn->recoveryUnit()->writing(drec(emptyLoc));
- empty->lengthWithHeaders() = delRecLength;
- empty->extentOfs() = e->myLoc.getOfs();
- empty->nextDeleted().Null();
- return emptyLoc;
+ // now traverse extents
+ Extent* e = _getExtent(txn, _getExtentLocForRecord(txn, loc));
+ while (1) {
+ if (e->xprev.isNull())
+ return DiskLoc(); // end of collection
+ e = _getExtent(txn, e->xprev);
+ if (!e->firstRecord.isNull())
+ break;
+ // entire extent could be empty, keep looking
}
+ return e->lastRecord;
+}
- DiskLoc RecordStoreV1Base::getNextRecordInExtent( OperationContext* txn, const DiskLoc& loc ) const {
- int nextOffset = recordFor( loc )->nextOfs();
-
- if ( nextOffset == DiskLoc::NullOfs )
- return DiskLoc();
+DiskLoc RecordStoreV1Base::_findFirstSpot(OperationContext* txn,
+ const DiskLoc& extDiskLoc,
+ Extent* e) {
+ DiskLoc emptyLoc = extDiskLoc;
+ emptyLoc.inc(Extent::HeaderSize());
+ int delRecLength = e->length - Extent::HeaderSize();
+ if (delRecLength >= 32 * 1024 && _ns.find('$') != string::npos && !isCapped()) {
+        // probably an index, so skip forward to keep its records page-aligned
+ int& ofs = emptyLoc.GETOFS();
+ int newOfs = (ofs + 0xfff) & ~0xfff;
+ delRecLength -= (newOfs - ofs);
+ dassert(delRecLength > 0);
+ ofs = newOfs;
+ }
+
+ DeletedRecord* empty = txn->recoveryUnit()->writing(drec(emptyLoc));
+ empty->lengthWithHeaders() = delRecLength;
+ empty->extentOfs() = e->myLoc.getOfs();
+ empty->nextDeleted().Null();
+ return emptyLoc;
+}
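// [Editor's note — illustrative only.] The expression (ofs + 0xfff) & ~0xfff
// above rounds the offset up to the next 4KB page boundary:
//
//     0x1234 -> 0x2000   (rounded up)
//     0x3000 -> 0x3000   (already aligned, unchanged)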
- fassert( 17441, abs(nextOffset) >= 8 ); // defensive
- DiskLoc result( loc.a(), nextOffset );
- return result;
- }
+DiskLoc RecordStoreV1Base::getNextRecordInExtent(OperationContext* txn, const DiskLoc& loc) const {
+ int nextOffset = recordFor(loc)->nextOfs();
- DiskLoc RecordStoreV1Base::getPrevRecordInExtent( OperationContext* txn, const DiskLoc& loc ) const {
- int prevOffset = recordFor( loc )->prevOfs();
+ if (nextOffset == DiskLoc::NullOfs)
+ return DiskLoc();
- if ( prevOffset == DiskLoc::NullOfs )
- return DiskLoc();
+ fassert(17441, abs(nextOffset) >= 8); // defensive
+ DiskLoc result(loc.a(), nextOffset);
+ return result;
+}
- fassert( 17442, abs(prevOffset) >= 8 ); // defensive
- DiskLoc result( loc.a(), prevOffset );
- return result;
- }
+DiskLoc RecordStoreV1Base::getPrevRecordInExtent(OperationContext* txn, const DiskLoc& loc) const {
+ int prevOffset = recordFor(loc)->prevOfs();
- StatusWith<RecordId> RecordStoreV1Base::insertRecord( OperationContext* txn,
- const DocWriter* doc,
- bool enforceQuota ) {
- int docSize = doc->documentSize();
- if ( docSize < 4 ) {
- return StatusWith<RecordId>(ErrorCodes::InvalidLength, "record has to be >= 4 bytes");
- }
- const int lenWHdr = docSize + MmapV1RecordHeader::HeaderSize;
- if ( lenWHdr > MaxAllowedAllocation ) {
- return StatusWith<RecordId>(ErrorCodes::InvalidLength, "record has to be <= 16.5MB");
- }
- const int lenToAlloc = (doc->addPadding() && shouldPadInserts())
- ? quantizeAllocationSpace(lenWHdr)
- : lenWHdr;
+ if (prevOffset == DiskLoc::NullOfs)
+ return DiskLoc();
- StatusWith<DiskLoc> loc = allocRecord( txn, lenToAlloc, enforceQuota );
- if ( !loc.isOK() )
- return StatusWith<RecordId>(loc.getStatus());
+ fassert(17442, abs(prevOffset) >= 8); // defensive
+ DiskLoc result(loc.a(), prevOffset);
+ return result;
+}
- MmapV1RecordHeader *r = recordFor( loc.getValue() );
- fassert( 17319, r->lengthWithHeaders() >= lenWHdr );
+StatusWith<RecordId> RecordStoreV1Base::insertRecord(OperationContext* txn,
+ const DocWriter* doc,
+ bool enforceQuota) {
+ int docSize = doc->documentSize();
+ if (docSize < 4) {
+ return StatusWith<RecordId>(ErrorCodes::InvalidLength, "record has to be >= 4 bytes");
+ }
+ const int lenWHdr = docSize + MmapV1RecordHeader::HeaderSize;
+ if (lenWHdr > MaxAllowedAllocation) {
+ return StatusWith<RecordId>(ErrorCodes::InvalidLength, "record has to be <= 16.5MB");
+ }
+ const int lenToAlloc =
+ (doc->addPadding() && shouldPadInserts()) ? quantizeAllocationSpace(lenWHdr) : lenWHdr;
- r = reinterpret_cast<MmapV1RecordHeader*>( txn->recoveryUnit()->writingPtr(r, lenWHdr) );
- doc->writeDocument( r->data() );
+ StatusWith<DiskLoc> loc = allocRecord(txn, lenToAlloc, enforceQuota);
+ if (!loc.isOK())
+ return StatusWith<RecordId>(loc.getStatus());
- _addRecordToRecListInExtent(txn, r, loc.getValue());
+ MmapV1RecordHeader* r = recordFor(loc.getValue());
+ fassert(17319, r->lengthWithHeaders() >= lenWHdr);
- _details->incrementStats( txn, r->netLength(), 1 );
+ r = reinterpret_cast<MmapV1RecordHeader*>(txn->recoveryUnit()->writingPtr(r, lenWHdr));
+ doc->writeDocument(r->data());
- return StatusWith<RecordId>(loc.getValue().toRecordId());
- }
+ _addRecordToRecListInExtent(txn, r, loc.getValue());
+ _details->incrementStats(txn, r->netLength(), 1);
- StatusWith<RecordId> RecordStoreV1Base::insertRecord( OperationContext* txn,
- const char* data,
- int len,
- bool enforceQuota ) {
- if ( len < 4 ) {
- return StatusWith<RecordId>( ErrorCodes::InvalidLength, "record has to be >= 4 bytes" );
- }
+ return StatusWith<RecordId>(loc.getValue().toRecordId());
+}
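// [Editor's note — illustrative only.] MaxAllowedAllocation works out to
// 16 * 1024 * 1024 + 512 * 1024 = 17,301,504 bytes, i.e. the "16.5MB" in the
// error message: the 16MB maximum BSON document size plus, presumably, headroom
// for the record header and padding.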
- if ( len + MmapV1RecordHeader::HeaderSize > MaxAllowedAllocation ) {
- return StatusWith<RecordId>( ErrorCodes::InvalidLength, "record has to be <= 16.5MB" );
- }
- return _insertRecord( txn, data, len, enforceQuota );
+StatusWith<RecordId> RecordStoreV1Base::insertRecord(OperationContext* txn,
+ const char* data,
+ int len,
+ bool enforceQuota) {
+ if (len < 4) {
+ return StatusWith<RecordId>(ErrorCodes::InvalidLength, "record has to be >= 4 bytes");
}
- StatusWith<RecordId> RecordStoreV1Base::_insertRecord( OperationContext* txn,
- const char* data,
- int len,
- bool enforceQuota ) {
+ if (len + MmapV1RecordHeader::HeaderSize > MaxAllowedAllocation) {
+ return StatusWith<RecordId>(ErrorCodes::InvalidLength, "record has to be <= 16.5MB");
+ }
- const int lenWHdr = len + MmapV1RecordHeader::HeaderSize;
- const int lenToAlloc = shouldPadInserts() ? quantizeAllocationSpace(lenWHdr)
- : lenWHdr;
- fassert( 17208, lenToAlloc >= lenWHdr );
+ return _insertRecord(txn, data, len, enforceQuota);
+}
- StatusWith<DiskLoc> loc = allocRecord( txn, lenToAlloc, enforceQuota );
- if ( !loc.isOK() )
- return StatusWith<RecordId>(loc.getStatus());
+StatusWith<RecordId> RecordStoreV1Base::_insertRecord(OperationContext* txn,
+ const char* data,
+ int len,
+ bool enforceQuota) {
+ const int lenWHdr = len + MmapV1RecordHeader::HeaderSize;
+ const int lenToAlloc = shouldPadInserts() ? quantizeAllocationSpace(lenWHdr) : lenWHdr;
+ fassert(17208, lenToAlloc >= lenWHdr);
- MmapV1RecordHeader *r = recordFor( loc.getValue() );
- fassert( 17210, r->lengthWithHeaders() >= lenWHdr );
+ StatusWith<DiskLoc> loc = allocRecord(txn, lenToAlloc, enforceQuota);
+ if (!loc.isOK())
+ return StatusWith<RecordId>(loc.getStatus());
- // copy the data
- r = reinterpret_cast<MmapV1RecordHeader*>( txn->recoveryUnit()->writingPtr(r, lenWHdr) );
- memcpy( r->data(), data, len );
+ MmapV1RecordHeader* r = recordFor(loc.getValue());
+ fassert(17210, r->lengthWithHeaders() >= lenWHdr);
- _addRecordToRecListInExtent(txn, r, loc.getValue());
+ // copy the data
+ r = reinterpret_cast<MmapV1RecordHeader*>(txn->recoveryUnit()->writingPtr(r, lenWHdr));
+ memcpy(r->data(), data, len);
- _details->incrementStats( txn, r->netLength(), 1 );
+ _addRecordToRecListInExtent(txn, r, loc.getValue());
- return StatusWith<RecordId>(loc.getValue().toRecordId());
- }
+ _details->incrementStats(txn, r->netLength(), 1);
- StatusWith<RecordId> RecordStoreV1Base::updateRecord( OperationContext* txn,
- const RecordId& oldLocation,
- const char* data,
- int dataSize,
- bool enforceQuota,
- UpdateNotifier* notifier ) {
- MmapV1RecordHeader* oldRecord = recordFor( DiskLoc::fromRecordId(oldLocation) );
- if ( oldRecord->netLength() >= dataSize ) {
- // Make sure to notify other queries before we do an in-place update.
- if ( notifier ) {
- Status callbackStatus = notifier->recordStoreGoingToUpdateInPlace( txn,
- oldLocation );
- if ( !callbackStatus.isOK() )
- return StatusWith<RecordId>( callbackStatus );
- }
+ return StatusWith<RecordId>(loc.getValue().toRecordId());
+}
- // we fit
- memcpy( txn->recoveryUnit()->writingPtr( oldRecord->data(), dataSize ), data, dataSize );
- return StatusWith<RecordId>( oldLocation );
+StatusWith<RecordId> RecordStoreV1Base::updateRecord(OperationContext* txn,
+ const RecordId& oldLocation,
+ const char* data,
+ int dataSize,
+ bool enforceQuota,
+ UpdateNotifier* notifier) {
+ MmapV1RecordHeader* oldRecord = recordFor(DiskLoc::fromRecordId(oldLocation));
+ if (oldRecord->netLength() >= dataSize) {
+ // Make sure to notify other queries before we do an in-place update.
+ if (notifier) {
+ Status callbackStatus = notifier->recordStoreGoingToUpdateInPlace(txn, oldLocation);
+ if (!callbackStatus.isOK())
+ return StatusWith<RecordId>(callbackStatus);
}
- if ( isCapped() )
- return StatusWith<RecordId>( ErrorCodes::InternalError,
- "failing update: objects in a capped ns cannot grow",
- 10003 );
-
- // we have to move
- if ( dataSize + MmapV1RecordHeader::HeaderSize > MaxAllowedAllocation ) {
- return StatusWith<RecordId>( ErrorCodes::InvalidLength, "record has to be <= 16.5MB" );
- }
+ // we fit
+ memcpy(txn->recoveryUnit()->writingPtr(oldRecord->data(), dataSize), data, dataSize);
+ return StatusWith<RecordId>(oldLocation);
+ }
- StatusWith<RecordId> newLocation = _insertRecord( txn, data, dataSize, enforceQuota );
- if ( !newLocation.isOK() )
- return newLocation;
-
- // insert worked, so we delete old record
- if ( notifier ) {
- Status moveStatus = notifier->recordStoreGoingToMove( txn,
- oldLocation,
- oldRecord->data(),
- oldRecord->netLength() );
- if ( !moveStatus.isOK() )
- return StatusWith<RecordId>( moveStatus );
- }
+ if (isCapped())
+ return StatusWith<RecordId>(
+ ErrorCodes::InternalError, "failing update: objects in a capped ns cannot grow", 10003);
- deleteRecord( txn, oldLocation );
+ // we have to move
+ if (dataSize + MmapV1RecordHeader::HeaderSize > MaxAllowedAllocation) {
+ return StatusWith<RecordId>(ErrorCodes::InvalidLength, "record has to be <= 16.5MB");
+ }
+ StatusWith<RecordId> newLocation = _insertRecord(txn, data, dataSize, enforceQuota);
+ if (!newLocation.isOK())
return newLocation;
- }
- bool RecordStoreV1Base::updateWithDamagesSupported() const {
- return true;
+ // insert worked, so we delete old record
+ if (notifier) {
+ Status moveStatus = notifier->recordStoreGoingToMove(
+ txn, oldLocation, oldRecord->data(), oldRecord->netLength());
+ if (!moveStatus.isOK())
+ return StatusWith<RecordId>(moveStatus);
}
- Status RecordStoreV1Base::updateWithDamages( OperationContext* txn,
- const RecordId& loc,
- const RecordData& oldRec,
- const char* damageSource,
- const mutablebson::DamageVector& damages ) {
- MmapV1RecordHeader* rec = recordFor( DiskLoc::fromRecordId(loc) );
- char* root = rec->data();
+ deleteRecord(txn, oldLocation);
- // All updates were in place. Apply them via durability and writing pointer.
- mutablebson::DamageVector::const_iterator where = damages.begin();
- const mutablebson::DamageVector::const_iterator end = damages.end();
- for( ; where != end; ++where ) {
- const char* sourcePtr = damageSource + where->sourceOffset;
- void* targetPtr = txn->recoveryUnit()->writingPtr(root + where->targetOffset, where->size);
- std::memcpy(targetPtr, sourcePtr, where->size);
- }
+ return newLocation;
+}
- return Status::OK();
- }
+bool RecordStoreV1Base::updateWithDamagesSupported() const {
+ return true;
+}
- void RecordStoreV1Base::deleteRecord( OperationContext* txn, const RecordId& rid ) {
- const DiskLoc dl = DiskLoc::fromRecordId(rid);
+Status RecordStoreV1Base::updateWithDamages(OperationContext* txn,
+ const RecordId& loc,
+ const RecordData& oldRec,
+ const char* damageSource,
+ const mutablebson::DamageVector& damages) {
+ MmapV1RecordHeader* rec = recordFor(DiskLoc::fromRecordId(loc));
+ char* root = rec->data();
- MmapV1RecordHeader* todelete = recordFor( dl );
- invariant( todelete->netLength() >= 4 ); // this is required for defensive code
+ // All updates were in place. Apply them via durability and writing pointer.
+ mutablebson::DamageVector::const_iterator where = damages.begin();
+ const mutablebson::DamageVector::const_iterator end = damages.end();
+ for (; where != end; ++where) {
+ const char* sourcePtr = damageSource + where->sourceOffset;
+ void* targetPtr = txn->recoveryUnit()->writingPtr(root + where->targetOffset, where->size);
+ std::memcpy(targetPtr, sourcePtr, where->size);
+ }
-        /* remove ourselves from the record next/prev chain */
- {
- if ( todelete->prevOfs() != DiskLoc::NullOfs ) {
- DiskLoc prev = getPrevRecordInExtent( txn, dl );
- MmapV1RecordHeader* prevRecord = recordFor( prev );
- txn->recoveryUnit()->writingInt( prevRecord->nextOfs() ) = todelete->nextOfs();
- }
+ return Status::OK();
+}
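// [Editor's note — illustrative sketch.] Each entry in the DamageVector is a
// (sourceOffset, targetOffset, size) triple; a damage of, say,
// { sourceOffset: 0, targetOffset: 8, size: 4 } copies damageSource[0..4) over
// bytes [8..12) of the record's data, routed through writingPtr() so the write
// is journaled by the recovery unit.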
- if ( todelete->nextOfs() != DiskLoc::NullOfs ) {
- DiskLoc next = getNextRecord( txn, dl );
- MmapV1RecordHeader* nextRecord = recordFor( next );
- txn->recoveryUnit()->writingInt( nextRecord->prevOfs() ) = todelete->prevOfs();
- }
- }
+void RecordStoreV1Base::deleteRecord(OperationContext* txn, const RecordId& rid) {
+ const DiskLoc dl = DiskLoc::fromRecordId(rid);
-        /* remove ourselves from extent pointers */
- {
- DiskLoc extentLoc = todelete->myExtentLoc(dl);
- Extent *e = _getExtent( txn, extentLoc );
- if ( e->firstRecord == dl ) {
- txn->recoveryUnit()->writing(&e->firstRecord);
- if ( todelete->nextOfs() == DiskLoc::NullOfs )
- e->firstRecord.Null();
- else
- e->firstRecord.set(dl.a(), todelete->nextOfs() );
- }
- if ( e->lastRecord == dl ) {
- txn->recoveryUnit()->writing(&e->lastRecord);
- if ( todelete->prevOfs() == DiskLoc::NullOfs )
- e->lastRecord.Null();
- else
- e->lastRecord.set(dl.a(), todelete->prevOfs() );
- }
- }
+ MmapV1RecordHeader* todelete = recordFor(dl);
+ invariant(todelete->netLength() >= 4); // this is required for defensive code
- /* add to the free list */
- {
- _details->incrementStats( txn, -1 * todelete->netLength(), -1 );
-
- if ( _isSystemIndexes ) {
-                /* temp: if in system.indexes, don't reuse, and zero out: we want to be
-                   careful until this is validated more, as IndexDetails has pointers
-                   to this disk location, so an incorrectly done remove would cause
-                   a lot of problems.
- */
- memset( txn->recoveryUnit()->writingPtr(todelete, todelete->lengthWithHeaders() ),
- 0, todelete->lengthWithHeaders() );
- }
- else {
- // this is defensive so we can detect if we are still using a location
- // that was deleted
- memset(txn->recoveryUnit()->writingPtr(todelete->data(), 4), 0xee, 4);
- addDeletedRec(txn, dl);
- }
+    /* remove ourselves from the record next/prev chain */
+ {
+ if (todelete->prevOfs() != DiskLoc::NullOfs) {
+ DiskLoc prev = getPrevRecordInExtent(txn, dl);
+ MmapV1RecordHeader* prevRecord = recordFor(prev);
+ txn->recoveryUnit()->writingInt(prevRecord->nextOfs()) = todelete->nextOfs();
}
+ if (todelete->nextOfs() != DiskLoc::NullOfs) {
+ DiskLoc next = getNextRecord(txn, dl);
+ MmapV1RecordHeader* nextRecord = recordFor(next);
+ txn->recoveryUnit()->writingInt(nextRecord->prevOfs()) = todelete->prevOfs();
+ }
}
- std::unique_ptr<RecordCursor> RecordStoreV1Base::getCursorForRepair(
- OperationContext* txn) const {
- return stdx::make_unique<RecordStoreV1RepairCursor>(txn, this);
- }
-
- void RecordStoreV1Base::_addRecordToRecListInExtent(OperationContext* txn,
- MmapV1RecordHeader *r,
- DiskLoc loc) {
- dassert( recordFor(loc) == r );
- DiskLoc extentLoc = _getExtentLocForRecord( txn, loc );
- Extent *e = _getExtent( txn, extentLoc );
- if ( e->lastRecord.isNull() ) {
- *txn->recoveryUnit()->writing(&e->firstRecord) = loc;
- *txn->recoveryUnit()->writing(&e->lastRecord) = loc;
- r->prevOfs() = r->nextOfs() = DiskLoc::NullOfs;
+    /* remove ourselves from extent pointers */
+ {
+ DiskLoc extentLoc = todelete->myExtentLoc(dl);
+ Extent* e = _getExtent(txn, extentLoc);
+ if (e->firstRecord == dl) {
+ txn->recoveryUnit()->writing(&e->firstRecord);
+ if (todelete->nextOfs() == DiskLoc::NullOfs)
+ e->firstRecord.Null();
+ else
+ e->firstRecord.set(dl.a(), todelete->nextOfs());
}
- else {
- MmapV1RecordHeader *oldlast = recordFor(e->lastRecord);
- r->prevOfs() = e->lastRecord.getOfs();
- r->nextOfs() = DiskLoc::NullOfs;
- txn->recoveryUnit()->writingInt(oldlast->nextOfs()) = loc.getOfs();
- *txn->recoveryUnit()->writing(&e->lastRecord) = loc;
+ if (e->lastRecord == dl) {
+ txn->recoveryUnit()->writing(&e->lastRecord);
+ if (todelete->prevOfs() == DiskLoc::NullOfs)
+ e->lastRecord.Null();
+ else
+ e->lastRecord.set(dl.a(), todelete->prevOfs());
}
}
- void RecordStoreV1Base::increaseStorageSize( OperationContext* txn,
- int size,
- bool enforceQuota ) {
- DiskLoc eloc = _extentManager->allocateExtent( txn,
- isCapped(),
- size,
- enforceQuota );
- Extent *e = _extentManager->getExtent( eloc );
- invariant( e );
-
- *txn->recoveryUnit()->writing( &e->nsDiagnostic ) = _ns;
-
- txn->recoveryUnit()->writing( &e->xnext )->Null();
- txn->recoveryUnit()->writing( &e->xprev )->Null();
- txn->recoveryUnit()->writing( &e->firstRecord )->Null();
- txn->recoveryUnit()->writing( &e->lastRecord )->Null();
-
- DiskLoc emptyLoc = _findFirstSpot( txn, eloc, e );
-
- if ( _details->lastExtent(txn).isNull() ) {
- invariant( _details->firstExtent(txn).isNull() );
- _details->setFirstExtent( txn, eloc );
- _details->setLastExtent( txn, eloc );
- _details->setCapExtent( txn, eloc );
- invariant( e->xprev.isNull() );
- invariant( e->xnext.isNull() );
- }
- else {
- invariant( !_details->firstExtent(txn).isNull() );
- *txn->recoveryUnit()->writing(&e->xprev) = _details->lastExtent(txn);
- *txn->recoveryUnit()->writing(&_extentManager->getExtent(_details->lastExtent(txn))->xnext) = eloc;
- _details->setLastExtent( txn, eloc );
+ /* add to the free list */
+ {
+ _details->incrementStats(txn, -1 * todelete->netLength(), -1);
+
+ if (_isSystemIndexes) {
+            /* temp: if in system.indexes, don't reuse, and zero out: we want to be
+               careful until this is validated more, as IndexDetails has pointers
+               to this disk location, so an incorrectly done remove would cause
+               a lot of problems.
+ */
+ memset(txn->recoveryUnit()->writingPtr(todelete, todelete->lengthWithHeaders()),
+ 0,
+ todelete->lengthWithHeaders());
+ } else {
+ // this is defensive so we can detect if we are still using a location
+ // that was deleted
+ memset(txn->recoveryUnit()->writingPtr(todelete->data(), 4), 0xee, 4);
+ addDeletedRec(txn, dl);
}
+ }
+}
- _details->setLastExtentSize( txn, e->length );
+std::unique_ptr<RecordCursor> RecordStoreV1Base::getCursorForRepair(OperationContext* txn) const {
+ return stdx::make_unique<RecordStoreV1RepairCursor>(txn, this);
+}
- addDeletedRec(txn, emptyLoc);
+void RecordStoreV1Base::_addRecordToRecListInExtent(OperationContext* txn,
+ MmapV1RecordHeader* r,
+ DiskLoc loc) {
+ dassert(recordFor(loc) == r);
+ DiskLoc extentLoc = _getExtentLocForRecord(txn, loc);
+ Extent* e = _getExtent(txn, extentLoc);
+ if (e->lastRecord.isNull()) {
+ *txn->recoveryUnit()->writing(&e->firstRecord) = loc;
+ *txn->recoveryUnit()->writing(&e->lastRecord) = loc;
+ r->prevOfs() = r->nextOfs() = DiskLoc::NullOfs;
+ } else {
+ MmapV1RecordHeader* oldlast = recordFor(e->lastRecord);
+ r->prevOfs() = e->lastRecord.getOfs();
+ r->nextOfs() = DiskLoc::NullOfs;
+ txn->recoveryUnit()->writingInt(oldlast->nextOfs()) = loc.getOfs();
+ *txn->recoveryUnit()->writing(&e->lastRecord) = loc;
}
+}
- Status RecordStoreV1Base::validate( OperationContext* txn,
- bool full, bool scanData,
- ValidateAdaptor* adaptor,
- ValidateResults* results, BSONObjBuilder* output ) {
+void RecordStoreV1Base::increaseStorageSize(OperationContext* txn, int size, bool enforceQuota) {
+ DiskLoc eloc = _extentManager->allocateExtent(txn, isCapped(), size, enforceQuota);
+ Extent* e = _extentManager->getExtent(eloc);
+ invariant(e);
-        // 1) basic stats that require no iteration
- // 2) extent level info
- // 3) check extent start and end
- // 4) check each non-deleted record
- // 5) check deleted list
+ *txn->recoveryUnit()->writing(&e->nsDiagnostic) = _ns;
- // -------------
+ txn->recoveryUnit()->writing(&e->xnext)->Null();
+ txn->recoveryUnit()->writing(&e->xprev)->Null();
+ txn->recoveryUnit()->writing(&e->firstRecord)->Null();
+ txn->recoveryUnit()->writing(&e->lastRecord)->Null();
- // 1111111111111111111
- if ( isCapped() ){
- output->appendBool("capped", true);
- output->appendNumber("max", _details->maxCappedDocs());
- }
+ DiskLoc emptyLoc = _findFirstSpot(txn, eloc, e);
- output->appendNumber("datasize", _details->dataSize());
- output->appendNumber("nrecords", _details->numRecords());
- output->appendNumber("lastExtentSize", _details->lastExtentSize(txn));
-
- if ( _details->firstExtent(txn).isNull() )
- output->append( "firstExtent", "null" );
- else
- output->append( "firstExtent",
- str::stream() << _details->firstExtent(txn).toString()
- << " ns:"
- << _getExtent( txn, _details->firstExtent(txn) )->nsDiagnostic.toString());
- if ( _details->lastExtent(txn).isNull() )
- output->append( "lastExtent", "null" );
- else
- output->append( "lastExtent", str::stream() << _details->lastExtent(txn).toString()
- << " ns:"
- << _getExtent( txn, _details->lastExtent(txn) )->nsDiagnostic.toString());
-
- // 22222222222222222222222222
- { // validate extent basics
- BSONArrayBuilder extentData;
- int extentCount = 0;
- DiskLoc extentDiskLoc;
- try {
- if ( !_details->firstExtent(txn).isNull() ) {
- _getExtent( txn, _details->firstExtent(txn) )->assertOk();
- _getExtent( txn, _details->lastExtent(txn) )->assertOk();
- }
+ if (_details->lastExtent(txn).isNull()) {
+ invariant(_details->firstExtent(txn).isNull());
+ _details->setFirstExtent(txn, eloc);
+ _details->setLastExtent(txn, eloc);
+ _details->setCapExtent(txn, eloc);
+ invariant(e->xprev.isNull());
+ invariant(e->xnext.isNull());
+ } else {
+ invariant(!_details->firstExtent(txn).isNull());
+ *txn->recoveryUnit()->writing(&e->xprev) = _details->lastExtent(txn);
+ *txn->recoveryUnit()->writing(
+ &_extentManager->getExtent(_details->lastExtent(txn))->xnext) = eloc;
+ _details->setLastExtent(txn, eloc);
+ }
- extentDiskLoc = _details->firstExtent(txn);
- while (!extentDiskLoc.isNull()) {
- Extent* thisExtent = _getExtent( txn, extentDiskLoc );
- if (full) {
- extentData << thisExtent->dump();
- }
- if (!thisExtent->validates(extentDiskLoc, &results->errors)) {
- results->valid = false;
- }
- DiskLoc nextDiskLoc = thisExtent->xnext;
+ _details->setLastExtentSize(txn, e->length);
- if (extentCount > 0 && !nextDiskLoc.isNull()
- && _getExtent( txn, nextDiskLoc )->xprev != extentDiskLoc) {
- StringBuilder sb;
- sb << "'xprev' pointer " << _getExtent( txn, nextDiskLoc )->xprev.toString()
- << " in extent " << nextDiskLoc.toString()
- << " does not point to extent " << extentDiskLoc.toString();
- results->errors.push_back( sb.str() );
- results->valid = false;
- }
- if (nextDiskLoc.isNull() && extentDiskLoc != _details->lastExtent(txn)) {
- StringBuilder sb;
- sb << "'lastExtent' pointer " << _details->lastExtent(txn).toString()
- << " does not point to last extent in list " << extentDiskLoc.toString();
- results->errors.push_back( sb.str() );
- results->valid = false;
- }
- extentDiskLoc = nextDiskLoc;
- extentCount++;
- txn->checkForInterrupt();
- }
- }
- catch (const DBException& e) {
- StringBuilder sb;
- sb << "exception validating extent " << extentCount
- << ": " << e.what();
- results->errors.push_back( sb.str() );
- results->valid = false;
- return Status::OK();
- }
- output->append("extentCount", extentCount);
+ addDeletedRec(txn, emptyLoc);
+}
- if ( full )
- output->appendArray( "extents" , extentData.arr() );
+Status RecordStoreV1Base::validate(OperationContext* txn,
+ bool full,
+ bool scanData,
+ ValidateAdaptor* adaptor,
+ ValidateResults* results,
+ BSONObjBuilder* output) {
+    // 1) basic stats that require no iteration
+ // 2) extent level info
+ // 3) check extent start and end
+ // 4) check each non-deleted record
+ // 5) check deleted list
+
+ // -------------
+
+ // 1111111111111111111
+ if (isCapped()) {
+ output->appendBool("capped", true);
+ output->appendNumber("max", _details->maxCappedDocs());
+ }
+
+ output->appendNumber("datasize", _details->dataSize());
+ output->appendNumber("nrecords", _details->numRecords());
+ output->appendNumber("lastExtentSize", _details->lastExtentSize(txn));
+
+ if (_details->firstExtent(txn).isNull())
+ output->append("firstExtent", "null");
+ else
+ output->append("firstExtent",
+ str::stream()
+ << _details->firstExtent(txn).toString() << " ns:"
+ << _getExtent(txn, _details->firstExtent(txn))->nsDiagnostic.toString());
+ if (_details->lastExtent(txn).isNull())
+ output->append("lastExtent", "null");
+ else
+ output->append("lastExtent",
+ str::stream()
+ << _details->lastExtent(txn).toString() << " ns:"
+ << _getExtent(txn, _details->lastExtent(txn))->nsDiagnostic.toString());
+
+ // 22222222222222222222222222
+ { // validate extent basics
+ BSONArrayBuilder extentData;
+ int extentCount = 0;
+ DiskLoc extentDiskLoc;
+ try {
+ if (!_details->firstExtent(txn).isNull()) {
+ _getExtent(txn, _details->firstExtent(txn))->assertOk();
+ _getExtent(txn, _details->lastExtent(txn))->assertOk();
+ }
+ extentDiskLoc = _details->firstExtent(txn);
+ while (!extentDiskLoc.isNull()) {
+ Extent* thisExtent = _getExtent(txn, extentDiskLoc);
+ if (full) {
+ extentData << thisExtent->dump();
+ }
+ if (!thisExtent->validates(extentDiskLoc, &results->errors)) {
+ results->valid = false;
+ }
+ DiskLoc nextDiskLoc = thisExtent->xnext;
+
+ if (extentCount > 0 && !nextDiskLoc.isNull() &&
+ _getExtent(txn, nextDiskLoc)->xprev != extentDiskLoc) {
+ StringBuilder sb;
+ sb << "'xprev' pointer " << _getExtent(txn, nextDiskLoc)->xprev.toString()
+ << " in extent " << nextDiskLoc.toString() << " does not point to extent "
+ << extentDiskLoc.toString();
+ results->errors.push_back(sb.str());
+ results->valid = false;
+ }
+ if (nextDiskLoc.isNull() && extentDiskLoc != _details->lastExtent(txn)) {
+ StringBuilder sb;
+ sb << "'lastExtent' pointer " << _details->lastExtent(txn).toString()
+ << " does not point to last extent in list " << extentDiskLoc.toString();
+ results->errors.push_back(sb.str());
+ results->valid = false;
+ }
+ extentDiskLoc = nextDiskLoc;
+ extentCount++;
+ txn->checkForInterrupt();
+ }
+ } catch (const DBException& e) {
+ StringBuilder sb;
+ sb << "exception validating extent " << extentCount << ": " << e.what();
+ results->errors.push_back(sb.str());
+ results->valid = false;
+ return Status::OK();
}
+ output->append("extentCount", extentCount);
+
+ if (full)
+ output->appendArray("extents", extentData.arr());
+ }
+ try {
+ // 333333333333333333333333333
+ bool testingLastExtent = false;
try {
- // 333333333333333333333333333
- bool testingLastExtent = false;
- try {
- DiskLoc firstExtentLoc = _details->firstExtent(txn);
- if (firstExtentLoc.isNull()) {
- // this is ok
+ DiskLoc firstExtentLoc = _details->firstExtent(txn);
+ if (firstExtentLoc.isNull()) {
+ // this is ok
+ } else {
+ output->append("firstExtentDetails", _getExtent(txn, firstExtentLoc)->dump());
+ if (!_getExtent(txn, firstExtentLoc)->xprev.isNull()) {
+ StringBuilder sb;
+ sb << "'xprev' pointer in 'firstExtent' "
+ << _details->firstExtent(txn).toString() << " is "
+ << _getExtent(txn, firstExtentLoc)->xprev.toString() << ", should be null";
+ results->errors.push_back(sb.str());
+ results->valid = false;
}
- else {
- output->append("firstExtentDetails", _getExtent(txn, firstExtentLoc)->dump());
- if (!_getExtent(txn, firstExtentLoc)->xprev.isNull()) {
+ }
+ testingLastExtent = true;
+ DiskLoc lastExtentLoc = _details->lastExtent(txn);
+ if (lastExtentLoc.isNull()) {
+ // this is ok
+ } else {
+ if (firstExtentLoc != lastExtentLoc) {
+ output->append("lastExtentDetails", _getExtent(txn, lastExtentLoc)->dump());
+ if (!_getExtent(txn, lastExtentLoc)->xnext.isNull()) {
StringBuilder sb;
- sb << "'xprev' pointer in 'firstExtent' " << _details->firstExtent(txn).toString()
- << " is " << _getExtent(txn, firstExtentLoc)->xprev.toString()
+ sb << "'xnext' pointer in 'lastExtent' " << lastExtentLoc.toString()
+ << " is " << _getExtent(txn, lastExtentLoc)->xnext.toString()
<< ", should be null";
- results->errors.push_back( sb.str() );
+ results->errors.push_back(sb.str());
results->valid = false;
}
}
- testingLastExtent = true;
- DiskLoc lastExtentLoc = _details->lastExtent(txn);
- if (lastExtentLoc.isNull()) {
- // this is ok
- }
- else {
- if (firstExtentLoc != lastExtentLoc) {
- output->append("lastExtentDetails", _getExtent(txn, lastExtentLoc)->dump());
- if (!_getExtent(txn, lastExtentLoc)->xnext.isNull()) {
- StringBuilder sb;
- sb << "'xnext' pointer in 'lastExtent' " << lastExtentLoc.toString()
- << " is " << _getExtent(txn, lastExtentLoc)->xnext.toString()
- << ", should be null";
- results->errors.push_back( sb.str() );
- results->valid = false;
- }
- }
- }
- }
- catch (const DBException& e) {
- StringBuilder sb;
- sb << "exception processing '"
- << (testingLastExtent ? "lastExtent" : "firstExtent")
- << "': " << e.what();
- results->errors.push_back( sb.str() );
- results->valid = false;
}
+ } catch (const DBException& e) {
+ StringBuilder sb;
+ sb << "exception processing '" << (testingLastExtent ? "lastExtent" : "firstExtent")
+ << "': " << e.what();
+ results->errors.push_back(sb.str());
+ results->valid = false;
+ }
- // 4444444444444444444444444
-
- set<DiskLoc> recs;
- if( scanData ) {
- int n = 0;
- int nInvalid = 0;
- long long nQuantizedSize = 0;
- long long len = 0;
- long long nlen = 0;
- long long bsonLen = 0;
- int outOfOrder = 0;
- DiskLoc dl_last;
-
- auto cursor = getCursor(txn);
- while (auto record = cursor->next()) {
- const auto dl = DiskLoc::fromRecordId(record->id);
- n++;
-
- if ( n < 1000000 )
- recs.insert(dl);
- if ( isCapped() ) {
- if ( dl < dl_last )
- outOfOrder++;
- dl_last = dl;
- }
-
- MmapV1RecordHeader *r = recordFor(dl);
- len += r->lengthWithHeaders();
- nlen += r->netLength();
+ // 4444444444444444444444444
+
+ set<DiskLoc> recs;
+ if (scanData) {
+ int n = 0;
+ int nInvalid = 0;
+ long long nQuantizedSize = 0;
+ long long len = 0;
+ long long nlen = 0;
+ long long bsonLen = 0;
+ int outOfOrder = 0;
+ DiskLoc dl_last;
+
+ auto cursor = getCursor(txn);
+ while (auto record = cursor->next()) {
+ const auto dl = DiskLoc::fromRecordId(record->id);
+ n++;
+
+ if (n < 1000000)
+ recs.insert(dl);
+ if (isCapped()) {
+ if (dl < dl_last)
+ outOfOrder++;
+ dl_last = dl;
+ }
- if ( isQuantized( r->lengthWithHeaders() ) ) {
- // Count the number of records having a size consistent with
- // the quantizeAllocationSpace quantization implementation.
- ++nQuantizedSize;
- }
+ MmapV1RecordHeader* r = recordFor(dl);
+ len += r->lengthWithHeaders();
+ nlen += r->netLength();
- if (full){
- size_t dataSize = 0;
- const Status status = adaptor->validate( r->toRecordData(), &dataSize );
- if (!status.isOK()) {
- results->valid = false;
-                        if (nInvalid == 0) // only log once
- results->errors.push_back( "invalid object detected (see logs)" );
-
- nInvalid++;
- log() << "Invalid object detected in " << _ns
- << ": " << status.reason();
- }
- else {
- bsonLen += dataSize;
- }
- }
+ if (isQuantized(r->lengthWithHeaders())) {
+ // Count the number of records having a size consistent with
+ // the quantizeAllocationSpace quantization implementation.
+ ++nQuantizedSize;
}
- if ( isCapped() && !_details->capLooped() ) {
- output->append("cappedOutOfOrder", outOfOrder);
- if ( outOfOrder > 1 ) {
+ if (full) {
+ size_t dataSize = 0;
+ const Status status = adaptor->validate(r->toRecordData(), &dataSize);
+ if (!status.isOK()) {
results->valid = false;
- results->errors.push_back( "too many out of order records" );
+                    if (nInvalid == 0)  // only log once
+ results->errors.push_back("invalid object detected (see logs)");
+
+ nInvalid++;
+ log() << "Invalid object detected in " << _ns << ": " << status.reason();
+ } else {
+ bsonLen += dataSize;
}
}
- output->append("objectsFound", n);
+ }
- if (full) {
- output->append("invalidObjects", nInvalid);
+ if (isCapped() && !_details->capLooped()) {
+ output->append("cappedOutOfOrder", outOfOrder);
+ if (outOfOrder > 1) {
+ results->valid = false;
+ results->errors.push_back("too many out of order records");
}
+ }
+ output->append("objectsFound", n);
- output->appendNumber("nQuantizedSize", nQuantizedSize);
- output->appendNumber("bytesWithHeaders", len);
- output->appendNumber("bytesWithoutHeaders", nlen);
+ if (full) {
+ output->append("invalidObjects", nInvalid);
+ }
- if (full) {
- output->appendNumber("bytesBson", bsonLen);
- }
- } // end scanData
+ output->appendNumber("nQuantizedSize", nQuantizedSize);
+ output->appendNumber("bytesWithHeaders", len);
+ output->appendNumber("bytesWithoutHeaders", nlen);
- // 55555555555555555555555555
- BSONArrayBuilder deletedListArray;
- for ( int i = 0; i < Buckets; i++ ) {
- deletedListArray << _details->deletedListEntry(i).isNull();
+ if (full) {
+ output->appendNumber("bytesBson", bsonLen);
}
+ } // end scanData
+
+ // 55555555555555555555555555
+ BSONArrayBuilder deletedListArray;
+ for (int i = 0; i < Buckets; i++) {
+ deletedListArray << _details->deletedListEntry(i).isNull();
+ }
- int ndel = 0;
- long long delSize = 0;
- BSONArrayBuilder delBucketSizes;
- int incorrect = 0;
- for ( int i = 0; i < Buckets; i++ ) {
- DiskLoc loc = _details->deletedListEntry(i);
- try {
- int k = 0;
- while ( !loc.isNull() ) {
- if ( recs.count(loc) )
- incorrect++;
- ndel++;
-
- if ( loc.questionable() ) {
- if( isCapped() && !loc.isValid() && i == 1 ) {
-                            /* the constructor for NamespaceDetails intentionally sets deletedList[1] to invalid;
-                               see comments in namespace.h
- */
- break;
- }
-
- string err( str::stream() << "bad pointer in deleted record list: "
- << loc.toString()
- << " bucket: " << i
- << " k: " << k );
- results->errors.push_back( err );
- results->valid = false;
+ int ndel = 0;
+ long long delSize = 0;
+ BSONArrayBuilder delBucketSizes;
+ int incorrect = 0;
+ for (int i = 0; i < Buckets; i++) {
+ DiskLoc loc = _details->deletedListEntry(i);
+ try {
+ int k = 0;
+ while (!loc.isNull()) {
+ if (recs.count(loc))
+ incorrect++;
+ ndel++;
+
+ if (loc.questionable()) {
+ if (isCapped() && !loc.isValid() && i == 1) {
+                    /* the constructor for NamespaceDetails intentionally sets deletedList[1] to invalid;
+                       see comments in namespace.h
+ */
break;
}
- const DeletedRecord* d = deletedRecordFor(loc);
- delSize += d->lengthWithHeaders();
- loc = d->nextDeleted();
- k++;
- txn->checkForInterrupt();
+ string err(str::stream()
+ << "bad pointer in deleted record list: " << loc.toString()
+ << " bucket: " << i << " k: " << k);
+ results->errors.push_back(err);
+ results->valid = false;
+ break;
}
- delBucketSizes << k;
- }
- catch (...) {
- results->errors.push_back( (string)"exception in deleted chain for bucket " +
- BSONObjBuilder::numStr(i) );
- results->valid = false;
- }
- }
- output->appendNumber("deletedCount", ndel);
- output->appendNumber("deletedSize", delSize);
- if ( full ) {
- output->append( "delBucketSizes", delBucketSizes.arr() );
- }
- if ( incorrect ) {
- results->errors.push_back( BSONObjBuilder::numStr(incorrect) +
- " records from datafile are in deleted list" );
+ const DeletedRecord* d = deletedRecordFor(loc);
+ delSize += d->lengthWithHeaders();
+ loc = d->nextDeleted();
+ k++;
+ txn->checkForInterrupt();
+ }
+ delBucketSizes << k;
+ } catch (...) {
+ results->errors.push_back((string) "exception in deleted chain for bucket " +
+ BSONObjBuilder::numStr(i));
results->valid = false;
}
-
}
-        catch (const AssertionException&) {
- results->errors.push_back( "exception during validate" );
- results->valid = false;
+ output->appendNumber("deletedCount", ndel);
+ output->appendNumber("deletedSize", delSize);
+ if (full) {
+ output->append("delBucketSizes", delBucketSizes.arr());
}
- return Status::OK();
- }
-
- void RecordStoreV1Base::appendCustomStats( OperationContext* txn,
- BSONObjBuilder* result,
- double scale ) const {
- result->append( "lastExtentSize", _details->lastExtentSize(txn) / scale );
- result->append( "paddingFactor", 1.0 ); // hard coded
- result->append( "paddingFactorNote", "paddingFactor is unused and unmaintained in 3.0. It "
- "remains hard coded to 1.0 for compatibility only." );
- result->append( "userFlags", _details->userFlags() );
- result->appendBool( "capped", isCapped() );
- if ( isCapped() ) {
- result->appendNumber( "max", _details->maxCappedDocs() );
- result->appendNumber( "maxSize", static_cast<long long>(storageSize(txn, NULL, 0) /
- scale) );
+ if (incorrect) {
+ results->errors.push_back(BSONObjBuilder::numStr(incorrect) +
+ " records from datafile are in deleted list");
+ results->valid = false;
}
+
+    } catch (const AssertionException&) {
+ results->errors.push_back("exception during validate");
+ results->valid = false;
}
+ return Status::OK();
+}
- namespace {
- struct touch_location {
- const char* root;
- size_t length;
- };
+void RecordStoreV1Base::appendCustomStats(OperationContext* txn,
+ BSONObjBuilder* result,
+ double scale) const {
+ result->append("lastExtentSize", _details->lastExtentSize(txn) / scale);
+ result->append("paddingFactor", 1.0); // hard coded
+ result->append("paddingFactorNote",
+ "paddingFactor is unused and unmaintained in 3.0. It "
+ "remains hard coded to 1.0 for compatibility only.");
+ result->append("userFlags", _details->userFlags());
+ result->appendBool("capped", isCapped());
+ if (isCapped()) {
+ result->appendNumber("max", _details->maxCappedDocs());
+ result->appendNumber("maxSize", static_cast<long long>(storageSize(txn, NULL, 0) / scale));
}
+}
- Status RecordStoreV1Base::touch( OperationContext* txn, BSONObjBuilder* output ) const {
- Timer t;
- std::vector<touch_location> ranges;
- {
- DiskLoc nextLoc = _details->firstExtent(txn);
- Extent* ext = nextLoc.isNull() ? NULL : _getExtent( txn, nextLoc );
- while ( ext ) {
- touch_location tl;
- tl.root = reinterpret_cast<const char*>(ext);
- tl.length = ext->length;
- ranges.push_back(tl);
+namespace {
+struct touch_location {
+ const char* root;
+ size_t length;
+};
+}  // namespace
- nextLoc = ext->xnext;
- if ( nextLoc.isNull() )
- ext = NULL;
- else
- ext = _getExtent( txn, nextLoc );
- }
- }
+Status RecordStoreV1Base::touch(OperationContext* txn, BSONObjBuilder* output) const {
+ Timer t;
- std::string progress_msg = "touch " + std::string(txn->getNS()) + " extents";
- stdx::unique_lock<Client> lk(*txn->getClient());
- ProgressMeterHolder pm(*txn->setMessage_inlock(progress_msg.c_str(),
- "Touch Progress",
- ranges.size()));
- lk.unlock();
-
- for ( std::vector<touch_location>::iterator it = ranges.begin(); it != ranges.end(); ++it ) {
- touch_pages( it->root, it->length );
- pm.hit();
- txn->checkForInterrupt();
- }
- pm.finished();
+ std::vector<touch_location> ranges;
+ {
+ DiskLoc nextLoc = _details->firstExtent(txn);
+ Extent* ext = nextLoc.isNull() ? NULL : _getExtent(txn, nextLoc);
+ while (ext) {
+ touch_location tl;
+ tl.root = reinterpret_cast<const char*>(ext);
+ tl.length = ext->length;
+ ranges.push_back(tl);
- if ( output ) {
- output->append( "numRanges", static_cast<int>( ranges.size() ) );
- output->append( "millis", t.millis() );
+ nextLoc = ext->xnext;
+ if (nextLoc.isNull())
+ ext = NULL;
+ else
+ ext = _getExtent(txn, nextLoc);
}
-
- return Status::OK();
}
- boost::optional<Record> RecordStoreV1Base::IntraExtentIterator::next() {
- if (_curr.isNull()) return {};
- auto out = _curr.toRecordId();
- advance();
- return {{out, _rs->dataFor(_txn, out)}};
+ std::string progress_msg = "touch " + std::string(txn->getNS()) + " extents";
+ stdx::unique_lock<Client> lk(*txn->getClient());
+ ProgressMeterHolder pm(
+ *txn->setMessage_inlock(progress_msg.c_str(), "Touch Progress", ranges.size()));
+ lk.unlock();
+
+ for (std::vector<touch_location>::iterator it = ranges.begin(); it != ranges.end(); ++it) {
+ touch_pages(it->root, it->length);
+ pm.hit();
+ txn->checkForInterrupt();
}
+ pm.finished();
- boost::optional<Record> RecordStoreV1Base::IntraExtentIterator::seekExact(const RecordId& id) {
- invariant(!"seekExact not supported");
+ if (output) {
+ output->append("numRanges", static_cast<int>(ranges.size()));
+ output->append("millis", t.millis());
}
- void RecordStoreV1Base::IntraExtentIterator::advance() {
- if (_curr.isNull())
- return;
+ return Status::OK();
+}
- const MmapV1RecordHeader* rec = recordFor(_curr);
- const int nextOfs = _forward ? rec->nextOfs() : rec->prevOfs();
- _curr = (nextOfs == DiskLoc::NullOfs ? DiskLoc() : DiskLoc(_curr.a(), nextOfs));
- }
+boost::optional<Record> RecordStoreV1Base::IntraExtentIterator::next() {
+ if (_curr.isNull())
+ return {};
+ auto out = _curr.toRecordId();
+ advance();
+ return {{out, _rs->dataFor(_txn, out)}};
+}
- void RecordStoreV1Base::IntraExtentIterator::invalidate(const RecordId& rid) {
- if (rid == _curr.toRecordId()) {
- advance();
- }
- }
+boost::optional<Record> RecordStoreV1Base::IntraExtentIterator::seekExact(const RecordId& id) {
+ invariant(!"seekExact not supported");
+}
+
+void RecordStoreV1Base::IntraExtentIterator::advance() {
+ if (_curr.isNull())
+ return;
- std::unique_ptr<RecordFetcher> RecordStoreV1Base::IntraExtentIterator::fetcherForNext() const {
- return _rs->_extentManager->recordNeedsFetch(_curr);
+ const MmapV1RecordHeader* rec = recordFor(_curr);
+ const int nextOfs = _forward ? rec->nextOfs() : rec->prevOfs();
+ _curr = (nextOfs == DiskLoc::NullOfs ? DiskLoc() : DiskLoc(_curr.a(), nextOfs));
+}
+
+void RecordStoreV1Base::IntraExtentIterator::invalidate(const RecordId& rid) {
+ if (rid == _curr.toRecordId()) {
+ advance();
}
+}
- int RecordStoreV1Base::quantizeAllocationSpace(int allocSize) {
- invariant(allocSize <= MaxAllowedAllocation);
- for ( int i = 0; i < Buckets - 2; i++ ) { // last two bucketSizes are invalid
- if ( bucketSizes[i] >= allocSize ) {
- // Return the size of the first bucket sized >= the requested size.
- return bucketSizes[i];
- }
+std::unique_ptr<RecordFetcher> RecordStoreV1Base::IntraExtentIterator::fetcherForNext() const {
+ return _rs->_extentManager->recordNeedsFetch(_curr);
+}
+
+int RecordStoreV1Base::quantizeAllocationSpace(int allocSize) {
+ invariant(allocSize <= MaxAllowedAllocation);
+ for (int i = 0; i < Buckets - 2; i++) { // last two bucketSizes are invalid
+ if (bucketSizes[i] >= allocSize) {
+ // Return the size of the first bucket sized >= the requested size.
+ return bucketSizes[i];
}
- invariant(false); // prior invariant means we should find something.
}
+ invariant(false); // prior invariant means we should find something.
+}
- bool RecordStoreV1Base::isQuantized(int recordSize) {
- if (recordSize > MaxAllowedAllocation)
- return false;
+bool RecordStoreV1Base::isQuantized(int recordSize) {
+ if (recordSize > MaxAllowedAllocation)
+ return false;
- return recordSize == quantizeAllocationSpace(recordSize);
- }
+ return recordSize == quantizeAllocationSpace(recordSize);
+}
- int RecordStoreV1Base::bucket(int size) {
- for ( int i = 0; i < Buckets; i++ ) {
- if ( bucketSizes[i] > size ) {
- // Return the first bucket sized _larger_ than the requested size. This is important
- // since we want all records in a bucket to be >= the quantized size, therefore the
- // quantized size must be the smallest allowed record per bucket.
- return i;
- }
+int RecordStoreV1Base::bucket(int size) {
+ for (int i = 0; i < Buckets; i++) {
+ if (bucketSizes[i] > size) {
+ // Return the first bucket sized _larger_ than the requested size. This is important
+ // since we want all records in a bucket to be >= the quantized size, therefore the
+ // quantized size must be the smallest allowed record per bucket.
+ return i;
}
- // Technically, this is reachable if size == INT_MAX, but it would be an error to pass that
- // in anyway since it would be impossible to have a record that large given the file and
- // extent headers.
- invariant(false);
}
+ // Technically, this is reachable if size == INT_MAX, but it would be an error to pass that
+ // in anyway since it would be impossible to have a record that large given the file and
+ // extent headers.
+ invariant(false);
+}
}
diff --git a/src/mongo/db/storage/mmap_v1/record_store_v1_base.h b/src/mongo/db/storage/mmap_v1/record_store_v1_base.h
index 4e1aa8de338..5c0437cce56 100644
--- a/src/mongo/db/storage/mmap_v1/record_store_v1_base.h
+++ b/src/mongo/db/storage/mmap_v1/record_store_v1_base.h
@@ -38,312 +38,319 @@
namespace mongo {
- class DeletedRecord;
- class DocWriter;
- class ExtentManager;
- class MmapV1RecordHeader;
- class OperationContext;
+class DeletedRecord;
+class DocWriter;
+class ExtentManager;
+class MmapV1RecordHeader;
+class OperationContext;
- struct Extent;
+struct Extent;
- class RecordStoreV1MetaData {
- public:
- virtual ~RecordStoreV1MetaData(){}
+class RecordStoreV1MetaData {
+public:
+ virtual ~RecordStoreV1MetaData() {}
- virtual const DiskLoc& capExtent() const = 0;
- virtual void setCapExtent( OperationContext* txn, const DiskLoc& loc ) = 0;
+ virtual const DiskLoc& capExtent() const = 0;
+ virtual void setCapExtent(OperationContext* txn, const DiskLoc& loc) = 0;
- virtual const DiskLoc& capFirstNewRecord() const = 0;
- virtual void setCapFirstNewRecord( OperationContext* txn, const DiskLoc& loc ) = 0;
+ virtual const DiskLoc& capFirstNewRecord() const = 0;
+ virtual void setCapFirstNewRecord(OperationContext* txn, const DiskLoc& loc) = 0;
- bool capLooped() const { return capFirstNewRecord().isValid(); }
+ bool capLooped() const {
+ return capFirstNewRecord().isValid();
+ }
- virtual long long dataSize() const = 0;
- virtual long long numRecords() const = 0;
+ virtual long long dataSize() const = 0;
+ virtual long long numRecords() const = 0;
- virtual void incrementStats( OperationContext* txn,
- long long dataSizeIncrement,
- long long numRecordsIncrement ) = 0;
+ virtual void incrementStats(OperationContext* txn,
+ long long dataSizeIncrement,
+ long long numRecordsIncrement) = 0;
- virtual void setStats( OperationContext* txn,
- long long dataSize,
- long long numRecords ) = 0;
+ virtual void setStats(OperationContext* txn, long long dataSize, long long numRecords) = 0;
- virtual DiskLoc deletedListEntry( int bucket ) const = 0;
- virtual void setDeletedListEntry( OperationContext* txn,
- int bucket,
- const DiskLoc& loc ) = 0;
+ virtual DiskLoc deletedListEntry(int bucket) const = 0;
+ virtual void setDeletedListEntry(OperationContext* txn, int bucket, const DiskLoc& loc) = 0;
- virtual DiskLoc deletedListLegacyGrabBag() const = 0;
- virtual void setDeletedListLegacyGrabBag(OperationContext* txn, const DiskLoc& loc) = 0;
+ virtual DiskLoc deletedListLegacyGrabBag() const = 0;
+ virtual void setDeletedListLegacyGrabBag(OperationContext* txn, const DiskLoc& loc) = 0;
- virtual void orphanDeletedList(OperationContext* txn) = 0;
+ virtual void orphanDeletedList(OperationContext* txn) = 0;
- virtual const DiskLoc& firstExtent( OperationContext* txn ) const = 0;
- virtual void setFirstExtent( OperationContext* txn, const DiskLoc& loc ) = 0;
+ virtual const DiskLoc& firstExtent(OperationContext* txn) const = 0;
+ virtual void setFirstExtent(OperationContext* txn, const DiskLoc& loc) = 0;
- virtual const DiskLoc& lastExtent( OperationContext* txn ) const = 0;
- virtual void setLastExtent( OperationContext* txn, const DiskLoc& loc ) = 0;
+ virtual const DiskLoc& lastExtent(OperationContext* txn) const = 0;
+ virtual void setLastExtent(OperationContext* txn, const DiskLoc& loc) = 0;
- virtual bool isCapped() const = 0;
+ virtual bool isCapped() const = 0;
- virtual bool isUserFlagSet( int flag ) const = 0;
- virtual int userFlags() const = 0;
- virtual bool setUserFlag( OperationContext* txn, int flag ) = 0;
- virtual bool clearUserFlag( OperationContext* txn, int flag ) = 0;
- virtual bool replaceUserFlags( OperationContext* txn, int flags ) = 0;
+ virtual bool isUserFlagSet(int flag) const = 0;
+ virtual int userFlags() const = 0;
+ virtual bool setUserFlag(OperationContext* txn, int flag) = 0;
+ virtual bool clearUserFlag(OperationContext* txn, int flag) = 0;
+ virtual bool replaceUserFlags(OperationContext* txn, int flags) = 0;
- virtual int lastExtentSize( OperationContext* txn) const = 0;
- virtual void setLastExtentSize( OperationContext* txn, int newMax ) = 0;
+ virtual int lastExtentSize(OperationContext* txn) const = 0;
+ virtual void setLastExtentSize(OperationContext* txn, int newMax) = 0;
- virtual long long maxCappedDocs() const = 0;
-
- };
+ virtual long long maxCappedDocs() const = 0;
+};
+/**
+ * Class that stores active cursors that have been saved (as part of yielding) to
+ * allow them to be invalidated if the thing they pointed at goes away. The registry is
+ * thread-safe, as readers may concurrently register and remove their cursors. Contention is
+ * expected to be very low, as yielding is infrequent. This logically belongs to the
+ * RecordStore, but is not contained in it to facilitate unit testing.
+ */
+class SavedCursorRegistry {
+public:
/**
- * Class that stores active cursors that have been saved (as part of yielding) to
- * allow them to be invalidated if the thing they pointed at goes away. The registry is
- * thread-safe, as readers may concurrently register and remove their cursors. Contention is
- * expected to be very low, as yielding is infrequent. This logically belongs to the
- * RecordStore, but is not contained in it to facilitate unit testing.
+ * The destructor ensures the cursor is unregistered when an exception is thrown.
+ * Note that the SavedCursor may outlive the registry it was saved in.
*/
- class SavedCursorRegistry {
- public:
- /**
- * The destructor ensures the cursor is unregistered when an exception is thrown.
- * Note that the SavedCursor may outlive the registry it was saved in.
- */
- struct SavedCursor {
- SavedCursor() : _registry(NULL) { }
- virtual ~SavedCursor() { if (_registry) _registry->unregisterCursor(this); }
- DiskLoc bucket;
- BSONObj key;
- DiskLoc loc;
-
- private:
- friend class SavedCursorRegistry;
- // Non-null iff registered. Accessed by owner or writer with MODE_X collection lock
- SavedCursorRegistry* _registry;
- };
-
- ~SavedCursorRegistry();
-
- /**
- * Adds given saved cursor to SavedCursorRegistry. Doesn't take ownership.
- */
- void registerCursor(SavedCursor* cursor);
-
- /**
- * Removes given saved cursor. Returns true if the cursor was still present, and false
- * if it had already been removed due to invalidation. Doesn't take ownership.
- */
- bool unregisterCursor(SavedCursor* cursor);
-
- /**
- * When a btree-bucket disappears due to merge/split or similar, this invalidates all
- * cursors that point at the same bucket by removing them from the registry.
- */
- void invalidateCursorsForBucket(DiskLoc bucket);
+ struct SavedCursor {
+ SavedCursor() : _registry(NULL) {}
+ virtual ~SavedCursor() {
+ if (_registry)
+ _registry->unregisterCursor(this);
+ }
+ DiskLoc bucket;
+ BSONObj key;
+ DiskLoc loc;
private:
- SpinLock _mutex;
- typedef unordered_set<SavedCursor *> SavedCursorSet; // SavedCursor pointers not owned here
- SavedCursorSet _cursors;
+ friend class SavedCursorRegistry;
+ // Non-null iff registered. Accessed by owner or writer with MODE_X collection lock
+ SavedCursorRegistry* _registry;
};
- class RecordStoreV1Base : public RecordStore {
- public:
-
- static const int Buckets = 26;
- static const int MaxAllowedAllocation = 16*1024*1024 + 512*1024;
+ ~SavedCursorRegistry();
- static const int bucketSizes[];
+ /**
+ * Adds given saved cursor to SavedCursorRegistry. Doesn't take ownership.
+ */
+ void registerCursor(SavedCursor* cursor);
- // ------------
+ /**
+ * Removes given saved cursor. Returns true if the cursor was still present, and false
+ * if it had already been removed due to invalidation. Doesn't take ownership.
+ */
+ bool unregisterCursor(SavedCursor* cursor);
- class IntraExtentIterator;
+ /**
+ * When a btree-bucket disappears due to merge/split or similar, this invalidates all
+ * cursors that point at the same bucket by removing them from the registry.
+ */
+ void invalidateCursorsForBucket(DiskLoc bucket);
- /**
- * @param details - takes ownership
- * @param em - does NOT take ownership
- */
- RecordStoreV1Base(StringData ns,
- RecordStoreV1MetaData* details,
- ExtentManager* em,
- bool isSystemIndexes);
+private:
+ SpinLock _mutex;
+ typedef unordered_set<SavedCursor*> SavedCursorSet; // SavedCursor pointers not owned here
+ SavedCursorSet _cursors;
+};
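// A minimal, self-contained sketch of the SavedCursorRegistry contract above,
// for illustration only: std::mutex stands in for SpinLock, a plain int for
// the DiskLoc bucket, and the names (ToyCursor, ToyRegistry) are hypothetical.
#include <mutex>
#include <unordered_set>

struct ToyCursor;

class ToyRegistry {
public:
    void registerCursor(ToyCursor* c);    // cursor saves itself while yielding
    bool unregisterCursor(ToyCursor* c);  // true iff it was still registered
    void invalidateCursorsForBucket(int bucket);

private:
    std::mutex _mutex;                        // the real code uses SpinLock
    std::unordered_set<ToyCursor*> _cursors;  // pointers not owned here
};

struct ToyCursor {
    int bucket = 0;
    ToyRegistry* _registry = nullptr;  // non-null iff registered
    ~ToyCursor() {
        if (_registry)
            _registry->unregisterCursor(this);  // mirrors SavedCursor's dtor
    }
};

void ToyRegistry::registerCursor(ToyCursor* c) {
    std::lock_guard<std::mutex> lk(_mutex);
    _cursors.insert(c);
    c->_registry = this;
}

bool ToyRegistry::unregisterCursor(ToyCursor* c) {
    std::lock_guard<std::mutex> lk(_mutex);
    c->_registry = nullptr;
    return _cursors.erase(c) > 0;
}

void ToyRegistry::invalidateCursorsForBucket(int bucket) {
    std::lock_guard<std::mutex> lk(_mutex);
    for (auto it = _cursors.begin(); it != _cursors.end();) {
        if ((*it)->bucket == bucket) {
            (*it)->_registry = nullptr;  // cursor must not unregister again
            it = _cursors.erase(it);
        } else {
            ++it;
        }
    }
}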
- virtual ~RecordStoreV1Base();
+class RecordStoreV1Base : public RecordStore {
+public:
+ static const int Buckets = 26;
+ static const int MaxAllowedAllocation = 16 * 1024 * 1024 + 512 * 1024;
- virtual long long dataSize( OperationContext* txn ) const { return _details->dataSize(); }
- virtual long long numRecords( OperationContext* txn ) const { return _details->numRecords(); }
+ static const int bucketSizes[];
- virtual int64_t storageSize( OperationContext* txn,
- BSONObjBuilder* extraInfo = NULL,
- int level = 0 ) const;
+ // ------------
- virtual RecordData dataFor( OperationContext* txn, const RecordId& loc ) const;
+ class IntraExtentIterator;
- virtual bool findRecord( OperationContext* txn, const RecordId& loc, RecordData* rd ) const;
+ /**
+ * @param details - takes ownership
+ * @param em - does NOT take ownership
+ */
+ RecordStoreV1Base(StringData ns,
+ RecordStoreV1MetaData* details,
+ ExtentManager* em,
+ bool isSystemIndexes);
- void deleteRecord( OperationContext* txn,
- const RecordId& dl );
+ virtual ~RecordStoreV1Base();
- StatusWith<RecordId> insertRecord( OperationContext* txn,
- const char* data,
- int len,
- bool enforceQuota );
+ virtual long long dataSize(OperationContext* txn) const {
+ return _details->dataSize();
+ }
+ virtual long long numRecords(OperationContext* txn) const {
+ return _details->numRecords();
+ }
- StatusWith<RecordId> insertRecord( OperationContext* txn,
- const DocWriter* doc,
- bool enforceQuota );
+ virtual int64_t storageSize(OperationContext* txn,
+ BSONObjBuilder* extraInfo = NULL,
+ int level = 0) const;
- virtual StatusWith<RecordId> updateRecord( OperationContext* txn,
- const RecordId& oldLocation,
- const char* data,
- int len,
- bool enforceQuota,
- UpdateNotifier* notifier );
+ virtual RecordData dataFor(OperationContext* txn, const RecordId& loc) const;
- virtual bool updateWithDamagesSupported() const;
+ virtual bool findRecord(OperationContext* txn, const RecordId& loc, RecordData* rd) const;
- virtual Status updateWithDamages( OperationContext* txn,
- const RecordId& loc,
- const RecordData& oldRec,
- const char* damageSource,
- const mutablebson::DamageVector& damages );
+ void deleteRecord(OperationContext* txn, const RecordId& dl);
- virtual std::unique_ptr<RecordCursor> getCursorForRepair( OperationContext* txn ) const;
+ StatusWith<RecordId> insertRecord(OperationContext* txn,
+ const char* data,
+ int len,
+ bool enforceQuota);
- void increaseStorageSize( OperationContext* txn, int size, bool enforceQuota );
+ StatusWith<RecordId> insertRecord(OperationContext* txn,
+ const DocWriter* doc,
+ bool enforceQuota);
- virtual Status validate( OperationContext* txn,
- bool full, bool scanData,
- ValidateAdaptor* adaptor,
- ValidateResults* results, BSONObjBuilder* output );
+ virtual StatusWith<RecordId> updateRecord(OperationContext* txn,
+ const RecordId& oldLocation,
+ const char* data,
+ int len,
+ bool enforceQuota,
+ UpdateNotifier* notifier);
- virtual void appendCustomStats( OperationContext* txn,
- BSONObjBuilder* result,
- double scale ) const;
+ virtual bool updateWithDamagesSupported() const;
- virtual Status touch( OperationContext* txn, BSONObjBuilder* output ) const;
+ virtual Status updateWithDamages(OperationContext* txn,
+ const RecordId& loc,
+ const RecordData& oldRec,
+ const char* damageSource,
+ const mutablebson::DamageVector& damages);
- const RecordStoreV1MetaData* details() const { return _details.get(); }
+ virtual std::unique_ptr<RecordCursor> getCursorForRepair(OperationContext* txn) const;
- // This keeps track of cursors saved during yielding, for invalidation purposes.
- SavedCursorRegistry savedCursors;
+ void increaseStorageSize(OperationContext* txn, int size, bool enforceQuota);
- DiskLoc getExtentLocForRecord( OperationContext* txn, const DiskLoc& loc ) const;
+ virtual Status validate(OperationContext* txn,
+ bool full,
+ bool scanData,
+ ValidateAdaptor* adaptor,
+ ValidateResults* results,
+ BSONObjBuilder* output);
- DiskLoc getNextRecord( OperationContext* txn, const DiskLoc& loc ) const;
- DiskLoc getPrevRecord( OperationContext* txn, const DiskLoc& loc ) const;
+ virtual void appendCustomStats(OperationContext* txn,
+ BSONObjBuilder* result,
+ double scale) const;
- DiskLoc getNextRecordInExtent( OperationContext* txn, const DiskLoc& loc ) const;
- DiskLoc getPrevRecordInExtent( OperationContext* txn, const DiskLoc& loc ) const;
+ virtual Status touch(OperationContext* txn, BSONObjBuilder* output) const;
- /**
- * Quantize 'minSize' to the nearest allocation size.
- */
- static int quantizeAllocationSpace(int minSize);
+ const RecordStoreV1MetaData* details() const {
+ return _details.get();
+ }
- static bool isQuantized(int recordSize);
+ // This keeps track of cursors saved during yielding, for invalidation purposes.
+ SavedCursorRegistry savedCursors;
- /* return which "deleted bucket" for this size object */
- static int bucket(int size);
+ DiskLoc getExtentLocForRecord(OperationContext* txn, const DiskLoc& loc) const;
- virtual void updateStatsAfterRepair(OperationContext* txn,
- long long numRecords,
- long long dataSize) {
- invariant(false); // MMAPv1 has its own repair which doesn't call this.
- }
- protected:
+ DiskLoc getNextRecord(OperationContext* txn, const DiskLoc& loc) const;
+ DiskLoc getPrevRecord(OperationContext* txn, const DiskLoc& loc) const;
- virtual MmapV1RecordHeader* recordFor( const DiskLoc& loc ) const;
+ DiskLoc getNextRecordInExtent(OperationContext* txn, const DiskLoc& loc) const;
+ DiskLoc getPrevRecordInExtent(OperationContext* txn, const DiskLoc& loc) const;
- const DeletedRecord* deletedRecordFor( const DiskLoc& loc ) const;
+ /**
+ * Quantize 'minSize' to the nearest allocation size.
+ */
+ static int quantizeAllocationSpace(int minSize);
- virtual bool isCapped() const = 0;
+ static bool isQuantized(int recordSize);
- virtual bool shouldPadInserts() const = 0;
+ /* return which "deleted bucket" an object of this size falls into */
+ static int bucket(int size);
- virtual StatusWith<DiskLoc> allocRecord( OperationContext* txn,
- int lengthWithHeaders,
- bool enforceQuota ) = 0;
+ virtual void updateStatsAfterRepair(OperationContext* txn,
+ long long numRecords,
+ long long dataSize) {
+ invariant(false); // MMAPv1 has its own repair which doesn't call this.
+ }
- // TODO: document, remove, what have you
- virtual void addDeletedRec( OperationContext* txn, const DiskLoc& dloc) = 0;
+protected:
+ virtual MmapV1RecordHeader* recordFor(const DiskLoc& loc) const;
- // TODO: another sad one
- virtual DeletedRecord* drec( const DiskLoc& loc ) const;
+ const DeletedRecord* deletedRecordFor(const DiskLoc& loc) const;
- // just a wrapper for _extentManager->getExtent( loc );
- Extent* _getExtent( OperationContext* txn, const DiskLoc& loc ) const;
+ virtual bool isCapped() const = 0;
- DiskLoc _getExtentLocForRecord( OperationContext* txn, const DiskLoc& loc ) const;
+ virtual bool shouldPadInserts() const = 0;
- DiskLoc _getNextRecord( OperationContext* txn, const DiskLoc& loc ) const;
- DiskLoc _getPrevRecord( OperationContext* txn, const DiskLoc& loc ) const;
+ virtual StatusWith<DiskLoc> allocRecord(OperationContext* txn,
+ int lengthWithHeaders,
+ bool enforceQuota) = 0;
- DiskLoc _getNextRecordInExtent( OperationContext* txn, const DiskLoc& loc ) const;
- DiskLoc _getPrevRecordInExtent( OperationContext* txn, const DiskLoc& loc ) const;
+ // TODO: document, remove, what have you
+ virtual void addDeletedRec(OperationContext* txn, const DiskLoc& dloc) = 0;
- /**
- * finds the first suitable DiskLoc for data
- * will return the DiskLoc of a newly created DeletedRecord
- */
- DiskLoc _findFirstSpot( OperationContext* txn, const DiskLoc& extDiskLoc, Extent* e );
+ // TODO: another sad one
+ virtual DeletedRecord* drec(const DiskLoc& loc) const;
- /** add a record to the end of the linked list chain within this extent.
- require: you must have already declared write intent for the record header.
- */
- void _addRecordToRecListInExtent(OperationContext* txn, MmapV1RecordHeader* r, DiskLoc loc);
+ // just a wrapper for _extentManager->getExtent( loc );
+ Extent* _getExtent(OperationContext* txn, const DiskLoc& loc) const;
- /**
- * internal
- * doesn't check inputs or change padding
- */
- StatusWith<RecordId> _insertRecord( OperationContext* txn,
- const char* data,
- int len,
- bool enforceQuota );
+ DiskLoc _getExtentLocForRecord(OperationContext* txn, const DiskLoc& loc) const;
- std::unique_ptr<RecordStoreV1MetaData> _details;
- ExtentManager* _extentManager;
- bool _isSystemIndexes;
+ DiskLoc _getNextRecord(OperationContext* txn, const DiskLoc& loc) const;
+ DiskLoc _getPrevRecord(OperationContext* txn, const DiskLoc& loc) const;
- friend class RecordStoreV1RepairCursor;
- };
+ DiskLoc _getNextRecordInExtent(OperationContext* txn, const DiskLoc& loc) const;
+ DiskLoc _getPrevRecordInExtent(OperationContext* txn, const DiskLoc& loc) const;
/**
- * Iterates over all records within a single extent.
- *
- * EOF at end of extent, even if there are more extents.
+ * Finds the first suitable DiskLoc for data;
+ * returns the DiskLoc of a newly created DeletedRecord.
*/
- class RecordStoreV1Base::IntraExtentIterator final : public RecordCursor {
- public:
- IntraExtentIterator(OperationContext* txn,
- DiskLoc start,
- const RecordStoreV1Base* rs,
- bool forward = true)
- : _txn(txn), _curr(start), _rs(rs), _forward(forward) {}
-
- boost::optional<Record> next() final;
- boost::optional<Record> seekExact(const RecordId& id) final;
- void invalidate(const RecordId& dl) final;
- void savePositioned() final {}
- bool restore(OperationContext* txn) final { return true; }
- std::unique_ptr<RecordFetcher> fetcherForNext() const final;
+ DiskLoc _findFirstSpot(OperationContext* txn, const DiskLoc& extDiskLoc, Extent* e);
- private:
- virtual const MmapV1RecordHeader* recordFor( const DiskLoc& loc ) const {
- return _rs->recordFor(loc);
- }
+ /** add a record to the end of the linked list chain within this extent.
+ require: you must have already declared write intent for the record header.
+ */
+ void _addRecordToRecListInExtent(OperationContext* txn, MmapV1RecordHeader* r, DiskLoc loc);
+
+ /**
+ * Internal helper:
+ * doesn't check inputs or change padding.
+ */
+ StatusWith<RecordId> _insertRecord(OperationContext* txn,
+ const char* data,
+ int len,
+ bool enforceQuota);
- void advance();
+ std::unique_ptr<RecordStoreV1MetaData> _details;
+ ExtentManager* _extentManager;
+ bool _isSystemIndexes;
- OperationContext* _txn;
- DiskLoc _curr;
- const RecordStoreV1Base* _rs;
- bool _forward;
- };
+ friend class RecordStoreV1RepairCursor;
+};
+/**
+ * Iterates over all records within a single extent.
+ *
+ * EOF at end of extent, even if there are more extents.
+ */
+class RecordStoreV1Base::IntraExtentIterator final : public RecordCursor {
+public:
+ IntraExtentIterator(OperationContext* txn,
+ DiskLoc start,
+ const RecordStoreV1Base* rs,
+ bool forward = true)
+ : _txn(txn), _curr(start), _rs(rs), _forward(forward) {}
+
+ boost::optional<Record> next() final;
+ boost::optional<Record> seekExact(const RecordId& id) final;
+ void invalidate(const RecordId& dl) final;
+ void savePositioned() final {}
+ bool restore(OperationContext* txn) final {
+ return true;
+ }
+ std::unique_ptr<RecordFetcher> fetcherForNext() const final;
+
+private:
+ virtual const MmapV1RecordHeader* recordFor(const DiskLoc& loc) const {
+ return _rs->recordFor(loc);
+ }
+
+ void advance();
+
+ OperationContext* _txn;
+ DiskLoc _curr;
+ const RecordStoreV1Base* _rs;
+ bool _forward;
+};
}
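// An illustrative sketch of the deleted-record bucketing that bucket() and
// quantizeAllocationSpace() above provide. The sizes in this table are made
// up for the example; the real bucketSizes[] is defined in the .cpp and is
// not reproduced here.
namespace toy {
const int bucketSizes[] = {32, 64, 128, 256, 512, 1024};
const int Buckets = sizeof(bucketSizes) / sizeof(bucketSizes[0]);

// Index of the smallest bucket that can hold 'size'; oversized requests
// fall into the last bucket (the real code handles them separately).
int bucket(int size) {
    for (int i = 0; i < Buckets; ++i) {
        if (size <= bucketSizes[i])
            return i;
    }
    return Buckets - 1;
}

// Round a request up to its bucket boundary so a freed record can later be
// reused for any same-bucket allocation.
int quantizeAllocationSpace(int minSize) {
    return bucketSizes[bucket(minSize)];
}
}  // namespace toy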
diff --git a/src/mongo/db/storage/mmap_v1/record_store_v1_capped.cpp b/src/mongo/db/storage/mmap_v1/record_store_v1_capped.cpp
index a41dd66ab1e..2674861bdb1 100644
--- a/src/mongo/db/storage/mmap_v1/record_store_v1_capped.cpp
+++ b/src/mongo/db/storage/mmap_v1/record_store_v1_capped.cpp
@@ -62,658 +62,630 @@
namespace mongo {
- using std::dec;
- using std::endl;
- using std::hex;
- using std::vector;
-
- CappedRecordStoreV1::CappedRecordStoreV1( OperationContext* txn,
- CappedDocumentDeleteCallback* collection,
- StringData ns,
- RecordStoreV1MetaData* details,
- ExtentManager* em,
- bool isSystemIndexes )
- : RecordStoreV1Base( ns, details, em, isSystemIndexes ),
- _deleteCallback( collection ) {
-
- DiskLoc extentLoc = details->firstExtent(txn);
- while ( !extentLoc.isNull() ) {
- _extentAdvice.push_back( _extentManager->cacheHint( extentLoc,
- ExtentManager::Sequential ) );
- Extent* extent = em->getExtent( extentLoc );
- extentLoc = extent->xnext;
- }
-
- // this is for VERY VERY old versions of capped collections
- cappedCheckMigrate(txn);
+using std::dec;
+using std::endl;
+using std::hex;
+using std::vector;
+
+CappedRecordStoreV1::CappedRecordStoreV1(OperationContext* txn,
+ CappedDocumentDeleteCallback* collection,
+ StringData ns,
+ RecordStoreV1MetaData* details,
+ ExtentManager* em,
+ bool isSystemIndexes)
+ : RecordStoreV1Base(ns, details, em, isSystemIndexes), _deleteCallback(collection) {
+ DiskLoc extentLoc = details->firstExtent(txn);
+ while (!extentLoc.isNull()) {
+ _extentAdvice.push_back(_extentManager->cacheHint(extentLoc, ExtentManager::Sequential));
+ Extent* extent = em->getExtent(extentLoc);
+ extentLoc = extent->xnext;
}
- CappedRecordStoreV1::~CappedRecordStoreV1() {
- }
+ // this is for VERY VERY old versions of capped collections
+ cappedCheckMigrate(txn);
+}
- StatusWith<DiskLoc> CappedRecordStoreV1::allocRecord( OperationContext* txn,
- int lenToAlloc,
- bool enforceQuota ) {
- {
- // align very slightly.
- lenToAlloc = (lenToAlloc + 3) & 0xfffffffc;
- }
+CappedRecordStoreV1::~CappedRecordStoreV1() {}
- if ( lenToAlloc > theCapExtent()->length ) {
- // the extent check is a way to try and improve performance
- // since we have to iterate all the extents (for now) to get
- // storage size
- if ( lenToAlloc > storageSize(txn) ) {
- return StatusWith<DiskLoc>( ErrorCodes::DocTooLargeForCapped,
- mongoutils::str::stream()
- << "document is larger than capped size "
- << lenToAlloc << " > " << storageSize(txn),
- 16328 );
- }
+StatusWith<DiskLoc> CappedRecordStoreV1::allocRecord(OperationContext* txn,
+ int lenToAlloc,
+ bool enforceQuota) {
+ {
+ // align very slightly.
+ lenToAlloc = (lenToAlloc + 3) & 0xfffffffc;
+ }
+ if (lenToAlloc > theCapExtent()->length) {
+ // the extent check is a way to try and improve performance
+ // since we have to iterate all the extents (for now) to get
+ // storage size
+ if (lenToAlloc > storageSize(txn)) {
+ return StatusWith<DiskLoc>(ErrorCodes::DocTooLargeForCapped,
+ mongoutils::str::stream()
+ << "document is larger than capped size " << lenToAlloc
+ << " > " << storageSize(txn),
+ 16328);
}
- DiskLoc loc;
- { // do allocation
-
- // signal done allocating new extents.
- if ( !cappedLastDelRecLastExtent().isValid() )
- setLastDelRecLastExtent( txn, DiskLoc() );
+ }
+ DiskLoc loc;
+ { // do allocation
- invariant( lenToAlloc < 400000000 );
- int passes = 0;
+ // signal done allocating new extents.
+ if (!cappedLastDelRecLastExtent().isValid())
+ setLastDelRecLastExtent(txn, DiskLoc());
- // delete records until we have room and the max # objects limit achieved.
+ invariant(lenToAlloc < 400000000);
+ int passes = 0;
- /* this fails on a rename -- that is ok but must keep commented out */
- //invariant( theCapExtent()->ns == ns );
+ // delete records until we have room and the max # objects limit is achieved.
- theCapExtent()->assertOk();
- DiskLoc firstEmptyExtent; // This prevents us from infinite looping.
- while ( 1 ) {
- if ( _details->numRecords() < _details->maxCappedDocs() ) {
- loc = __capAlloc( txn, lenToAlloc );
- if ( !loc.isNull() )
- break;
- }
+ /* this check fails on a rename -- that is ok, but it must stay commented out */
+ // invariant( theCapExtent()->ns == ns );
- // If on first iteration through extents, don't delete anything.
- if ( !_details->capFirstNewRecord().isValid() ) {
- advanceCapExtent( txn, _ns );
+ theCapExtent()->assertOk();
+ DiskLoc firstEmptyExtent; // This prevents us from infinite looping.
+ while (1) {
+ if (_details->numRecords() < _details->maxCappedDocs()) {
+ loc = __capAlloc(txn, lenToAlloc);
+ if (!loc.isNull())
+ break;
+ }
- if ( _details->capExtent() != _details->firstExtent(txn) )
- _details->setCapFirstNewRecord( txn, DiskLoc().setInvalid() );
- // else signal done with first iteration through extents.
- continue;
- }
+ // If on first iteration through extents, don't delete anything.
+ if (!_details->capFirstNewRecord().isValid()) {
+ advanceCapExtent(txn, _ns);
- if ( !_details->capFirstNewRecord().isNull() &&
- theCapExtent()->firstRecord == _details->capFirstNewRecord() ) {
- // We've deleted all records that were allocated on the previous
- // iteration through this extent.
- advanceCapExtent( txn, _ns );
- continue;
- }
+ if (_details->capExtent() != _details->firstExtent(txn))
+ _details->setCapFirstNewRecord(txn, DiskLoc().setInvalid());
+ // else signal done with first iteration through extents.
+ continue;
+ }
- if ( theCapExtent()->firstRecord.isNull() ) {
- if ( firstEmptyExtent.isNull() )
- firstEmptyExtent = _details->capExtent();
- advanceCapExtent( txn, _ns );
- if ( firstEmptyExtent == _details->capExtent() ) {
- // All records have been deleted but there is still no room for this record.
- // Nothing we can do but fail.
- _maybeComplain( txn, lenToAlloc );
- return StatusWith<DiskLoc>(
- ErrorCodes::DocTooLargeForCapped,
- str::stream() << "document doesn't fit in capped collection."
- << " size: " << lenToAlloc
- << " storageSize:" << storageSize(txn),
- 28575);
- }
- continue;
- }
+ if (!_details->capFirstNewRecord().isNull() &&
+ theCapExtent()->firstRecord == _details->capFirstNewRecord()) {
+ // We've deleted all records that were allocated on the previous
+ // iteration through this extent.
+ advanceCapExtent(txn, _ns);
+ continue;
+ }
- const RecordId fr = theCapExtent()->firstRecord.toRecordId();
- Status status = _deleteCallback->aboutToDeleteCapped( txn, fr, dataFor(txn, fr) );
- if ( !status.isOK() )
- return StatusWith<DiskLoc>( status );
- deleteRecord( txn, fr );
-
- _compact(txn);
- if ((++passes % 5000) == 0) {
- StringBuilder sb;
- log() << "passes = " << passes << " in CappedRecordStoreV1::allocRecord:"
- << " ns: " << _ns
- << ", lenToAlloc: " << lenToAlloc
- << ", maxCappedDocs: " << _details->maxCappedDocs()
- << ", nrecords: " << _details->numRecords()
- << ", datasize: " << _details->dataSize()
- << ". Continuing to delete old records to make room.";
+ if (theCapExtent()->firstRecord.isNull()) {
+ if (firstEmptyExtent.isNull())
+ firstEmptyExtent = _details->capExtent();
+ advanceCapExtent(txn, _ns);
+ if (firstEmptyExtent == _details->capExtent()) {
+ // All records have been deleted but there is still no room for this record.
+ // Nothing we can do but fail.
+ _maybeComplain(txn, lenToAlloc);
+ return StatusWith<DiskLoc>(ErrorCodes::DocTooLargeForCapped,
+ str::stream()
+ << "document doesn't fit in capped collection."
+ << " size: " << lenToAlloc
+ << " storageSize:" << storageSize(txn),
+ 28575);
}
+ continue;
}
- // Remember first record allocated on this iteration through capExtent.
- if ( _details->capFirstNewRecord().isValid() && _details->capFirstNewRecord().isNull() )
- _details->setCapFirstNewRecord( txn, loc );
+ const RecordId fr = theCapExtent()->firstRecord.toRecordId();
+ Status status = _deleteCallback->aboutToDeleteCapped(txn, fr, dataFor(txn, fr));
+ if (!status.isOK())
+ return StatusWith<DiskLoc>(status);
+ deleteRecord(txn, fr);
+
+ _compact(txn);
+ if ((++passes % 5000) == 0) {
+ StringBuilder sb;
+ log() << "passes = " << passes << " in CappedRecordStoreV1::allocRecord:"
+ << " ns: " << _ns << ", lenToAlloc: " << lenToAlloc
+ << ", maxCappedDocs: " << _details->maxCappedDocs()
+ << ", nrecords: " << _details->numRecords()
+ << ", datasize: " << _details->dataSize()
+ << ". Continuing to delete old records to make room.";
+ }
}
- invariant( !loc.isNull() );
+ // Remember first record allocated on this iteration through capExtent.
+ if (_details->capFirstNewRecord().isValid() && _details->capFirstNewRecord().isNull())
+ _details->setCapFirstNewRecord(txn, loc);
+ }
- // possibly slice up if we've allocated too much space
+ invariant(!loc.isNull());
- DeletedRecord *r = drec( loc );
+ // possibly slice up if we've allocated too much space
- /* note we want to grab from the front so our next pointers on disk tend
- to go in a forward direction which is important for performance. */
- int regionlen = r->lengthWithHeaders();
- invariant( r->extentOfs() < loc.getOfs() );
+ DeletedRecord* r = drec(loc);
- int left = regionlen - lenToAlloc;
+ /* note we want to grab from the front so our next pointers on disk tend
+ to go in a forward direction, which is important for performance. */
+ int regionlen = r->lengthWithHeaders();
+ invariant(r->extentOfs() < loc.getOfs());
- /* split off some for further use. */
- txn->recoveryUnit()->writingInt(r->lengthWithHeaders()) = lenToAlloc;
- DiskLoc newDelLoc = loc;
- newDelLoc.inc(lenToAlloc);
- DeletedRecord* newDel = drec( newDelLoc );
- DeletedRecord* newDelW = txn->recoveryUnit()->writing(newDel);
- newDelW->extentOfs() = r->extentOfs();
- newDelW->lengthWithHeaders() = left;
- newDelW->nextDeleted().Null();
+ int left = regionlen - lenToAlloc;
- addDeletedRec(txn, newDelLoc);
+ /* split off some for further use. */
+ txn->recoveryUnit()->writingInt(r->lengthWithHeaders()) = lenToAlloc;
+ DiskLoc newDelLoc = loc;
+ newDelLoc.inc(lenToAlloc);
+ DeletedRecord* newDel = drec(newDelLoc);
+ DeletedRecord* newDelW = txn->recoveryUnit()->writing(newDel);
+ newDelW->extentOfs() = r->extentOfs();
+ newDelW->lengthWithHeaders() = left;
+ newDelW->nextDeleted().Null();
- return StatusWith<DiskLoc>( loc );
- }
+ addDeletedRec(txn, newDelLoc);
- Status CappedRecordStoreV1::truncate(OperationContext* txn) {
- setLastDelRecLastExtent( txn, DiskLoc() );
- setListOfAllDeletedRecords( txn, DiskLoc() );
-
- // preserve firstExtent/lastExtent
- _details->setCapExtent( txn, _details->firstExtent(txn) );
- _details->setStats( txn, 0, 0 );
- // preserve lastExtentSize
- // nIndexes preserve 0
- // capped preserve true
- // max preserve
- // paddingFactor is unused
- _details->setCapFirstNewRecord( txn, DiskLoc().setInvalid() );
- setLastDelRecLastExtent( txn, DiskLoc().setInvalid() );
- // dataFileVersion preserve
- // indexFileVersion preserve
-
- // Reset all existing extents and recreate the deleted list.
- Extent* ext;
- for( DiskLoc extLoc = _details->firstExtent(txn);
- !extLoc.isNull();
- extLoc = ext->xnext ) {
- ext = _extentManager->getExtent(extLoc);
-
- txn->recoveryUnit()->writing( &ext->firstRecord )->Null();
- txn->recoveryUnit()->writing( &ext->lastRecord )->Null();
-
- addDeletedRec( txn, _findFirstSpot( txn, extLoc, ext ) );
- }
+ return StatusWith<DiskLoc>(loc);
+}
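// The "(lenToAlloc + 3) & 0xfffffffc" step at the top of allocRecord() rounds
// a length up to the next multiple of 4. A quick standalone check of that
// identity (assuming nonnegative lengths that fit in an int):
#include <cassert>

int alignUp4(int len) {
    return (len + 3) & ~3;  // ~3 is the same mask as 0xfffffffc for 32-bit int
}

int main() {
    assert(alignUp4(0) == 0);
    assert(alignUp4(1) == 4);
    assert(alignUp4(4) == 4);
    assert(alignUp4(5) == 8);
    return 0;
}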
- return Status::OK();
+Status CappedRecordStoreV1::truncate(OperationContext* txn) {
+ setLastDelRecLastExtent(txn, DiskLoc());
+ setListOfAllDeletedRecords(txn, DiskLoc());
+
+ // preserve firstExtent/lastExtent
+ _details->setCapExtent(txn, _details->firstExtent(txn));
+ _details->setStats(txn, 0, 0);
+ // preserve lastExtentSize
+ // nIndexes preserve 0
+ // capped preserve true
+ // max preserve
+ // paddingFactor is unused
+ _details->setCapFirstNewRecord(txn, DiskLoc().setInvalid());
+ setLastDelRecLastExtent(txn, DiskLoc().setInvalid());
+ // dataFileVersion preserve
+ // indexFileVersion preserve
+
+ // Reset all existing extents and recreate the deleted list.
+ Extent* ext;
+ for (DiskLoc extLoc = _details->firstExtent(txn); !extLoc.isNull(); extLoc = ext->xnext) {
+ ext = _extentManager->getExtent(extLoc);
+
+ txn->recoveryUnit()->writing(&ext->firstRecord)->Null();
+ txn->recoveryUnit()->writing(&ext->lastRecord)->Null();
+
+ addDeletedRec(txn, _findFirstSpot(txn, extLoc, ext));
}
- void CappedRecordStoreV1::temp_cappedTruncateAfter( OperationContext* txn,
- RecordId end,
- bool inclusive ) {
- cappedTruncateAfter( txn, _ns.c_str(), DiskLoc::fromRecordId(end), inclusive );
- }
+ return Status::OK();
+}
- /* combine adjacent deleted records *for the current extent* of the capped collection
+void CappedRecordStoreV1::temp_cappedTruncateAfter(OperationContext* txn,
+ RecordId end,
+ bool inclusive) {
+ cappedTruncateAfter(txn, _ns.c_str(), DiskLoc::fromRecordId(end), inclusive);
+}
- this is O(n^2) but we call it for capped tables where typically n==1 or 2!
- (or 3...there will be a little unused sliver at the end of the extent.)
- */
- void CappedRecordStoreV1::_compact(OperationContext* txn) {
- DDD( "CappedRecordStoreV1::compact enter" );
+/* combine adjacent deleted records *for the current extent* of the capped collection
- vector<DiskLoc> drecs;
+ this is O(n^2) but we call it for capped tables where typically n==1 or 2!
+ (or 3...there will be a little unused sliver at the end of the extent.)
+*/
+void CappedRecordStoreV1::_compact(OperationContext* txn) {
+ DDD("CappedRecordStoreV1::compact enter");
- // Pull out capExtent's DRs from deletedList
- DiskLoc i = cappedFirstDeletedInCurExtent();
- for (; !i.isNull() && inCapExtent( i ); i = deletedRecordFor( i )->nextDeleted() ) {
- DDD( "\t" << i );
- drecs.push_back( i );
- }
+ vector<DiskLoc> drecs;
+
+ // Pull out capExtent's DRs from deletedList
+ DiskLoc i = cappedFirstDeletedInCurExtent();
+ for (; !i.isNull() && inCapExtent(i); i = deletedRecordFor(i)->nextDeleted()) {
+ DDD("\t" << i);
+ drecs.push_back(i);
+ }
- setFirstDeletedInCurExtent( txn, i );
+ setFirstDeletedInCurExtent(txn, i);
- std::sort( drecs.begin(), drecs.end() );
- DDD( "\t drecs.size(): " << drecs.size() );
+ std::sort(drecs.begin(), drecs.end());
+ DDD("\t drecs.size(): " << drecs.size());
- vector<DiskLoc>::const_iterator j = drecs.begin();
- invariant( j != drecs.end() );
- DiskLoc a = *j;
- while ( 1 ) {
+ vector<DiskLoc>::const_iterator j = drecs.begin();
+ invariant(j != drecs.end());
+ DiskLoc a = *j;
+ while (1) {
+ j++;
+ if (j == drecs.end()) {
+ DDD("\t compact adddelrec");
+ addDeletedRec(txn, a);
+ break;
+ }
+ DiskLoc b = *j;
+ while (a.a() == b.a() && a.getOfs() + drec(a)->lengthWithHeaders() == b.getOfs()) {
+ // a & b are adjacent. merge.
+ txn->recoveryUnit()->writingInt(drec(a)->lengthWithHeaders()) +=
+ drec(b)->lengthWithHeaders();
j++;
- if ( j == drecs.end() ) {
- DDD( "\t compact adddelrec" );
+ if (j == drecs.end()) {
+ DDD("\t compact adddelrec2");
addDeletedRec(txn, a);
- break;
+ return;
}
- DiskLoc b = *j;
- while ( a.a() == b.a() &&
- a.getOfs() + drec( a )->lengthWithHeaders() == b.getOfs() ) {
-
- // a & b are adjacent. merge.
- txn->recoveryUnit()->writingInt( drec(a)->lengthWithHeaders() ) += drec(b)->lengthWithHeaders();
- j++;
- if ( j == drecs.end() ) {
- DDD( "\t compact adddelrec2" );
- addDeletedRec(txn, a);
- return;
- }
- b = *j;
- }
- DDD( "\t compact adddelrec3" );
- addDeletedRec(txn, a);
- a = b;
+ b = *j;
}
-
- }
-
- DiskLoc CappedRecordStoreV1::cappedFirstDeletedInCurExtent() const {
- if ( cappedLastDelRecLastExtent().isNull() )
- return cappedListOfAllDeletedRecords();
- else
- return drec(cappedLastDelRecLastExtent())->nextDeleted();
+ DDD("\t compact adddelrec3");
+ addDeletedRec(txn, a);
+ a = b;
}
+}
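// A self-contained sketch of the coalescing pass _compact() performs above:
// sort the free chunks by offset, then merge every run where one chunk ends
// exactly where the next begins. Plain ints model offsets within one file;
// the real code additionally requires a.a() == b.a() (same file number).
#include <algorithm>
#include <vector>

struct Chunk {
    int ofs;
    int len;
};

std::vector<Chunk> coalesce(std::vector<Chunk> chunks) {
    std::sort(chunks.begin(), chunks.end(),
              [](const Chunk& a, const Chunk& b) { return a.ofs < b.ofs; });
    std::vector<Chunk> merged;
    for (const Chunk& c : chunks) {
        if (!merged.empty() && merged.back().ofs + merged.back().len == c.ofs)
            merged.back().len += c.len;  // adjacent: drec(a) absorbs drec(b)
        else
            merged.push_back(c);
    }
    return merged;
}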
- void CappedRecordStoreV1::setFirstDeletedInCurExtent( OperationContext* txn,
- const DiskLoc& loc ) {
- if ( cappedLastDelRecLastExtent().isNull() )
- setListOfAllDeletedRecords( txn, loc );
- else
- *txn->recoveryUnit()->writing( &drec(cappedLastDelRecLastExtent())->nextDeleted() ) = loc;
- }
+DiskLoc CappedRecordStoreV1::cappedFirstDeletedInCurExtent() const {
+ if (cappedLastDelRecLastExtent().isNull())
+ return cappedListOfAllDeletedRecords();
+ else
+ return drec(cappedLastDelRecLastExtent())->nextDeleted();
+}
- void CappedRecordStoreV1::cappedCheckMigrate(OperationContext* txn) {
- // migrate old RecordStoreV1MetaData format
- if ( _details->capExtent().a() == 0 && _details->capExtent().getOfs() == 0 ) {
- WriteUnitOfWork wunit(txn);
- _details->setCapFirstNewRecord( txn, DiskLoc().setInvalid() );
- // put all the DeletedRecords in cappedListOfAllDeletedRecords()
- for ( int i = 1; i < Buckets; ++i ) {
- DiskLoc first = _details->deletedListEntry( i );
- if ( first.isNull() )
- continue;
- DiskLoc last = first;
- for (; !drec(last)->nextDeleted().isNull(); last = drec(last)->nextDeleted() );
- *txn->recoveryUnit()->writing(&drec(last)->nextDeleted()) = cappedListOfAllDeletedRecords();
- setListOfAllDeletedRecords( txn, first );
- _details->setDeletedListEntry(txn, i, DiskLoc());
- }
- // NOTE cappedLastDelRecLastExtent() set to DiskLoc() in above
+void CappedRecordStoreV1::setFirstDeletedInCurExtent(OperationContext* txn, const DiskLoc& loc) {
+ if (cappedLastDelRecLastExtent().isNull())
+ setListOfAllDeletedRecords(txn, loc);
+ else
+ *txn->recoveryUnit()->writing(&drec(cappedLastDelRecLastExtent())->nextDeleted()) = loc;
+}
- // Last, in case we're killed before getting here
- _details->setCapExtent( txn, _details->firstExtent(txn) );
- wunit.commit();
+void CappedRecordStoreV1::cappedCheckMigrate(OperationContext* txn) {
+ // migrate old RecordStoreV1MetaData format
+ if (_details->capExtent().a() == 0 && _details->capExtent().getOfs() == 0) {
+ WriteUnitOfWork wunit(txn);
+ _details->setCapFirstNewRecord(txn, DiskLoc().setInvalid());
+ // put all the DeletedRecords in cappedListOfAllDeletedRecords()
+ for (int i = 1; i < Buckets; ++i) {
+ DiskLoc first = _details->deletedListEntry(i);
+ if (first.isNull())
+ continue;
+ DiskLoc last = first;
+ for (; !drec(last)->nextDeleted().isNull(); last = drec(last)->nextDeleted())
+ ;
+ *txn->recoveryUnit()->writing(&drec(last)->nextDeleted()) =
+ cappedListOfAllDeletedRecords();
+ setListOfAllDeletedRecords(txn, first);
+ _details->setDeletedListEntry(txn, i, DiskLoc());
}
+ // NOTE cappedLastDelRecLastExtent() was set to DiskLoc() in the loop above
+
+ // Last, in case we're killed before getting here
+ _details->setCapExtent(txn, _details->firstExtent(txn));
+ wunit.commit();
}
+}
- bool CappedRecordStoreV1::inCapExtent( const DiskLoc &dl ) const {
- invariant( !dl.isNull() );
+bool CappedRecordStoreV1::inCapExtent(const DiskLoc& dl) const {
+ invariant(!dl.isNull());
- if ( dl.a() != _details->capExtent().a() )
- return false;
+ if (dl.a() != _details->capExtent().a())
+ return false;
- if ( dl.getOfs() < _details->capExtent().getOfs() )
- return false;
+ if (dl.getOfs() < _details->capExtent().getOfs())
+ return false;
- const Extent* e = theCapExtent();
- int end = _details->capExtent().getOfs() + e->length;
- return dl.getOfs() <= end;
- }
+ const Extent* e = theCapExtent();
+ int end = _details->capExtent().getOfs() + e->length;
+ return dl.getOfs() <= end;
+}
- bool CappedRecordStoreV1::nextIsInCapExtent( const DiskLoc &dl ) const {
- invariant( !dl.isNull() );
- DiskLoc next = drec(dl)->nextDeleted();
- if ( next.isNull() )
- return false;
- return inCapExtent( next );
+bool CappedRecordStoreV1::nextIsInCapExtent(const DiskLoc& dl) const {
+ invariant(!dl.isNull());
+ DiskLoc next = drec(dl)->nextDeleted();
+ if (next.isNull())
+ return false;
+ return inCapExtent(next);
+}
+
+void CappedRecordStoreV1::advanceCapExtent(OperationContext* txn, StringData ns) {
+ // We want cappedLastDelRecLastExtent() to be the last DeletedRecord of the prev cap extent
+ // (or DiskLoc() if new capExtent == firstExtent)
+ if (_details->capExtent() == _details->lastExtent(txn))
+ setLastDelRecLastExtent(txn, DiskLoc());
+ else {
+ DiskLoc i = cappedFirstDeletedInCurExtent();
+ for (; !i.isNull() && nextIsInCapExtent(i); i = drec(i)->nextDeleted())
+ ;
+ setLastDelRecLastExtent(txn, i);
}
- void CappedRecordStoreV1::advanceCapExtent( OperationContext* txn, StringData ns ) {
- // We want cappedLastDelRecLastExtent() to be the last DeletedRecord of the prev cap extent
- // (or DiskLoc() if new capExtent == firstExtent)
- if ( _details->capExtent() == _details->lastExtent(txn) )
- setLastDelRecLastExtent( txn, DiskLoc() );
- else {
- DiskLoc i = cappedFirstDeletedInCurExtent();
- for (; !i.isNull() && nextIsInCapExtent( i ); i = drec(i)->nextDeleted() );
- setLastDelRecLastExtent( txn, i );
- }
+ _details->setCapExtent(
+ txn, theCapExtent()->xnext.isNull() ? _details->firstExtent(txn) : theCapExtent()->xnext);
- _details->setCapExtent( txn,
- theCapExtent()->xnext.isNull() ? _details->firstExtent(txn)
- : theCapExtent()->xnext );
+ /* this isn't true if a collection has been renamed... that is ok; it is just used for diagnostics */
+ // dassert( theCapExtent()->ns == ns );
- /* this isn't true if a collection has been renamed...that is ok just used for diagnostics */
- //dassert( theCapExtent()->ns == ns );
+ theCapExtent()->assertOk();
+ _details->setCapFirstNewRecord(txn, DiskLoc());
+}
- theCapExtent()->assertOk();
- _details->setCapFirstNewRecord( txn, DiskLoc() );
+DiskLoc CappedRecordStoreV1::__capAlloc(OperationContext* txn, int len) {
+ DiskLoc prev = cappedLastDelRecLastExtent();
+ DiskLoc i = cappedFirstDeletedInCurExtent();
+ DiskLoc ret;
+ for (; !i.isNull() && inCapExtent(i); prev = i, i = drec(i)->nextDeleted()) {
+ // We need to keep at least one DR per extent in cappedListOfAllDeletedRecords(),
+ // so make sure there's space to create a DR at the end.
+ if (drec(i)->lengthWithHeaders() >= len + 24) {
+ ret = i;
+ break;
+ }
}
- DiskLoc CappedRecordStoreV1::__capAlloc( OperationContext* txn, int len ) {
- DiskLoc prev = cappedLastDelRecLastExtent();
- DiskLoc i = cappedFirstDeletedInCurExtent();
- DiskLoc ret;
- for (; !i.isNull() && inCapExtent( i ); prev = i, i = drec(i)->nextDeleted() ) {
- // We need to keep at least one DR per extent in cappedListOfAllDeletedRecords(),
- // so make sure there's space to create a DR at the end.
- if ( drec(i)->lengthWithHeaders() >= len + 24 ) {
- ret = i;
- break;
- }
- }
+ /* unlink ourself from the deleted list */
+ if (!ret.isNull()) {
+ if (prev.isNull())
+ setListOfAllDeletedRecords(txn, drec(ret)->nextDeleted());
+ else
+ *txn->recoveryUnit()->writing(&drec(prev)->nextDeleted()) = drec(ret)->nextDeleted();
+ *txn->recoveryUnit()->writing(&drec(ret)->nextDeleted()) =
+ DiskLoc().setInvalid(); // defensive.
+ invariant(drec(ret)->extentOfs() < ret.getOfs());
+ }
- /* unlink ourself from the deleted list */
- if ( !ret.isNull() ) {
- if ( prev.isNull() )
- setListOfAllDeletedRecords( txn, drec(ret)->nextDeleted() );
- else
- *txn->recoveryUnit()->writing(&drec(prev)->nextDeleted()) = drec(ret)->nextDeleted();
- *txn->recoveryUnit()->writing(&drec(ret)->nextDeleted()) = DiskLoc().setInvalid(); // defensive.
- invariant( drec(ret)->extentOfs() < ret.getOfs() );
- }
+ return ret;
+}
- return ret;
+void CappedRecordStoreV1::cappedTruncateLastDelUpdate(OperationContext* txn) {
+ if (_details->capExtent() == _details->firstExtent(txn)) {
+ // Only one extent of the collection is in use, so there
+ // is no deleted record in a previous extent, so nullify
+ // cappedLastDelRecLastExtent().
+ setLastDelRecLastExtent(txn, DiskLoc());
+ } else {
+ // Scan through all deleted records in the collection
+ // until the last deleted record for the extent prior
+ // to the new capExtent is found. Then set
+ // cappedLastDelRecLastExtent() to that deleted record.
+ DiskLoc i = cappedListOfAllDeletedRecords();
+ for (; !drec(i)->nextDeleted().isNull() && !inCapExtent(drec(i)->nextDeleted());
+ i = drec(i)->nextDeleted())
+ ;
+ // In our capped storage model, every extent must have at least one
+ // deleted record. Here we check that 'i' is not the last deleted
+ // record. (We expect that there will be deleted records in the new
+ // capExtent as well.)
+ invariant(!drec(i)->nextDeleted().isNull());
+ setLastDelRecLastExtent(txn, i);
}
+}
- void CappedRecordStoreV1::cappedTruncateLastDelUpdate(OperationContext* txn) {
- if ( _details->capExtent() == _details->firstExtent(txn) ) {
- // Only one extent of the collection is in use, so there
- // is no deleted record in a previous extent, so nullify
- // cappedLastDelRecLastExtent().
- setLastDelRecLastExtent( txn, DiskLoc() );
+void CappedRecordStoreV1::cappedTruncateAfter(OperationContext* txn,
+ const char* ns,
+ DiskLoc end,
+ bool inclusive) {
+ invariant(cappedLastDelRecLastExtent().isValid());
+
+ // We iteratively remove the newest document until the newest document
+ // is 'end', then we remove 'end' if requested.
+ bool foundLast = false;
+ while (1) {
+ if (foundLast) {
+ // 'end' has been found and removed, so break.
+ break;
}
- else {
- // Scan through all deleted records in the collection
- // until the last deleted record for the extent prior
- // to the new capExtent is found. Then set
- // cappedLastDelRecLastExtent() to that deleted record.
- DiskLoc i = cappedListOfAllDeletedRecords();
- for( ;
- !drec(i)->nextDeleted().isNull() &&
- !inCapExtent( drec(i)->nextDeleted() );
- i = drec(i)->nextDeleted() );
- // In our capped storage model, every extent must have at least one
- // deleted record. Here we check that 'i' is not the last deleted
- // record. (We expect that there will be deleted records in the new
- // capExtent as well.)
- invariant( !drec(i)->nextDeleted().isNull() );
- setLastDelRecLastExtent( txn, i );
- }
- }
-
- void CappedRecordStoreV1::cappedTruncateAfter(OperationContext* txn,
- const char* ns,
- DiskLoc end,
- bool inclusive) {
- invariant( cappedLastDelRecLastExtent().isValid() );
-
- // We iteratively remove the newest document until the newest document
- // is 'end', then we remove 'end' if requested.
- bool foundLast = false;
- while( 1 ) {
- if ( foundLast ) {
- // 'end' has been found and removed, so break.
+ // 'curr' will point to the newest document in the collection.
+ const DiskLoc curr = theCapExtent()->lastRecord;
+ const RecordId currId = curr.toRecordId();
+ invariant(!curr.isNull());
+ if (curr == end) {
+ if (inclusive) {
+ // 'end' has been found, so break next iteration.
+ foundLast = true;
+ } else {
+ // 'end' has been found, so break.
break;
}
- // 'curr' will point to the newest document in the collection.
- const DiskLoc curr = theCapExtent()->lastRecord;
- const RecordId currId = curr.toRecordId();
- invariant( !curr.isNull() );
- if ( curr == end ) {
- if ( inclusive ) {
- // 'end' has been found, so break next iteration.
- foundLast = true;
- }
- else {
- // 'end' has been found, so break.
- break;
- }
- }
-
- // TODO The algorithm used in this function cannot generate an
- // empty collection, but we could call emptyCappedCollection() in
- // this case instead of asserting.
- uassert( 13415, "emptying the collection is not allowed", _details->numRecords() > 1 );
-
- WriteUnitOfWork wunit(txn);
- // Delete the newest record, and coalesce the new deleted
- // record with existing deleted records.
- Status status = _deleteCallback->aboutToDeleteCapped(txn, currId, dataFor(txn, currId));
- uassertStatusOK( status );
- deleteRecord( txn, currId );
- _compact(txn);
-
- // This is the case where we have not yet had to remove any
- // documents to make room for other documents, and we are allocating
- // documents from free space in fresh extents instead of reusing
- // space from familiar extents.
- if ( !_details->capLooped() ) {
-
- // We just removed the last record from the 'capExtent', and
- // the 'capExtent' can't be empty, so we set 'capExtent' to
- // capExtent's prev extent.
- if ( theCapExtent()->lastRecord.isNull() ) {
- invariant( !theCapExtent()->xprev.isNull() );
- // NOTE Because we didn't delete the last document, and
- // capLooped() is false, capExtent is not the first extent
- // so xprev will be nonnull.
- _details->setCapExtent( txn, theCapExtent()->xprev );
- theCapExtent()->assertOk();
-
- // update cappedLastDelRecLastExtent()
- cappedTruncateLastDelUpdate(txn);
- }
- wunit.commit();
- continue;
- }
-
- // This is the case where capLooped() is true, and we just deleted
- // from capExtent, and we just deleted capFirstNewRecord, which was
- // the last record on the fresh side of capExtent.
- // NOTE In this comparison, curr and potentially capFirstNewRecord
- // may point to invalid data, but we can still compare the
- // references themselves.
- if ( curr == _details->capFirstNewRecord() ) {
-
- // Set 'capExtent' to the first nonempty extent prior to the
- // initial capExtent. There must be such an extent because we
- // have not deleted the last document in the collection. It is
- // possible that all extents other than the capExtent are empty.
- // In this case we will keep the initial capExtent and specify
- // that all records contained within are on the fresh rather than
- // stale side of the extent.
- DiskLoc newCapExtent = _details->capExtent();
- do {
- // Find the previous extent, looping if necessary.
- newCapExtent = ( newCapExtent == _details->firstExtent(txn) ) ?
- _details->lastExtent(txn) :
- _extentManager->getExtent(newCapExtent)->xprev;
- _extentManager->getExtent(newCapExtent)->assertOk();
- }
- while ( _extentManager->getExtent(newCapExtent)->firstRecord.isNull() );
- _details->setCapExtent( txn, newCapExtent );
+ }
- // Place all documents in the new capExtent on the fresh side
- // of the capExtent by setting capFirstNewRecord to the first
- // document in the new capExtent.
- _details->setCapFirstNewRecord( txn, theCapExtent()->firstRecord );
+ // TODO The algorithm used in this function cannot generate an
+ // empty collection, but we could call emptyCappedCollection() in
+ // this case instead of asserting.
+ uassert(13415, "emptying the collection is not allowed", _details->numRecords() > 1);
+
+ WriteUnitOfWork wunit(txn);
+ // Delete the newest record, and coalesce the new deleted
+ // record with existing deleted records.
+ Status status = _deleteCallback->aboutToDeleteCapped(txn, currId, dataFor(txn, currId));
+ uassertStatusOK(status);
+ deleteRecord(txn, currId);
+ _compact(txn);
+
+ // This is the case where we have not yet had to remove any
+ // documents to make room for other documents, and we are allocating
+ // documents from free space in fresh extents instead of reusing
+ // space from familiar extents.
+ if (!_details->capLooped()) {
+ // We just removed the last record from the 'capExtent', and
+ // the 'capExtent' can't be empty, so we set 'capExtent' to
+ // capExtent's prev extent.
+ if (theCapExtent()->lastRecord.isNull()) {
+ invariant(!theCapExtent()->xprev.isNull());
+ // NOTE Because we didn't delete the last document, and
+ // capLooped() is false, capExtent is not the first extent
+ // so xprev will be nonnull.
+ _details->setCapExtent(txn, theCapExtent()->xprev);
+ theCapExtent()->assertOk();
// update cappedLastDelRecLastExtent()
cappedTruncateLastDelUpdate(txn);
}
-
wunit.commit();
+ continue;
}
- }
- DiskLoc CappedRecordStoreV1::cappedListOfAllDeletedRecords() const {
- return _details->deletedListEntry(0);
- }
+ // This is the case where capLooped() is true, and we just deleted
+ // from capExtent, and we just deleted capFirstNewRecord, which was
+ // the last record on the fresh side of capExtent.
+ // NOTE In this comparison, curr and potentially capFirstNewRecord
+ // may point to invalid data, but we can still compare the
+ // references themselves.
+ if (curr == _details->capFirstNewRecord()) {
+ // Set 'capExtent' to the first nonempty extent prior to the
+ // initial capExtent. There must be such an extent because we
+ // have not deleted the last document in the collection. It is
+ // possible that all extents other than the capExtent are empty.
+ // In this case we will keep the initial capExtent and specify
+ // that all records contained within are on the fresh rather than
+ // stale side of the extent.
+ DiskLoc newCapExtent = _details->capExtent();
+ do {
+ // Find the previous extent, looping if necessary.
+ newCapExtent = (newCapExtent == _details->firstExtent(txn))
+ ? _details->lastExtent(txn)
+ : _extentManager->getExtent(newCapExtent)->xprev;
+ _extentManager->getExtent(newCapExtent)->assertOk();
+ } while (_extentManager->getExtent(newCapExtent)->firstRecord.isNull());
+ _details->setCapExtent(txn, newCapExtent);
+
+ // Place all documents in the new capExtent on the fresh side
+ // of the capExtent by setting capFirstNewRecord to the first
+ // document in the new capExtent.
+ _details->setCapFirstNewRecord(txn, theCapExtent()->firstRecord);
+
+ // update cappedLastDelRecLastExtent()
+ cappedTruncateLastDelUpdate(txn);
+ }
- void CappedRecordStoreV1::setListOfAllDeletedRecords( OperationContext* txn,
- const DiskLoc& loc ) {
- return _details->setDeletedListEntry(txn, 0, loc);
+ wunit.commit();
}
+}
- DiskLoc CappedRecordStoreV1::cappedLastDelRecLastExtent() const {
- return _details->deletedListEntry(1);
- }
+DiskLoc CappedRecordStoreV1::cappedListOfAllDeletedRecords() const {
+ return _details->deletedListEntry(0);
+}
- void CappedRecordStoreV1::setLastDelRecLastExtent( OperationContext* txn,
- const DiskLoc& loc ) {
- return _details->setDeletedListEntry(txn, 1, loc);
- }
+void CappedRecordStoreV1::setListOfAllDeletedRecords(OperationContext* txn, const DiskLoc& loc) {
+ return _details->setDeletedListEntry(txn, 0, loc);
+}
- Extent* CappedRecordStoreV1::theCapExtent() const {
- return _extentManager->getExtent(_details->capExtent());
- }
+DiskLoc CappedRecordStoreV1::cappedLastDelRecLastExtent() const {
+ return _details->deletedListEntry(1);
+}
- void CappedRecordStoreV1::addDeletedRec( OperationContext* txn, const DiskLoc& dloc ) {
- DeletedRecord* d = txn->recoveryUnit()->writing( drec( dloc ) );
-
- if ( !cappedLastDelRecLastExtent().isValid() ) {
- // Initial extent allocation. Insert at end.
- d->nextDeleted() = DiskLoc();
- if ( cappedListOfAllDeletedRecords().isNull() )
- setListOfAllDeletedRecords( txn, dloc );
- else {
- DiskLoc i = cappedListOfAllDeletedRecords();
- for (; !drec(i)->nextDeleted().isNull(); i = drec(i)->nextDeleted() )
- ;
- *txn->recoveryUnit()->writing(&drec(i)->nextDeleted()) = dloc;
- }
- }
+void CappedRecordStoreV1::setLastDelRecLastExtent(OperationContext* txn, const DiskLoc& loc) {
+ return _details->setDeletedListEntry(txn, 1, loc);
+}
+
+Extent* CappedRecordStoreV1::theCapExtent() const {
+ return _extentManager->getExtent(_details->capExtent());
+}
+
+void CappedRecordStoreV1::addDeletedRec(OperationContext* txn, const DiskLoc& dloc) {
+ DeletedRecord* d = txn->recoveryUnit()->writing(drec(dloc));
+
+ if (!cappedLastDelRecLastExtent().isValid()) {
+ // Initial extent allocation. Insert at end.
+ d->nextDeleted() = DiskLoc();
+ if (cappedListOfAllDeletedRecords().isNull())
+ setListOfAllDeletedRecords(txn, dloc);
else {
- d->nextDeleted() = cappedFirstDeletedInCurExtent();
- setFirstDeletedInCurExtent( txn, dloc );
- // always _compact() after this so order doesn't matter
+ DiskLoc i = cappedListOfAllDeletedRecords();
+ for (; !drec(i)->nextDeleted().isNull(); i = drec(i)->nextDeleted())
+ ;
+ *txn->recoveryUnit()->writing(&drec(i)->nextDeleted()) = dloc;
}
+ } else {
+ d->nextDeleted() = cappedFirstDeletedInCurExtent();
+ setFirstDeletedInCurExtent(txn, dloc);
+ // always _compact() after this so order doesn't matter
}
+}
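// A sketch of the two insertion modes addDeletedRec() uses above, on a toy
// singly linked free list: append at the tail during initial extent
// allocation, push at the front of the current extent's run afterwards
// (order doesn't matter there because _compact() re-sorts).
struct Node {
    Node* next = nullptr;
};

void appendAtTail(Node*& head, Node* n) {
    n->next = nullptr;
    if (!head) {
        head = n;
        return;
    }
    Node* i = head;
    while (i->next)
        i = i->next;  // same walk as the drec() loop above
    i->next = n;
}

void pushFront(Node*& head, Node* n) {
    n->next = head;
    head = n;
}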
- std::unique_ptr<RecordCursor> CappedRecordStoreV1::getCursor(OperationContext* txn,
- bool forward) const {
-
- return stdx::make_unique<CappedRecordStoreV1Iterator>(txn, this, forward);
- }
+std::unique_ptr<RecordCursor> CappedRecordStoreV1::getCursor(OperationContext* txn,
+ bool forward) const {
+ return stdx::make_unique<CappedRecordStoreV1Iterator>(txn, this, forward);
+}
- vector<std::unique_ptr<RecordCursor>> CappedRecordStoreV1::getManyCursors(
- OperationContext* txn) const {
- vector<std::unique_ptr<RecordCursor>> cursors;
+vector<std::unique_ptr<RecordCursor>> CappedRecordStoreV1::getManyCursors(
+ OperationContext* txn) const {
+ vector<std::unique_ptr<RecordCursor>> cursors;
- if (!_details->capLooped()) {
- // if we haven't looped yet, just spit out all extents (same as non-capped impl)
- const Extent* ext;
- for (DiskLoc extLoc = details()->firstExtent(txn); !extLoc.isNull(); extLoc = ext->xnext) {
- ext = _getExtent(txn, extLoc);
- if (ext->firstRecord.isNull())
- continue;
+ if (!_details->capLooped()) {
+ // if we haven't looped yet, just spit out all extents (same as non-capped impl)
+ const Extent* ext;
+ for (DiskLoc extLoc = details()->firstExtent(txn); !extLoc.isNull(); extLoc = ext->xnext) {
+ ext = _getExtent(txn, extLoc);
+ if (ext->firstRecord.isNull())
+ continue;
- cursors.push_back(stdx::make_unique<RecordStoreV1Base::IntraExtentIterator>(
- txn, ext->firstRecord, this));
- }
+ cursors.push_back(stdx::make_unique<RecordStoreV1Base::IntraExtentIterator>(
+ txn, ext->firstRecord, this));
}
- else {
- // if we've looped we need to iterate the extents, starting and ending with the
- // capExtent
- const DiskLoc capExtent = details()->capExtent();
- invariant(!capExtent.isNull());
- invariant(capExtent.isValid());
-
- // First do the "old" portion of capExtent if there is any
- DiskLoc extLoc = capExtent;
- {
- const Extent* ext = _getExtent(txn, extLoc);
- if (ext->firstRecord != details()->capFirstNewRecord()) {
- // this means there is old data in capExtent
- cursors.push_back(stdx::make_unique<RecordStoreV1Base::IntraExtentIterator>(
- txn, ext->firstRecord, this));
- }
-
- extLoc = ext->xnext.isNull() ? details()->firstExtent(txn) : ext->xnext;
- }
-
- // Next handle all the other extents
- while (extLoc != capExtent) {
- const Extent* ext = _getExtent(txn, extLoc);
+ } else {
+ // if we've looped we need to iterate the extents, starting and ending with the
+ // capExtent
+ const DiskLoc capExtent = details()->capExtent();
+ invariant(!capExtent.isNull());
+ invariant(capExtent.isValid());
+
+ // First do the "old" portion of capExtent if there is any
+ DiskLoc extLoc = capExtent;
+ {
+ const Extent* ext = _getExtent(txn, extLoc);
+ if (ext->firstRecord != details()->capFirstNewRecord()) {
+ // this means there is old data in capExtent
cursors.push_back(stdx::make_unique<RecordStoreV1Base::IntraExtentIterator>(
- txn, ext->firstRecord, this));
-
- extLoc = ext->xnext.isNull() ? details()->firstExtent(txn) : ext->xnext;
+ txn, ext->firstRecord, this));
}
- // Finally handle the "new" data in the capExtent
+ extLoc = ext->xnext.isNull() ? details()->firstExtent(txn) : ext->xnext;
+ }
+
+ // Next handle all the other extents
+ while (extLoc != capExtent) {
+ const Extent* ext = _getExtent(txn, extLoc);
cursors.push_back(stdx::make_unique<RecordStoreV1Base::IntraExtentIterator>(
- txn, details()->capFirstNewRecord(), this));
+ txn, ext->firstRecord, this));
+
+ extLoc = ext->xnext.isNull() ? details()->firstExtent(txn) : ext->xnext;
}
- return cursors;
+ // Finally handle the "new" data in the capExtent
+ cursors.push_back(stdx::make_unique<RecordStoreV1Base::IntraExtentIterator>(
+ txn, details()->capFirstNewRecord(), this));
}
- void CappedRecordStoreV1::_maybeComplain( OperationContext* txn, int len ) const {
- RARELY {
- std::stringstream buf;
- buf << "couldn't make room for record len: " << len << " in capped ns " << _ns << '\n';
- buf << "numRecords: " << numRecords(txn) << '\n';
- int i = 0;
- for ( DiskLoc e = _details->firstExtent(txn);
- !e.isNull();
- e = _extentManager->getExtent( e )->xnext, ++i ) {
- buf << " Extent " << i;
- if ( e == _details->capExtent() )
- buf << " (capExtent)";
- buf << ' ' << e;
- buf << '\n';
-
- buf << " magic: " << hex << _extentManager->getExtent( e )->magic << dec
- << " extent->ns: " << _extentManager->getExtent( e )->nsDiagnostic.toString()
- << '\n';
- buf << " fr: " << _extentManager->getExtent( e )->firstRecord.toString()
- << " lr: " << _extentManager->getExtent( e )->lastRecord.toString()
- << " extent->len: " << _extentManager->getExtent( e )->length << '\n';
- }
-
- warning() << buf.str();
+ return cursors;
+}
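// A sketch of the extent visit order getManyCursors() produces for a looped
// capped collection: the stale tail of capExtent first, then every other
// extent in ring order (wrapping from the last extent back to the first),
// then the fresh head of capExtent. Extents are modeled as indices here.
#include <vector>

std::vector<int> loopedVisitOrder(int numExtents, int capExtent) {
    std::vector<int> order;
    order.push_back(capExtent);  // "old" records in capExtent, if any
    for (int e = (capExtent + 1) % numExtents; e != capExtent;
         e = (e + 1) % numExtents) {
        order.push_back(e);  // all extents other than capExtent
    }
    order.push_back(capExtent);  // "new" records from capFirstNewRecord on
    return order;
}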
- // assume it is unusually large record; if not, something is broken
- fassert( 17438, len * 5 > _details->lastExtentSize(txn) );
+void CappedRecordStoreV1::_maybeComplain(OperationContext* txn, int len) const {
+ RARELY {
+ std::stringstream buf;
+ buf << "couldn't make room for record len: " << len << " in capped ns " << _ns << '\n';
+ buf << "numRecords: " << numRecords(txn) << '\n';
+ int i = 0;
+ for (DiskLoc e = _details->firstExtent(txn); !e.isNull();
+ e = _extentManager->getExtent(e)->xnext, ++i) {
+ buf << " Extent " << i;
+ if (e == _details->capExtent())
+ buf << " (capExtent)";
+ buf << ' ' << e;
+ buf << '\n';
+
+ buf << " magic: " << hex << _extentManager->getExtent(e)->magic << dec
+ << " extent->ns: " << _extentManager->getExtent(e)->nsDiagnostic.toString() << '\n';
+ buf << " fr: " << _extentManager->getExtent(e)->firstRecord.toString()
+ << " lr: " << _extentManager->getExtent(e)->lastRecord.toString()
+ << " extent->len: " << _extentManager->getExtent(e)->length << '\n';
}
- }
-
- DiskLoc CappedRecordStoreV1::firstRecord( OperationContext* txn,
- const DiskLoc &startExtent ) const {
- for (DiskLoc i = startExtent.isNull() ? _details->firstExtent(txn) : startExtent;
- !i.isNull();
- i = _extentManager->getExtent( i )->xnext ) {
- Extent* e = _extentManager->getExtent( i );
+ warning() << buf.str();
- if ( !e->firstRecord.isNull() )
- return e->firstRecord;
- }
- return DiskLoc();
+    // assume it is an unusually large record; if not, something is broken
+ fassert(17438, len * 5 > _details->lastExtentSize(txn));
}
+}
- DiskLoc CappedRecordStoreV1::lastRecord( OperationContext* txn,
- const DiskLoc &startExtent ) const {
- for (DiskLoc i = startExtent.isNull() ? _details->lastExtent(txn) : startExtent;
- !i.isNull();
- i = _extentManager->getExtent( i )->xprev ) {
+DiskLoc CappedRecordStoreV1::firstRecord(OperationContext* txn, const DiskLoc& startExtent) const {
+ for (DiskLoc i = startExtent.isNull() ? _details->firstExtent(txn) : startExtent; !i.isNull();
+ i = _extentManager->getExtent(i)->xnext) {
+ Extent* e = _extentManager->getExtent(i);
- Extent* e = _extentManager->getExtent( i );
- if ( !e->lastRecord.isNull() )
- return e->lastRecord;
- }
- return DiskLoc();
+ if (!e->firstRecord.isNull())
+ return e->firstRecord;
}
+ return DiskLoc();
+}
+DiskLoc CappedRecordStoreV1::lastRecord(OperationContext* txn, const DiskLoc& startExtent) const {
+ for (DiskLoc i = startExtent.isNull() ? _details->lastExtent(txn) : startExtent; !i.isNull();
+ i = _extentManager->getExtent(i)->xprev) {
+ Extent* e = _extentManager->getExtent(i);
+ if (!e->lastRecord.isNull())
+ return e->lastRecord;
+ }
+ return DiskLoc();
+}
}
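
The looped branch of getManyCursors above visits extents in a fixed circular
order: the "old" tail of the capExtent first, then every other extent by
following xnext (wrapping from the last extent back to firstExtent), and
finally the "new" head of the capExtent. A minimal self-contained sketch of
that walk, using toy index-based extents rather than the real Extent/DiskLoc
types (all names here are illustrative only):

#include <iostream>
#include <vector>

// Toy extent chain: next is the index of xnext, or -1 for the last extent
// (the walk then wraps to the first extent, index 0).
struct ToyExtent {
    int next;
};

// Visit order used by the looped case: cap extent ("old" records), every
// other extent in xnext order with wrap-around, then the cap extent again
// ("new" records).
std::vector<int> visitOrder(const std::vector<ToyExtent>& extents, int capExtent) {
    std::vector<int> order;
    order.push_back(capExtent);  // old portion of the cap extent
    int cur = extents[capExtent].next < 0 ? 0 : extents[capExtent].next;
    while (cur != capExtent) {
        order.push_back(cur);
        cur = extents[cur].next < 0 ? 0 : extents[cur].next;
    }
    order.push_back(capExtent);  // new portion of the cap extent
    return order;
}

int main() {
    // Three extents chained 0 -> 1 -> 2 -> (wrap to 0); the cap extent is 1.
    std::vector<ToyExtent> extents = {{1}, {2}, {-1}};
    for (int i : visitOrder(extents, 1))
        std::cout << i << ' ';  // prints: 1 2 0 1
    std::cout << '\n';
    return 0;
}
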
diff --git a/src/mongo/db/storage/mmap_v1/record_store_v1_capped.h b/src/mongo/db/storage/mmap_v1/record_store_v1_capped.h
index 186de786f37..83105fe8ff9 100644
--- a/src/mongo/db/storage/mmap_v1/record_store_v1_capped.h
+++ b/src/mongo/db/storage/mmap_v1/record_store_v1_capped.h
@@ -38,95 +38,92 @@
namespace mongo {
- class CappedRecordStoreV1 final : public RecordStoreV1Base {
- public:
- CappedRecordStoreV1( OperationContext* txn,
- CappedDocumentDeleteCallback* collection,
- StringData ns,
- RecordStoreV1MetaData* details,
- ExtentManager* em,
- bool isSystemIndexes );
-
- ~CappedRecordStoreV1() final;
-
- const char* name() const final { return "CappedRecordStoreV1"; }
-
- Status truncate(OperationContext* txn) final;
-
- /**
- * Truncate documents newer than the document at 'end' from the capped
- * collection. The collection cannot be completely emptied using this
- * function. An assertion will be thrown if that is attempted.
- * @param inclusive - Truncate 'end' as well iff true
- * XXX: this will go away soon, just needed to move for now
- */
- void temp_cappedTruncateAfter(OperationContext* txn, RecordId end, bool inclusive) final;
-
- std::unique_ptr<RecordCursor> getCursor(OperationContext* txn, bool forward) const final;
-
- std::vector<std::unique_ptr<RecordCursor>> getManyCursors(
- OperationContext* txn) const final;
-
- // Start from firstExtent by default.
- DiskLoc firstRecord( OperationContext* txn,
- const DiskLoc &startExtent = DiskLoc() ) const;
- // Start from lastExtent by default.
- DiskLoc lastRecord( OperationContext* txn,
- const DiskLoc &startExtent = DiskLoc() ) const;
-
- protected:
-
- bool isCapped() const final { return true; }
- bool shouldPadInserts() const final { return false; }
-
- void setCappedDeleteCallback( CappedDocumentDeleteCallback* cb ) final {
- _deleteCallback = cb;
- }
-
- StatusWith<DiskLoc> allocRecord( OperationContext* txn,
- int lengthWithHeaders,
- bool enforceQuota ) final;
-
- void addDeletedRec(OperationContext* txn, const DiskLoc& dloc) final;
-
- private:
- // -- start copy from cap.cpp --
- void _compact(OperationContext* txn);
- DiskLoc cappedFirstDeletedInCurExtent() const;
- void setFirstDeletedInCurExtent( OperationContext* txn, const DiskLoc& loc );
- void cappedCheckMigrate(OperationContext* txn);
- DiskLoc __capAlloc( OperationContext* txn, int len );
- bool inCapExtent( const DiskLoc &dl ) const;
- DiskLoc cappedListOfAllDeletedRecords() const;
- DiskLoc cappedLastDelRecLastExtent() const;
- void setListOfAllDeletedRecords( OperationContext* txn, const DiskLoc& loc );
- void setLastDelRecLastExtent( OperationContext* txn, const DiskLoc& loc );
- Extent *theCapExtent() const;
- bool nextIsInCapExtent( const DiskLoc &dl ) const;
- void advanceCapExtent( OperationContext* txn, StringData ns );
- void cappedTruncateLastDelUpdate(OperationContext* txn);
-
- /**
- * Truncate documents newer than the document at 'end' from the capped
- * collection. The collection cannot be completely emptied using this
- * function. An assertion will be thrown if that is attempted.
- * @param inclusive - Truncate 'end' as well iff true
- */
- void cappedTruncateAfter(OperationContext* txn,
- const char* ns,
- DiskLoc end,
- bool inclusive);
-
- void _maybeComplain( OperationContext* txn, int len ) const;
-
- // -- end copy from cap.cpp --
-
- CappedDocumentDeleteCallback* _deleteCallback;
-
- OwnedPointerVector<ExtentManager::CacheHint> _extentAdvice;
-
- friend class CappedRecordStoreV1Iterator;
- };
-
-
+class CappedRecordStoreV1 final : public RecordStoreV1Base {
+public:
+ CappedRecordStoreV1(OperationContext* txn,
+ CappedDocumentDeleteCallback* collection,
+ StringData ns,
+ RecordStoreV1MetaData* details,
+ ExtentManager* em,
+ bool isSystemIndexes);
+
+ ~CappedRecordStoreV1() final;
+
+ const char* name() const final {
+ return "CappedRecordStoreV1";
+ }
+
+ Status truncate(OperationContext* txn) final;
+
+ /**
+ * Truncate documents newer than the document at 'end' from the capped
+ * collection. The collection cannot be completely emptied using this
+ * function. An assertion will be thrown if that is attempted.
+ * @param inclusive - Truncate 'end' as well iff true
+ * XXX: this will go away soon, just needed to move for now
+ */
+ void temp_cappedTruncateAfter(OperationContext* txn, RecordId end, bool inclusive) final;
+
+ std::unique_ptr<RecordCursor> getCursor(OperationContext* txn, bool forward) const final;
+
+ std::vector<std::unique_ptr<RecordCursor>> getManyCursors(OperationContext* txn) const final;
+
+ // Start from firstExtent by default.
+ DiskLoc firstRecord(OperationContext* txn, const DiskLoc& startExtent = DiskLoc()) const;
+ // Start from lastExtent by default.
+ DiskLoc lastRecord(OperationContext* txn, const DiskLoc& startExtent = DiskLoc()) const;
+
+protected:
+ bool isCapped() const final {
+ return true;
+ }
+ bool shouldPadInserts() const final {
+ return false;
+ }
+
+ void setCappedDeleteCallback(CappedDocumentDeleteCallback* cb) final {
+ _deleteCallback = cb;
+ }
+
+ StatusWith<DiskLoc> allocRecord(OperationContext* txn,
+ int lengthWithHeaders,
+ bool enforceQuota) final;
+
+ void addDeletedRec(OperationContext* txn, const DiskLoc& dloc) final;
+
+private:
+ // -- start copy from cap.cpp --
+ void _compact(OperationContext* txn);
+ DiskLoc cappedFirstDeletedInCurExtent() const;
+ void setFirstDeletedInCurExtent(OperationContext* txn, const DiskLoc& loc);
+ void cappedCheckMigrate(OperationContext* txn);
+ DiskLoc __capAlloc(OperationContext* txn, int len);
+ bool inCapExtent(const DiskLoc& dl) const;
+ DiskLoc cappedListOfAllDeletedRecords() const;
+ DiskLoc cappedLastDelRecLastExtent() const;
+ void setListOfAllDeletedRecords(OperationContext* txn, const DiskLoc& loc);
+ void setLastDelRecLastExtent(OperationContext* txn, const DiskLoc& loc);
+ Extent* theCapExtent() const;
+ bool nextIsInCapExtent(const DiskLoc& dl) const;
+ void advanceCapExtent(OperationContext* txn, StringData ns);
+ void cappedTruncateLastDelUpdate(OperationContext* txn);
+
+ /**
+ * Truncate documents newer than the document at 'end' from the capped
+ * collection. The collection cannot be completely emptied using this
+ * function. An assertion will be thrown if that is attempted.
+ * @param inclusive - Truncate 'end' as well iff true
+ */
+ void cappedTruncateAfter(OperationContext* txn, const char* ns, DiskLoc end, bool inclusive);
+
+ void _maybeComplain(OperationContext* txn, int len) const;
+
+ // -- end copy from cap.cpp --
+
+ CappedDocumentDeleteCallback* _deleteCallback;
+
+ OwnedPointerVector<ExtentManager::CacheHint> _extentAdvice;
+
+ friend class CappedRecordStoreV1Iterator;
+};
}
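
Note that firstRecord and lastRecord above default startExtent to a null
DiskLoc, which the implementations interpret as "start the scan at
firstExtent/lastExtent". A hedged sketch of the same null-means-default
convention with a toy location type (ToyLoc and scanStart are illustrative,
not the real DiskLoc API):

#include <cassert>

struct ToyLoc {
    int a;  // -1 plays the role of DiskLoc's null state
    bool isNull() const {
        return a == -1;
    }
};

// Mirrors the header's convention: callers may omit the argument to begin
// the scan at the collection's first extent.
int scanStart(const ToyLoc& startExtent = ToyLoc{-1}) {
    return startExtent.isNull() ? 0 /* firstExtent */ : startExtent.a;
}

int main() {
    assert(scanStart() == 0);           // default: begin at the first extent
    assert(scanStart(ToyLoc{3}) == 3);  // explicit: begin at the given extent
    return 0;
}
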
diff --git a/src/mongo/db/storage/mmap_v1/record_store_v1_capped_iterator.cpp b/src/mongo/db/storage/mmap_v1/record_store_v1_capped_iterator.cpp
index ea77d224488..353a7f39c0c 100644
--- a/src/mongo/db/storage/mmap_v1/record_store_v1_capped_iterator.cpp
+++ b/src/mongo/db/storage/mmap_v1/record_store_v1_capped_iterator.cpp
@@ -36,181 +36,181 @@
namespace mongo {
- //
- // Capped collection traversal
- //
- CappedRecordStoreV1Iterator::CappedRecordStoreV1Iterator(OperationContext* txn,
- const CappedRecordStoreV1* collection,
- bool forward)
- : _txn(txn), _recordStore(collection), _forward(forward) {
-
- const RecordStoreV1MetaData* nsd = _recordStore->details();
-
- // If a start position isn't specified, we fill one out from the start of the
- // collection.
- if (_forward) {
- // Going forwards.
- if (!nsd->capLooped()) {
- // If our capped collection doesn't loop around, the first record is easy.
- _curr = collection->firstRecord(_txn);
- }
- else {
-                // Our capped collection has "looped" around.
- // Copied verbatim from ForwardCappedCursor::init.
- // TODO ELABORATE
- _curr = _getExtent( nsd->capExtent() )->firstRecord;
- if (!_curr.isNull() && _curr == nsd->capFirstNewRecord()) {
- _curr = _getExtent( nsd->capExtent() )->lastRecord;
- _curr = nextLoop(_curr);
- }
- }
- }
- else {
- // Going backwards
- if (!nsd->capLooped()) {
- // Start at the end.
- _curr = collection->lastRecord(_txn);
- }
- else {
- _curr = _getExtent( nsd->capExtent() )->lastRecord;
+//
+// Capped collection traversal
+//
+CappedRecordStoreV1Iterator::CappedRecordStoreV1Iterator(OperationContext* txn,
+ const CappedRecordStoreV1* collection,
+ bool forward)
+ : _txn(txn), _recordStore(collection), _forward(forward) {
+ const RecordStoreV1MetaData* nsd = _recordStore->details();
+
+ // If a start position isn't specified, we fill one out from the start of the
+ // collection.
+ if (_forward) {
+ // Going forwards.
+ if (!nsd->capLooped()) {
+ // If our capped collection doesn't loop around, the first record is easy.
+ _curr = collection->firstRecord(_txn);
+ } else {
+            // Our capped collection has "looped" around.
+ // Copied verbatim from ForwardCappedCursor::init.
+ // TODO ELABORATE
+ _curr = _getExtent(nsd->capExtent())->firstRecord;
+ if (!_curr.isNull() && _curr == nsd->capFirstNewRecord()) {
+ _curr = _getExtent(nsd->capExtent())->lastRecord;
+ _curr = nextLoop(_curr);
}
}
- }
-
- boost::optional<Record> CappedRecordStoreV1Iterator::next() {
- if (isEOF()) return {};
- auto toReturn = _curr.toRecordId();
- _curr = getNextCapped(_curr);
- return {{toReturn, _recordStore->RecordStore::dataFor(_txn, toReturn)}};
- }
-
- boost::optional<Record> CappedRecordStoreV1Iterator::seekExact(const RecordId& id) {
- _curr = getNextCapped(DiskLoc::fromRecordId(id));
- return {{id, _recordStore->RecordStore::dataFor(_txn, id)}};
- }
-
- void CappedRecordStoreV1Iterator::invalidate(const RecordId& id) {
- const DiskLoc dl = DiskLoc::fromRecordId(id);
- if (dl == _curr) {
- // We *could* move to the next thing, since there is actually a next
- // thing, but according to clientcursor.cpp:
- // "note we cannot advance here. if this condition occurs, writes to the oplog
- // have "caught" the reader. skipping ahead, the reader would miss potentially
- // important data."
- _curr = DiskLoc();
- _killedByInvalidate = true;
+ } else {
+ // Going backwards
+ if (!nsd->capLooped()) {
+ // Start at the end.
+ _curr = collection->lastRecord(_txn);
+ } else {
+ _curr = _getExtent(nsd->capExtent())->lastRecord;
}
}
-
- void CappedRecordStoreV1Iterator::savePositioned() {
- _txn = nullptr;
+}
+
+boost::optional<Record> CappedRecordStoreV1Iterator::next() {
+ if (isEOF())
+ return {};
+ auto toReturn = _curr.toRecordId();
+ _curr = getNextCapped(_curr);
+ return {{toReturn, _recordStore->RecordStore::dataFor(_txn, toReturn)}};
+}
+
+boost::optional<Record> CappedRecordStoreV1Iterator::seekExact(const RecordId& id) {
+ _curr = getNextCapped(DiskLoc::fromRecordId(id));
+ return {{id, _recordStore->RecordStore::dataFor(_txn, id)}};
+}
+
+void CappedRecordStoreV1Iterator::invalidate(const RecordId& id) {
+ const DiskLoc dl = DiskLoc::fromRecordId(id);
+ if (dl == _curr) {
+ // We *could* move to the next thing, since there is actually a next
+ // thing, but according to clientcursor.cpp:
+ // "note we cannot advance here. if this condition occurs, writes to the oplog
+ // have "caught" the reader. skipping ahead, the reader would miss potentially
+ // important data."
+ _curr = DiskLoc();
+ _killedByInvalidate = true;
}
+}
- bool CappedRecordStoreV1Iterator::restore(OperationContext* txn) {
- _txn = txn;
- return !_killedByInvalidate;
- }
+void CappedRecordStoreV1Iterator::savePositioned() {
+ _txn = nullptr;
+}
- DiskLoc CappedRecordStoreV1Iterator::getNextCapped(const DiskLoc& dl) {
- invariant(!dl.isNull());
- const RecordStoreV1MetaData* details = _recordStore->details();
+bool CappedRecordStoreV1Iterator::restore(OperationContext* txn) {
+ _txn = txn;
+ return !_killedByInvalidate;
+}
- if (_forward) {
- // If it's not looped, it's easy.
- if (!_recordStore->details()->capLooped()) {
- return _getNextRecord( dl );
- }
+DiskLoc CappedRecordStoreV1Iterator::getNextCapped(const DiskLoc& dl) {
+ invariant(!dl.isNull());
+ const RecordStoreV1MetaData* details = _recordStore->details();
- // TODO ELABORATE
- // EOF.
- if (dl == _getExtent( details->capExtent() )->lastRecord) {
- return DiskLoc();
- }
+ if (_forward) {
+ // If it's not looped, it's easy.
+ if (!_recordStore->details()->capLooped()) {
+ return _getNextRecord(dl);
+ }
- DiskLoc ret = nextLoop(dl);
+ // TODO ELABORATE
+ // EOF.
+ if (dl == _getExtent(details->capExtent())->lastRecord) {
+ return DiskLoc();
+ }
-            // If we become capFirstNewRecord from the same extent, advance to the next extent.
- if (ret == details->capFirstNewRecord() && ret != _getExtent( details->capExtent() )->firstRecord) {
- ret = nextLoop(_getExtent( details->capExtent() )->lastRecord);
- }
+ DiskLoc ret = nextLoop(dl);
-            // If we have just gotten to the beginning of capExtent, skip to capFirstNewRecord
- if (ret == _getExtent( details->capExtent() )->firstRecord) { ret = details->capFirstNewRecord(); }
+        // If we become capFirstNewRecord from the same extent, advance to the next extent.
+ if (ret == details->capFirstNewRecord() &&
+ ret != _getExtent(details->capExtent())->firstRecord) {
+ ret = nextLoop(_getExtent(details->capExtent())->lastRecord);
+ }
- return ret;
+        // If we have just gotten to the beginning of capExtent, skip to capFirstNewRecord
+ if (ret == _getExtent(details->capExtent())->firstRecord) {
+ ret = details->capFirstNewRecord();
}
- else {
- if (!details->capLooped()) { return _getPrevRecord( dl ); }
- // TODO ELABORATE
- // Last record
- if (details->capFirstNewRecord() == _getExtent( details->capExtent() )->firstRecord) {
- if (dl == nextLoop(_getExtent( details->capExtent() )->lastRecord)) {
- return DiskLoc();
- }
- }
- else {
- if (dl == _getExtent( details->capExtent() )->firstRecord) { return DiskLoc(); }
- }
+ return ret;
+ } else {
+ if (!details->capLooped()) {
+ return _getPrevRecord(dl);
+ }
- DiskLoc ret;
- // If we are capFirstNewRecord, advance to prev extent, otherwise just get prev.
- if (dl == details->capFirstNewRecord()) {
- ret = prevLoop(_getExtent( details->capExtent() )->firstRecord);
- }
- else {
- ret = prevLoop(dl);
+ // TODO ELABORATE
+ // Last record
+ if (details->capFirstNewRecord() == _getExtent(details->capExtent())->firstRecord) {
+ if (dl == nextLoop(_getExtent(details->capExtent())->lastRecord)) {
+ return DiskLoc();
}
-
- // If we just became last in cap extent, advance past capFirstNewRecord
-            // (We know ext(capExtent)->firstRecord != capFirstNewRecord, since we would
-            // have returned DiskLoc() earlier otherwise.)
- if (ret == _getExtent( details->capExtent() )->lastRecord) {
- ret = _getPrevRecord( details->capFirstNewRecord() );
+ } else {
+ if (dl == _getExtent(details->capExtent())->firstRecord) {
+ return DiskLoc();
}
+ }
- return ret;
+ DiskLoc ret;
+ // If we are capFirstNewRecord, advance to prev extent, otherwise just get prev.
+ if (dl == details->capFirstNewRecord()) {
+ ret = prevLoop(_getExtent(details->capExtent())->firstRecord);
+ } else {
+ ret = prevLoop(dl);
}
- }
- DiskLoc CappedRecordStoreV1Iterator::nextLoop(const DiskLoc& prev) {
- // TODO ELABORATE
- DiskLoc next = _getNextRecord( prev );
- if (!next.isNull()) {
- return next;
+ // If we just became last in cap extent, advance past capFirstNewRecord
+        // (We know ext(capExtent)->firstRecord != capFirstNewRecord, since we would
+        // have returned DiskLoc() earlier otherwise.)
+ if (ret == _getExtent(details->capExtent())->lastRecord) {
+ ret = _getPrevRecord(details->capFirstNewRecord());
}
- return _recordStore->firstRecord(_txn);
+
+ return ret;
}
+}
- DiskLoc CappedRecordStoreV1Iterator::prevLoop(const DiskLoc& curr) {
- // TODO ELABORATE
- DiskLoc prev = _getPrevRecord( curr );
- if (!prev.isNull()) {
- return prev;
- }
- return _recordStore->lastRecord(_txn);
+DiskLoc CappedRecordStoreV1Iterator::nextLoop(const DiskLoc& prev) {
+ // TODO ELABORATE
+ DiskLoc next = _getNextRecord(prev);
+ if (!next.isNull()) {
+ return next;
+ }
+ return _recordStore->firstRecord(_txn);
+}
+
+DiskLoc CappedRecordStoreV1Iterator::prevLoop(const DiskLoc& curr) {
+ // TODO ELABORATE
+ DiskLoc prev = _getPrevRecord(curr);
+ if (!prev.isNull()) {
+ return prev;
}
+ return _recordStore->lastRecord(_txn);
+}
- Extent* CappedRecordStoreV1Iterator::_getExtent( const DiskLoc& loc ) {
- return _recordStore->_extentManager->getExtent( loc );
- }
+Extent* CappedRecordStoreV1Iterator::_getExtent(const DiskLoc& loc) {
+ return _recordStore->_extentManager->getExtent(loc);
+}
- DiskLoc CappedRecordStoreV1Iterator::_getNextRecord( const DiskLoc& loc ) {
- return _recordStore->getNextRecord( _txn, loc );
- }
+DiskLoc CappedRecordStoreV1Iterator::_getNextRecord(const DiskLoc& loc) {
+ return _recordStore->getNextRecord(_txn, loc);
+}
- DiskLoc CappedRecordStoreV1Iterator::_getPrevRecord( const DiskLoc& loc ) {
- return _recordStore->getPrevRecord( _txn, loc );
- }
+DiskLoc CappedRecordStoreV1Iterator::_getPrevRecord(const DiskLoc& loc) {
+ return _recordStore->getPrevRecord(_txn, loc);
+}
- std::unique_ptr<RecordFetcher> CappedRecordStoreV1Iterator::fetcherForNext() const {
- return _recordStore->_extentManager->recordNeedsFetch(_curr);
- }
+std::unique_ptr<RecordFetcher> CappedRecordStoreV1Iterator::fetcherForNext() const {
+ return _recordStore->_extentManager->recordNeedsFetch(_curr);
+}
- std::unique_ptr<RecordFetcher> CappedRecordStoreV1Iterator::fetcherForId(
- const RecordId& id) const {
- return _recordStore->_extentManager->recordNeedsFetch(DiskLoc::fromRecordId(id));
- }
+std::unique_ptr<RecordFetcher> CappedRecordStoreV1Iterator::fetcherForId(const RecordId& id) const {
+ return _recordStore->_extentManager->recordNeedsFetch(DiskLoc::fromRecordId(id));
+}
} // namespace mongo
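
nextLoop and prevLoop above supply the wrap-around steps that make a looped
capped scan circular: advancing past the last record restarts at firstRecord,
and stepping back from the first restarts at lastRecord. A minimal sketch of
that wrap logic over toy indices 0..n-1 (an assumption for brevity; the real
code chases DiskLocs through extents):

#include <cassert>
#include <cstddef>

// Step forward, wrapping to the first slot after the last one.
std::size_t nextLoop(std::size_t cur, std::size_t n) {
    return (cur + 1 < n) ? cur + 1 : 0;
}

// Step backward, wrapping to the last slot before the first one.
std::size_t prevLoop(std::size_t cur, std::size_t n) {
    return (cur == 0) ? n - 1 : cur - 1;
}

int main() {
    assert(nextLoop(4, 5) == 0);  // wrapped around to the first record
    assert(prevLoop(0, 5) == 4);  // wrapped around to the last record
    return 0;
}
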
diff --git a/src/mongo/db/storage/mmap_v1/record_store_v1_capped_iterator.h b/src/mongo/db/storage/mmap_v1/record_store_v1_capped_iterator.h
index de2b6fda5e3..0a366d9921a 100644
--- a/src/mongo/db/storage/mmap_v1/record_store_v1_capped_iterator.h
+++ b/src/mongo/db/storage/mmap_v1/record_store_v1_capped_iterator.h
@@ -33,58 +33,60 @@
namespace mongo {
- class CappedRecordStoreV1;
+class CappedRecordStoreV1;
- struct Extent;
+struct Extent;
+
+/**
+ * This class iterates over a capped collection identified by 'ns'.
+ * The collection must exist when the constructor is called.
+ */
+class CappedRecordStoreV1Iterator final : public RecordCursor {
+public:
+ CappedRecordStoreV1Iterator(OperationContext* txn,
+ const CappedRecordStoreV1* collection,
+ bool forward);
+
+ boost::optional<Record> next() final;
+ boost::optional<Record> seekExact(const RecordId& id) final;
+ void savePositioned() final;
+ bool restore(OperationContext* txn) final;
+ void invalidate(const RecordId& dl) final;
+ std::unique_ptr<RecordFetcher> fetcherForNext() const final;
+ std::unique_ptr<RecordFetcher> fetcherForId(const RecordId& id) const final;
+
+private:
+ void advance();
+ bool isEOF() {
+ return _curr.isNull();
+ }
/**
- * This class iterates over a capped collection identified by 'ns'.
- * The collection must exist when the constructor is called.
+ * Internal collection navigation helper methods.
*/
- class CappedRecordStoreV1Iterator final : public RecordCursor {
- public:
- CappedRecordStoreV1Iterator( OperationContext* txn,
- const CappedRecordStoreV1* collection,
- bool forward );
-
- boost::optional<Record> next() final;
- boost::optional<Record> seekExact(const RecordId& id) final;
- void savePositioned() final;
- bool restore(OperationContext* txn) final;
- void invalidate(const RecordId& dl) final;
- std::unique_ptr<RecordFetcher> fetcherForNext() const final;
- std::unique_ptr<RecordFetcher> fetcherForId(const RecordId& id) const final;
-
- private:
- void advance();
- bool isEOF() { return _curr.isNull(); }
-
- /**
- * Internal collection navigation helper methods.
- */
- DiskLoc getNextCapped(const DiskLoc& dl);
- DiskLoc prevLoop(const DiskLoc& curr);
- DiskLoc nextLoop(const DiskLoc& prev);
-
-            // some helpers - these should probably move to RecordStore
- Extent* _getExtent( const DiskLoc& loc );
- DiskLoc _getNextRecord( const DiskLoc& loc );
- DiskLoc _getPrevRecord( const DiskLoc& loc );
-
- // transactional context for read locks. Not owned by us
- OperationContext* _txn;
-
- // The collection we're iterating over.
- const CappedRecordStoreV1* const _recordStore;
-
- // The result returned on the next call to getNext().
- DiskLoc _curr;
-
- const bool _forward;
-
- // If invalidate kills the DiskLoc we need to move forward, we kill the iterator. See the
- // comment in the body of invalidate(...).
- bool _killedByInvalidate = false;
- };
+ DiskLoc getNextCapped(const DiskLoc& dl);
+ DiskLoc prevLoop(const DiskLoc& curr);
+ DiskLoc nextLoop(const DiskLoc& prev);
+
+    // some helpers - these should probably move to RecordStore
+ Extent* _getExtent(const DiskLoc& loc);
+ DiskLoc _getNextRecord(const DiskLoc& loc);
+ DiskLoc _getPrevRecord(const DiskLoc& loc);
+
+ // transactional context for read locks. Not owned by us
+ OperationContext* _txn;
+
+ // The collection we're iterating over.
+ const CappedRecordStoreV1* const _recordStore;
+
+ // The result returned on the next call to getNext().
+ DiskLoc _curr;
+
+ const bool _forward;
+
+ // If invalidate kills the DiskLoc we need to move forward, we kill the iterator. See the
+ // comment in the body of invalidate(...).
+ bool _killedByInvalidate = false;
+};
} // namespace mongo
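
A caller drains a RecordCursor like the one declared above by calling next()
until it yields nothing, matching the `if (isEOF()) return {};` early-out in
the .cpp. A hedged, self-contained sketch of that usage pattern with toy
types (std::optional and ToyRecord stand in for boost::optional and Record):

#include <cstddef>
#include <iostream>
#include <optional>
#include <string>
#include <utility>
#include <vector>

struct ToyRecord {
    int id;
    std::string data;
};

// Yields records in order and signals EOF with an empty optional, the same
// contract next() implements above.
class ToyCursor {
public:
    explicit ToyCursor(std::vector<ToyRecord> recs) : _recs(std::move(recs)) {}

    std::optional<ToyRecord> next() {
        if (_pos == _recs.size())
            return std::nullopt;  // EOF
        return _recs[_pos++];
    }

private:
    std::vector<ToyRecord> _recs;
    std::size_t _pos = 0;
};

int main() {
    ToyCursor cursor({{1, "a"}, {2, "b"}});
    while (auto rec = cursor.next())  // drain until EOF, as cursor callers do
        std::cout << rec->id << ": " << rec->data << '\n';
    return 0;
}
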
diff --git a/src/mongo/db/storage/mmap_v1/record_store_v1_capped_test.cpp b/src/mongo/db/storage/mmap_v1/record_store_v1_capped_test.cpp
index 0c369587f9b..1089d243467 100644
--- a/src/mongo/db/storage/mmap_v1/record_store_v1_capped_test.cpp
+++ b/src/mongo/db/storage/mmap_v1/record_store_v1_capped_test.cpp
@@ -42,773 +42,671 @@ using namespace mongo;
namespace {
- using std::string;
- using std::vector;
-
-    // Provides data to be inserted. Must be large enough for the largest possible record.
-    // Should be in BSS so that unused portions are effectively free.
- char zeros[20*1024*1024] = {};
-
- class DummyCappedDocumentDeleteCallback : public CappedDocumentDeleteCallback {
- public:
- Status aboutToDeleteCapped( OperationContext* txn, const RecordId& loc, RecordData data) {
- deleted.push_back( DiskLoc::fromRecordId(loc) );
- return Status::OK();
- }
- vector<DiskLoc> deleted;
- };
-
- void simpleInsertTest( const char* buf, int size ) {
-
- OperationContextNoop txn;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData( true, 0 );
- DummyCappedDocumentDeleteCallback cb;
-
- string myns = "test.simple1";
- CappedRecordStoreV1 rs( &txn, &cb, myns, md, &em, false );
+using std::string;
+using std::vector;
+
+// Provides data to be inserted. Must be large enough for the largest possible record.
+// Should be in BSS so that unused portions are effectively free.
+char zeros[20 * 1024 * 1024] = {};
+
+class DummyCappedDocumentDeleteCallback : public CappedDocumentDeleteCallback {
+public:
+ Status aboutToDeleteCapped(OperationContext* txn, const RecordId& loc, RecordData data) {
+ deleted.push_back(DiskLoc::fromRecordId(loc));
+ return Status::OK();
+ }
+ vector<DiskLoc> deleted;
+};
- rs.increaseStorageSize( &txn, 1024, false );
+void simpleInsertTest(const char* buf, int size) {
+ OperationContextNoop txn;
+ DummyExtentManager em;
+ DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(true, 0);
+ DummyCappedDocumentDeleteCallback cb;
- ASSERT_NOT_OK( rs.insertRecord( &txn, buf, 3, 1000 ).getStatus() );
+ string myns = "test.simple1";
+ CappedRecordStoreV1 rs(&txn, &cb, myns, md, &em, false);
- rs.insertRecord( &txn, buf, size, 10000 );
+ rs.increaseStorageSize(&txn, 1024, false);
- {
- BSONObjBuilder b;
- int64_t storageSize = rs.storageSize( &txn, &b );
- BSONObj obj = b.obj();
- ASSERT_EQUALS( 1, obj["numExtents"].numberInt() );
- ASSERT_EQUALS( storageSize, em.quantizeExtentSize( 1024 ) );
- }
+ ASSERT_NOT_OK(rs.insertRecord(&txn, buf, 3, 1000).getStatus());
- for ( int i = 0; i < 1000; i++ ) {
- ASSERT_OK( rs.insertRecord( &txn, buf, size, 10000 ).getStatus() );
- }
+ rs.insertRecord(&txn, buf, size, 10000);
- long long start = md->numRecords();
- for ( int i = 0; i < 1000; i++ ) {
- ASSERT_OK( rs.insertRecord( &txn, buf, size, 10000 ).getStatus() );
- }
- ASSERT_EQUALS( start, md->numRecords() );
- ASSERT_GREATER_THAN( start, 100 );
- ASSERT_LESS_THAN( start, 1000 );
+ {
+ BSONObjBuilder b;
+ int64_t storageSize = rs.storageSize(&txn, &b);
+ BSONObj obj = b.obj();
+ ASSERT_EQUALS(1, obj["numExtents"].numberInt());
+ ASSERT_EQUALS(storageSize, em.quantizeExtentSize(1024));
}
- TEST(CappedRecordStoreV1, SimpleInsertSize4) {
- simpleInsertTest("abcd", 4);
- }
- TEST(CappedRecordStoreV1, SimpleInsertSize8) {
- simpleInsertTest("abcdefgh", 8);
+ for (int i = 0; i < 1000; i++) {
+ ASSERT_OK(rs.insertRecord(&txn, buf, size, 10000).getStatus());
}
- TEST(CappedRecordStoreV1, EmptySingleExtent) {
- OperationContextNoop txn;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData( true, 0 );
- DummyCappedDocumentDeleteCallback cb;
- CappedRecordStoreV1 rs(&txn, &cb, "test.foo", md, &em, false);
-
- {
- LocAndSize records[] = {
- {}
- };
- LocAndSize drecs[] = {
- {DiskLoc(0, 1000), 1000},
- {}
- };
- md->setCapExtent(&txn, DiskLoc(0, 0));
- md->setCapFirstNewRecord(&txn, DiskLoc().setInvalid());
- initializeV1RS(&txn, records, drecs, NULL, &em, md);
- }
-
- rs.insertRecord(&txn, zeros, 100 - MmapV1RecordHeader::HeaderSize, false);
-
- {
- LocAndSize recs[] = {
- {DiskLoc(0, 1000), 100},
- {}
- };
- LocAndSize drecs[] = {
- {DiskLoc(0, 1100), 900},
- {}
- };
- assertStateV1RS(&txn, recs, drecs, NULL, &em, md);
- ASSERT_EQUALS(md->capExtent(), DiskLoc(0, 0));
- ASSERT_EQUALS(md->capFirstNewRecord(), DiskLoc().setInvalid()); // unlooped
- }
+ long long start = md->numRecords();
+ for (int i = 0; i < 1000; i++) {
+ ASSERT_OK(rs.insertRecord(&txn, buf, size, 10000).getStatus());
}
+ ASSERT_EQUALS(start, md->numRecords());
+ ASSERT_GREATER_THAN(start, 100);
+ ASSERT_LESS_THAN(start, 1000);
+}
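
simpleInsertTest above verifies the core capped invariant: once the store is
full, numRecords stays constant because every insert deletes the oldest
records to make room. A storage-free model of that invariant (capping by
record count instead of bytes is a simplification; the real store caps by
size):

#include <cassert>
#include <cstddef>
#include <deque>

class ToyCappedStore {
public:
    explicit ToyCappedStore(std::size_t maxRecords) : _max(maxRecords) {}

    void insert(int doc) {
        if (_docs.size() == _max)
            _docs.pop_front();  // delete the oldest record, as the capped allocator does
        _docs.push_back(doc);
    }

    std::size_t numRecords() const {
        return _docs.size();
    }

private:
    std::deque<int> _docs;
    std::size_t _max;
};

int main() {
    ToyCappedStore store(3);
    for (int i = 0; i < 10; ++i)
        store.insert(i);
    assert(store.numRecords() == 3);  // count is stable once the store is full
    return 0;
}
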
- TEST(CappedRecordStoreV1, FirstLoopWithSingleExtentExactSize) {
- OperationContextNoop txn;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData( true, 0 );
- DummyCappedDocumentDeleteCallback cb;
- CappedRecordStoreV1 rs(&txn, &cb, "test.foo", md, &em, false);
-
- {
- LocAndSize records[] = {
- {DiskLoc(0, 1000), 100},
- {DiskLoc(0, 1100), 100},
- {DiskLoc(0, 1200), 100},
- {DiskLoc(0, 1300), 100},
- {DiskLoc(0, 1400), 100},
- {}
- };
- LocAndSize drecs[] = {
- {DiskLoc(0, 1500), 50},
- {}
- };
- md->setCapExtent(&txn, DiskLoc(0, 0));
- md->setCapFirstNewRecord(&txn, DiskLoc().setInvalid()); // unlooped
- initializeV1RS(&txn, records, drecs, NULL, &em, md);
- }
-
- rs.insertRecord(&txn, zeros, 100 - MmapV1RecordHeader::HeaderSize, false);
+TEST(CappedRecordStoreV1, SimpleInsertSize4) {
+ simpleInsertTest("abcd", 4);
+}
+TEST(CappedRecordStoreV1, SimpleInsertSize8) {
+ simpleInsertTest("abcdefgh", 8);
+}
- {
- LocAndSize recs[] = {
- {DiskLoc(0, 1200), 100}, // first old record
- {DiskLoc(0, 1300), 100},
- {DiskLoc(0, 1400), 100}, // last old record
- {DiskLoc(0, 1000), 100}, // first new record
- {}
- };
- LocAndSize drecs[] = {
- {DiskLoc(0, 1100), 100}, // gap after newest record XXX this is probably a bug
- {DiskLoc(0, 1500), 50}, // gap at end of extent
- {}
- };
- assertStateV1RS(&txn, recs, drecs, NULL, &em, md);
- ASSERT_EQUALS(md->capExtent(), DiskLoc(0, 0));
- ASSERT_EQUALS(md->capFirstNewRecord(), DiskLoc(0, 1000));
- }
+TEST(CappedRecordStoreV1, EmptySingleExtent) {
+ OperationContextNoop txn;
+ DummyExtentManager em;
+ DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(true, 0);
+ DummyCappedDocumentDeleteCallback cb;
+ CappedRecordStoreV1 rs(&txn, &cb, "test.foo", md, &em, false);
+
+ {
+ LocAndSize records[] = {{}};
+ LocAndSize drecs[] = {{DiskLoc(0, 1000), 1000}, {}};
+ md->setCapExtent(&txn, DiskLoc(0, 0));
+ md->setCapFirstNewRecord(&txn, DiskLoc().setInvalid());
+ initializeV1RS(&txn, records, drecs, NULL, &em, md);
}
- TEST(CappedRecordStoreV1, NonFirstLoopWithSingleExtentExactSize) {
- OperationContextNoop txn;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData( true, 0 );
- DummyCappedDocumentDeleteCallback cb;
- CappedRecordStoreV1 rs(&txn, &cb, "test.foo", md, &em, false);
+ rs.insertRecord(&txn, zeros, 100 - MmapV1RecordHeader::HeaderSize, false);
- {
- LocAndSize records[] = {
- {DiskLoc(0, 1000), 100},
- {DiskLoc(0, 1100), 100},
- {DiskLoc(0, 1200), 100},
- {DiskLoc(0, 1300), 100},
- {DiskLoc(0, 1400), 100},
- {}
- };
- LocAndSize drecs[] = {
- {DiskLoc(0, 1500), 50},
- {}
- };
- md->setCapExtent(&txn, DiskLoc(0, 0));
- md->setCapFirstNewRecord(&txn, DiskLoc(0, 1000));
- initializeV1RS(&txn, records, drecs, NULL, &em, md);
- }
-
- rs.insertRecord(&txn, zeros, 100 - MmapV1RecordHeader::HeaderSize, false);
-
- {
- LocAndSize recs[] = {
- {DiskLoc(0, 1200), 100}, // first old record
- {DiskLoc(0, 1300), 100},
- {DiskLoc(0, 1400), 100}, // last old record
- {DiskLoc(0, 1000), 100}, // first new record
- {}
- };
- LocAndSize drecs[] = {
- {DiskLoc(0, 1100), 100}, // gap after newest record XXX this is probably a bug
- {DiskLoc(0, 1500), 50}, // gap at end of extent
- {}
- };
- assertStateV1RS(&txn, recs, drecs, NULL, &em, md);
- ASSERT_EQUALS(md->capExtent(), DiskLoc(0, 0));
- ASSERT_EQUALS(md->capFirstNewRecord(), DiskLoc(0, 1000));
- }
+ {
+ LocAndSize recs[] = {{DiskLoc(0, 1000), 100}, {}};
+ LocAndSize drecs[] = {{DiskLoc(0, 1100), 900}, {}};
+ assertStateV1RS(&txn, recs, drecs, NULL, &em, md);
+ ASSERT_EQUALS(md->capExtent(), DiskLoc(0, 0));
+ ASSERT_EQUALS(md->capFirstNewRecord(), DiskLoc().setInvalid()); // unlooped
}
+}
- /**
- * Current code always tries to leave 24 bytes to create a DeletedRecord.
- */
- TEST(CappedRecordStoreV1, WillLoopWithout24SpareBytes) {
- OperationContextNoop txn;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData( true, 0 );
- DummyCappedDocumentDeleteCallback cb;
- CappedRecordStoreV1 rs(&txn, &cb, "test.foo", md, &em, false);
+TEST(CappedRecordStoreV1, FirstLoopWithSingleExtentExactSize) {
+ OperationContextNoop txn;
+ DummyExtentManager em;
+ DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(true, 0);
+ DummyCappedDocumentDeleteCallback cb;
+ CappedRecordStoreV1 rs(&txn, &cb, "test.foo", md, &em, false);
+
+ {
+ LocAndSize records[] = {{DiskLoc(0, 1000), 100},
+ {DiskLoc(0, 1100), 100},
+ {DiskLoc(0, 1200), 100},
+ {DiskLoc(0, 1300), 100},
+ {DiskLoc(0, 1400), 100},
+ {}};
+ LocAndSize drecs[] = {{DiskLoc(0, 1500), 50}, {}};
+ md->setCapExtent(&txn, DiskLoc(0, 0));
+ md->setCapFirstNewRecord(&txn, DiskLoc().setInvalid()); // unlooped
+ initializeV1RS(&txn, records, drecs, NULL, &em, md);
+ }
- {
- LocAndSize records[] = {
- {DiskLoc(0, 1000), 100},
- {DiskLoc(0, 1100), 100},
- {DiskLoc(0, 1200), 100},
- {DiskLoc(0, 1300), 100},
- {DiskLoc(0, 1400), 100},
- {}
- };
- LocAndSize drecs[] = {
- {DiskLoc(0, 1500), 123},
- {}
- };
- md->setCapExtent(&txn, DiskLoc(0, 0));
- md->setCapFirstNewRecord(&txn, DiskLoc(0, 1000));
- initializeV1RS(&txn, records, drecs, NULL, &em, md);
- }
+ rs.insertRecord(&txn, zeros, 100 - MmapV1RecordHeader::HeaderSize, false);
+
+ {
+ LocAndSize recs[] = {{DiskLoc(0, 1200), 100}, // first old record
+ {DiskLoc(0, 1300), 100},
+ {DiskLoc(0, 1400), 100}, // last old record
+ {DiskLoc(0, 1000), 100}, // first new record
+ {}};
+ LocAndSize drecs[] = {
+ {DiskLoc(0, 1100), 100}, // gap after newest record XXX this is probably a bug
+ {DiskLoc(0, 1500), 50}, // gap at end of extent
+ {}};
+ assertStateV1RS(&txn, recs, drecs, NULL, &em, md);
+ ASSERT_EQUALS(md->capExtent(), DiskLoc(0, 0));
+ ASSERT_EQUALS(md->capFirstNewRecord(), DiskLoc(0, 1000));
+ }
+}
- rs.insertRecord(&txn, zeros, 100 - MmapV1RecordHeader::HeaderSize, false);
+TEST(CappedRecordStoreV1, NonFirstLoopWithSingleExtentExactSize) {
+ OperationContextNoop txn;
+ DummyExtentManager em;
+ DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(true, 0);
+ DummyCappedDocumentDeleteCallback cb;
+ CappedRecordStoreV1 rs(&txn, &cb, "test.foo", md, &em, false);
+
+ {
+ LocAndSize records[] = {{DiskLoc(0, 1000), 100},
+ {DiskLoc(0, 1100), 100},
+ {DiskLoc(0, 1200), 100},
+ {DiskLoc(0, 1300), 100},
+ {DiskLoc(0, 1400), 100},
+ {}};
+ LocAndSize drecs[] = {{DiskLoc(0, 1500), 50}, {}};
+ md->setCapExtent(&txn, DiskLoc(0, 0));
+ md->setCapFirstNewRecord(&txn, DiskLoc(0, 1000));
+ initializeV1RS(&txn, records, drecs, NULL, &em, md);
+ }
- {
- LocAndSize recs[] = {
- {DiskLoc(0, 1200), 100}, // first old record
- {DiskLoc(0, 1300), 100},
- {DiskLoc(0, 1400), 100}, // last old record
- {DiskLoc(0, 1000), 100}, // first new record
- {}
- };
- LocAndSize drecs[] = {
- {DiskLoc(0, 1100), 100}, // gap after newest record
- {DiskLoc(0, 1500), 123}, // gap at end of extent
- {}
- };
- assertStateV1RS(&txn, recs, drecs, NULL, &em, md);
- ASSERT_EQUALS(md->capExtent(), DiskLoc(0, 0));
- ASSERT_EQUALS(md->capFirstNewRecord(), DiskLoc(0, 1000));
- }
+ rs.insertRecord(&txn, zeros, 100 - MmapV1RecordHeader::HeaderSize, false);
+
+ {
+ LocAndSize recs[] = {{DiskLoc(0, 1200), 100}, // first old record
+ {DiskLoc(0, 1300), 100},
+ {DiskLoc(0, 1400), 100}, // last old record
+ {DiskLoc(0, 1000), 100}, // first new record
+ {}};
+ LocAndSize drecs[] = {
+ {DiskLoc(0, 1100), 100}, // gap after newest record XXX this is probably a bug
+ {DiskLoc(0, 1500), 50}, // gap at end of extent
+ {}};
+ assertStateV1RS(&txn, recs, drecs, NULL, &em, md);
+ ASSERT_EQUALS(md->capExtent(), DiskLoc(0, 0));
+ ASSERT_EQUALS(md->capFirstNewRecord(), DiskLoc(0, 1000));
}
+}
- TEST(CappedRecordStoreV1, WontLoopWith24SpareBytes) {
- OperationContextNoop txn;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData( true, 0 );
- DummyCappedDocumentDeleteCallback cb;
- CappedRecordStoreV1 rs(&txn, &cb, "test.foo", md, &em, false);
+/**
+ * Current code always tries to leave 24 bytes to create a DeletedRecord.
+ */
+TEST(CappedRecordStoreV1, WillLoopWithout24SpareBytes) {
+ OperationContextNoop txn;
+ DummyExtentManager em;
+ DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(true, 0);
+ DummyCappedDocumentDeleteCallback cb;
+ CappedRecordStoreV1 rs(&txn, &cb, "test.foo", md, &em, false);
+
+ {
+ LocAndSize records[] = {{DiskLoc(0, 1000), 100},
+ {DiskLoc(0, 1100), 100},
+ {DiskLoc(0, 1200), 100},
+ {DiskLoc(0, 1300), 100},
+ {DiskLoc(0, 1400), 100},
+ {}};
+ LocAndSize drecs[] = {{DiskLoc(0, 1500), 123}, {}};
+ md->setCapExtent(&txn, DiskLoc(0, 0));
+ md->setCapFirstNewRecord(&txn, DiskLoc(0, 1000));
+ initializeV1RS(&txn, records, drecs, NULL, &em, md);
+ }
- {
- LocAndSize records[] = {
- {DiskLoc(0, 1000), 100},
- {DiskLoc(0, 1100), 100},
- {DiskLoc(0, 1200), 100},
- {DiskLoc(0, 1300), 100},
- {DiskLoc(0, 1400), 100},
- {}
- };
- LocAndSize drecs[] = {
- {DiskLoc(0, 1500), 124},
- {}
- };
- md->setCapExtent(&txn, DiskLoc(0, 0));
- md->setCapFirstNewRecord(&txn, DiskLoc(0, 1000));
- initializeV1RS(&txn, records, drecs, NULL, &em, md);
- }
+ rs.insertRecord(&txn, zeros, 100 - MmapV1RecordHeader::HeaderSize, false);
+
+ {
+ LocAndSize recs[] = {{DiskLoc(0, 1200), 100}, // first old record
+ {DiskLoc(0, 1300), 100},
+ {DiskLoc(0, 1400), 100}, // last old record
+ {DiskLoc(0, 1000), 100}, // first new record
+ {}};
+ LocAndSize drecs[] = {{DiskLoc(0, 1100), 100}, // gap after newest record
+ {DiskLoc(0, 1500), 123}, // gap at end of extent
+ {}};
+ assertStateV1RS(&txn, recs, drecs, NULL, &em, md);
+ ASSERT_EQUALS(md->capExtent(), DiskLoc(0, 0));
+ ASSERT_EQUALS(md->capFirstNewRecord(), DiskLoc(0, 1000));
+ }
+}
- rs.insertRecord(&txn, zeros, 100 - MmapV1RecordHeader::HeaderSize, false);
+TEST(CappedRecordStoreV1, WontLoopWith24SpareBytes) {
+ OperationContextNoop txn;
+ DummyExtentManager em;
+ DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(true, 0);
+ DummyCappedDocumentDeleteCallback cb;
+ CappedRecordStoreV1 rs(&txn, &cb, "test.foo", md, &em, false);
+
+ {
+ LocAndSize records[] = {{DiskLoc(0, 1000), 100},
+ {DiskLoc(0, 1100), 100},
+ {DiskLoc(0, 1200), 100},
+ {DiskLoc(0, 1300), 100},
+ {DiskLoc(0, 1400), 100},
+ {}};
+ LocAndSize drecs[] = {{DiskLoc(0, 1500), 124}, {}};
+ md->setCapExtent(&txn, DiskLoc(0, 0));
+ md->setCapFirstNewRecord(&txn, DiskLoc(0, 1000));
+ initializeV1RS(&txn, records, drecs, NULL, &em, md);
+ }
- {
- LocAndSize recs[] = {
- {DiskLoc(0, 1000), 100},
- {DiskLoc(0, 1100), 100},
- {DiskLoc(0, 1200), 100},
- {DiskLoc(0, 1300), 100},
- {DiskLoc(0, 1400), 100},
- {DiskLoc(0, 1500), 100},
- {}
- };
- LocAndSize drecs[] = {
- {DiskLoc(0, 1600), 24}, // gap at end of extent
- {}
- };
- assertStateV1RS(&txn, recs, drecs, NULL, &em, md);
- ASSERT_EQUALS(md->capExtent(), DiskLoc(0, 0));
- ASSERT_EQUALS(md->capFirstNewRecord(), DiskLoc(0, 1000));
- }
+ rs.insertRecord(&txn, zeros, 100 - MmapV1RecordHeader::HeaderSize, false);
+
+ {
+ LocAndSize recs[] = {{DiskLoc(0, 1000), 100},
+ {DiskLoc(0, 1100), 100},
+ {DiskLoc(0, 1200), 100},
+ {DiskLoc(0, 1300), 100},
+ {DiskLoc(0, 1400), 100},
+ {DiskLoc(0, 1500), 100},
+ {}};
+ LocAndSize drecs[] = {{DiskLoc(0, 1600), 24}, // gap at end of extent
+ {}};
+ assertStateV1RS(&txn, recs, drecs, NULL, &em, md);
+ ASSERT_EQUALS(md->capExtent(), DiskLoc(0, 0));
+ ASSERT_EQUALS(md->capFirstNewRecord(), DiskLoc(0, 1000));
}
+}
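
The WillLoopWithout24SpareBytes / WontLoopWith24SpareBytes pair above pivots
on that 24-byte threshold: the allocator only places a record in the free
slot at the extent tail if the leftover space is zero or can still hold a
DeletedRecord; otherwise it loops back to the front of the extent. A hedged
sketch of that fit decision (the constant and helper names are illustrative,
not the real allocator's):

#include <iostream>

constexpr int kMinDeletedRecordSize = 24;

// True if a record of 'len' bytes can use a free slot of 'slotSize' bytes
// without looping: an exact fit, or one leaving room for a DeletedRecord.
bool fitsWithoutLooping(int slotSize, int len) {
    const int leftover = slotSize - len;
    return leftover == 0 || leftover >= kMinDeletedRecordSize;
}

int main() {
    std::cout << fitsWithoutLooping(123, 100) << '\n';  // 0: 23 spare bytes, so it loops
    std::cout << fitsWithoutLooping(124, 100) << '\n';  // 1: 24 spare bytes, no loop
    return 0;
}
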
- TEST(CappedRecordStoreV1, MoveToSecondExtentUnLooped) {
- OperationContextNoop txn;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData( true, 0 );
- DummyCappedDocumentDeleteCallback cb;
- CappedRecordStoreV1 rs(&txn, &cb, "test.foo", md, &em, false);
+TEST(CappedRecordStoreV1, MoveToSecondExtentUnLooped) {
+ OperationContextNoop txn;
+ DummyExtentManager em;
+ DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(true, 0);
+ DummyCappedDocumentDeleteCallback cb;
+ CappedRecordStoreV1 rs(&txn, &cb, "test.foo", md, &em, false);
+
+ {
+ // Two extents, each with 1000 bytes.
+ LocAndSize records[] = {
+ {DiskLoc(0, 1000), 500}, {DiskLoc(0, 1500), 300}, {DiskLoc(0, 1800), 100}, {}};
+ LocAndSize drecs[] = {{DiskLoc(0, 1900), 100}, {DiskLoc(1, 1000), 1000}, {}};
+ md->setCapExtent(&txn, DiskLoc(0, 0));
+ md->setCapFirstNewRecord(&txn, DiskLoc().setInvalid());
+ initializeV1RS(&txn, records, drecs, NULL, &em, md);
+ }
- {
- // Two extents, each with 1000 bytes.
- LocAndSize records[] = {
- {DiskLoc(0, 1000), 500},
- {DiskLoc(0, 1500), 300},
- {DiskLoc(0, 1800), 100},
- {}
- };
- LocAndSize drecs[] = {
- {DiskLoc(0, 1900), 100},
- {DiskLoc(1, 1000), 1000},
- {}
- };
- md->setCapExtent(&txn, DiskLoc(0, 0));
- md->setCapFirstNewRecord(&txn, DiskLoc().setInvalid());
- initializeV1RS(&txn, records, drecs, NULL, &em, md);
- }
+ rs.insertRecord(&txn, zeros, 100 - MmapV1RecordHeader::HeaderSize, false);
- rs.insertRecord(&txn, zeros, 100 - MmapV1RecordHeader::HeaderSize, false);
+ {
+ LocAndSize recs[] = {{DiskLoc(0, 1000), 500},
+ {DiskLoc(0, 1500), 300},
+ {DiskLoc(0, 1800), 100},
- {
- LocAndSize recs[] = {
- {DiskLoc(0, 1000), 500},
- {DiskLoc(0, 1500), 300},
- {DiskLoc(0, 1800), 100},
-
- {DiskLoc(1, 1000), 100},
- {}
- };
- LocAndSize drecs[] = {
- {DiskLoc(0, 1900), 100},
- {DiskLoc(1, 1100), 900},
- {}
- };
- assertStateV1RS(&txn, recs, drecs, NULL, &em, md);
- ASSERT_EQUALS(md->capExtent(), DiskLoc(1, 0));
- ASSERT_EQUALS(md->capFirstNewRecord(), DiskLoc().setInvalid()); // unlooped
- }
+ {DiskLoc(1, 1000), 100},
+ {}};
+ LocAndSize drecs[] = {{DiskLoc(0, 1900), 100}, {DiskLoc(1, 1100), 900}, {}};
+ assertStateV1RS(&txn, recs, drecs, NULL, &em, md);
+ ASSERT_EQUALS(md->capExtent(), DiskLoc(1, 0));
+ ASSERT_EQUALS(md->capFirstNewRecord(), DiskLoc().setInvalid()); // unlooped
}
+}
- TEST(CappedRecordStoreV1, MoveToSecondExtentLooped) {
- OperationContextNoop txn;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData( true, 0 );
- DummyCappedDocumentDeleteCallback cb;
- CappedRecordStoreV1 rs(&txn, &cb, "test.foo", md, &em, false);
+TEST(CappedRecordStoreV1, MoveToSecondExtentLooped) {
+ OperationContextNoop txn;
+ DummyExtentManager em;
+ DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(true, 0);
+ DummyCappedDocumentDeleteCallback cb;
+ CappedRecordStoreV1 rs(&txn, &cb, "test.foo", md, &em, false);
+
+ {
+ // Two extents, each with 1000 bytes.
+ LocAndSize records[] = {{DiskLoc(0, 1800), 100}, // old
+ {DiskLoc(0, 1000), 500}, // first new
+ {DiskLoc(0, 1500), 400},
+
+ {DiskLoc(1, 1000), 300},
+ {DiskLoc(1, 1300), 600},
+ {}};
+ LocAndSize drecs[] = {{DiskLoc(0, 1900), 100}, {DiskLoc(1, 1900), 100}, {}};
+ md->setCapExtent(&txn, DiskLoc(0, 0));
+ md->setCapFirstNewRecord(&txn, DiskLoc(0, 1000));
+ initializeV1RS(&txn, records, drecs, NULL, &em, md);
+ }
- {
- // Two extents, each with 1000 bytes.
- LocAndSize records[] = {
- {DiskLoc(0, 1800), 100}, // old
- {DiskLoc(0, 1000), 500}, // first new
- {DiskLoc(0, 1500), 400},
-
- {DiskLoc(1, 1000), 300},
- {DiskLoc(1, 1300), 600},
- {}
- };
- LocAndSize drecs[] = {
- {DiskLoc(0, 1900), 100},
- {DiskLoc(1, 1900), 100},
- {}
- };
- md->setCapExtent(&txn, DiskLoc(0, 0));
- md->setCapFirstNewRecord(&txn, DiskLoc(0, 1000));
- initializeV1RS(&txn, records, drecs, NULL, &em, md);
- }
+ rs.insertRecord(&txn, zeros, 200 - MmapV1RecordHeader::HeaderSize, false);
- rs.insertRecord(&txn, zeros, 200 - MmapV1RecordHeader::HeaderSize, false);
+ {
+ LocAndSize recs[] = {{DiskLoc(0, 1000), 500},
+ {DiskLoc(0, 1500), 400},
- {
- LocAndSize recs[] = {
- {DiskLoc(0, 1000), 500},
- {DiskLoc(0, 1500), 400},
-
- {DiskLoc(1, 1300), 600}, // old
- {DiskLoc(1, 1000), 200}, // first new
- {}
- };
- LocAndSize drecs[] = {
- {DiskLoc(0, 1800), 200},
- {DiskLoc(1, 1200), 100},
- {DiskLoc(1, 1900), 100},
- {}
- };
- assertStateV1RS(&txn, recs, drecs, NULL, &em, md);
- ASSERT_EQUALS(md->capExtent(), DiskLoc(1, 0));
- ASSERT_EQUALS(md->capFirstNewRecord(), DiskLoc(1, 1000));
- }
+ {DiskLoc(1, 1300), 600}, // old
+ {DiskLoc(1, 1000), 200}, // first new
+ {}};
+ LocAndSize drecs[] = {
+ {DiskLoc(0, 1800), 200}, {DiskLoc(1, 1200), 100}, {DiskLoc(1, 1900), 100}, {}};
+ assertStateV1RS(&txn, recs, drecs, NULL, &em, md);
+ ASSERT_EQUALS(md->capExtent(), DiskLoc(1, 0));
+ ASSERT_EQUALS(md->capFirstNewRecord(), DiskLoc(1, 1000));
}
+}
- // Larger than storageSize (fails early)
- TEST(CappedRecordStoreV1, OversizedRecordHuge) {
- OperationContextNoop txn;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData( true, 0 );
- DummyCappedDocumentDeleteCallback cb;
- CappedRecordStoreV1 rs(&txn, &cb, "test.foo", md, &em, false);
+// Larger than storageSize (fails early)
+TEST(CappedRecordStoreV1, OversizedRecordHuge) {
+ OperationContextNoop txn;
+ DummyExtentManager em;
+ DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(true, 0);
+ DummyCappedDocumentDeleteCallback cb;
+ CappedRecordStoreV1 rs(&txn, &cb, "test.foo", md, &em, false);
+
+ {
+ LocAndSize records[] = {{}};
+ LocAndSize drecs[] = {{DiskLoc(0, 1000), 1000}, {}};
+ md->setCapExtent(&txn, DiskLoc(0, 0));
+ md->setCapFirstNewRecord(&txn, DiskLoc().setInvalid());
+ initializeV1RS(&txn, records, drecs, NULL, &em, md);
+ }
- {
- LocAndSize records[] = {
- {}
- };
- LocAndSize drecs[] = {
- {DiskLoc(0, 1000), 1000},
- {}
- };
- md->setCapExtent(&txn, DiskLoc(0, 0));
- md->setCapFirstNewRecord(&txn, DiskLoc().setInvalid());
- initializeV1RS(&txn, records, drecs, NULL, &em, md);
- }
+ StatusWith<RecordId> status = rs.insertRecord(&txn, zeros, 16000, false);
+ ASSERT_EQUALS(status.getStatus(), ErrorCodes::DocTooLargeForCapped);
+ ASSERT_EQUALS(status.getStatus().location(), 16328);
+}
- StatusWith<RecordId> status = rs.insertRecord(&txn, zeros, 16000, false);
- ASSERT_EQUALS(status.getStatus(), ErrorCodes::DocTooLargeForCapped);
- ASSERT_EQUALS(status.getStatus().location(), 16328);
+// Smaller than storageSize, but larger than usable space (fails late)
+TEST(CappedRecordStoreV1, OversizedRecordMedium) {
+ OperationContextNoop txn;
+ DummyExtentManager em;
+ DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(true, 0);
+ DummyCappedDocumentDeleteCallback cb;
+ CappedRecordStoreV1 rs(&txn, &cb, "test.foo", md, &em, false);
+
+ {
+ LocAndSize records[] = {{}};
+ LocAndSize drecs[] = {{DiskLoc(0, 1000), 1000}, {}};
+ md->setCapExtent(&txn, DiskLoc(0, 0));
+ md->setCapFirstNewRecord(&txn, DiskLoc().setInvalid());
+ initializeV1RS(&txn, records, drecs, NULL, &em, md);
}
- // Smaller than storageSize, but larger than usable space (fails late)
- TEST(CappedRecordStoreV1, OversizedRecordMedium) {
- OperationContextNoop txn;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData( true, 0 );
- DummyCappedDocumentDeleteCallback cb;
- CappedRecordStoreV1 rs(&txn, &cb, "test.foo", md, &em, false);
+ StatusWith<RecordId> status =
+ rs.insertRecord(&txn, zeros, 1004 - MmapV1RecordHeader::HeaderSize, false);
+ ASSERT_EQUALS(status.getStatus(), ErrorCodes::DocTooLargeForCapped);
+ ASSERT_EQUALS(status.getStatus().location(), 28575);
+}
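
OversizedRecordHuge and OversizedRecordMedium above hit two different guards:
a record larger than the whole collection fails immediately (location 16328),
while one that merely exceeds the usable space fails only after the allocator
has walked the extents looking for room (location 28575). A simplified sketch
of that two-stage check (the thresholds and names here are illustrative, not
the real accounting):

#include <iostream>
#include <string>

std::string allocOutcome(int len, int storageSize, int usableSize) {
    if (len > storageSize)
        return "fail early (16328)";  // larger than the entire store
    if (len > usableSize)
        return "fail late (28575)";  // fits nowhere after walking extents
    return "ok";
}

int main() {
    std::cout << allocOutcome(16000, 1000, 980) << '\n';  // fail early
    std::cout << allocOutcome(990, 1000, 980) << '\n';    // fail late
    std::cout << allocOutcome(500, 1000, 980) << '\n';    // ok
    return 0;
}
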
- {
- LocAndSize records[] = {
- {}
- };
- LocAndSize drecs[] = {
- {DiskLoc(0, 1000), 1000},
- {}
- };
- md->setCapExtent(&txn, DiskLoc(0, 0));
- md->setCapFirstNewRecord(&txn, DiskLoc().setInvalid());
- initializeV1RS(&txn, records, drecs, NULL, &em, md);
- }
+//
+// XXX The CappedRecordStoreV1Scrambler suite of tests describe existing behavior that is less
+// than ideal. Any improved implementation will need to be able to handle a collection that has
+// been scrambled like this.
+//
- StatusWith<RecordId> status = rs.insertRecord(&txn, zeros, 1004 - MmapV1RecordHeader::HeaderSize, false);
- ASSERT_EQUALS(status.getStatus(), ErrorCodes::DocTooLargeForCapped);
- ASSERT_EQUALS(status.getStatus().location(), 28575);
+/**
+ * This is a minimal example that shows the current allocator laying out records out-of-order.
+ */
+TEST(CappedRecordStoreV1Scrambler, Minimal) {
+ OperationContextNoop txn;
+ DummyExtentManager em;
+ DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(true, 0);
+ DummyCappedDocumentDeleteCallback cb;
+ CappedRecordStoreV1 rs(&txn, &cb, "test.foo", md, &em, false);
+
+ {
+ // Starting with a single empty 1000 byte extent.
+ LocAndSize records[] = {{}};
+ LocAndSize drecs[] = {{DiskLoc(0, 1000), 1000}, {}};
+ md->setCapExtent(&txn, DiskLoc(0, 0));
+ md->setCapFirstNewRecord(&txn, DiskLoc().setInvalid()); // unlooped
+ initializeV1RS(&txn, records, drecs, NULL, &em, md);
}
- //
- // XXX The CappedRecordStoreV1Scrambler suite of tests describe existing behavior that is less
- // than ideal. Any improved implementation will need to be able to handle a collection that has
- // been scrambled like this.
- //
-
- /**
- * This is a minimal example that shows the current allocator laying out records out-of-order.
- */
- TEST(CappedRecordStoreV1Scrambler, Minimal) {
- OperationContextNoop txn;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData( true, 0 );
- DummyCappedDocumentDeleteCallback cb;
- CappedRecordStoreV1 rs(&txn, &cb, "test.foo", md, &em, false);
-
- {
- // Starting with a single empty 1000 byte extent.
- LocAndSize records[] = {
- {}
- };
- LocAndSize drecs[] = {
- {DiskLoc(0, 1000), 1000},
- {}
- };
- md->setCapExtent(&txn, DiskLoc(0, 0));
- md->setCapFirstNewRecord(&txn, DiskLoc().setInvalid()); // unlooped
- initializeV1RS(&txn, records, drecs, NULL, &em, md);
- }
-
- rs.insertRecord(&txn, zeros, 500 - MmapV1RecordHeader::HeaderSize, false);
- rs.insertRecord(&txn, zeros, 300 - MmapV1RecordHeader::HeaderSize, false);
- rs.insertRecord(&txn, zeros, 400 - MmapV1RecordHeader::HeaderSize, false); // won't fit at end so wraps
- rs.insertRecord(&txn, zeros, 120 - MmapV1RecordHeader::HeaderSize, false); // fits at end
- rs.insertRecord(&txn, zeros, 60 - MmapV1RecordHeader::HeaderSize, false); // fits in earlier hole
+ rs.insertRecord(&txn, zeros, 500 - MmapV1RecordHeader::HeaderSize, false);
+ rs.insertRecord(&txn, zeros, 300 - MmapV1RecordHeader::HeaderSize, false);
+ rs.insertRecord(
+ &txn, zeros, 400 - MmapV1RecordHeader::HeaderSize, false); // won't fit at end so wraps
+ rs.insertRecord(&txn, zeros, 120 - MmapV1RecordHeader::HeaderSize, false); // fits at end
+ rs.insertRecord(
+ &txn, zeros, 60 - MmapV1RecordHeader::HeaderSize, false); // fits in earlier hole
+
+ {
+ LocAndSize recs[] = {{DiskLoc(0, 1500), 300}, // 2nd insert
+ {DiskLoc(0, 1000), 400}, // 3rd (1st new)
+ {DiskLoc(0, 1800), 120}, // 4th
+ {DiskLoc(0, 1400), 60}, // 5th
+ {}};
+ LocAndSize drecs[] = {{DiskLoc(0, 1460), 40}, {DiskLoc(0, 1920), 80}, {}};
+ assertStateV1RS(&txn, recs, drecs, NULL, &em, md);
+ ASSERT_EQUALS(md->capExtent(), DiskLoc(0, 0));
+ ASSERT_EQUALS(md->capFirstNewRecord(), DiskLoc(0, 1000));
+ }
+}
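
The Minimal scrambler test above ends with records whose on-disk order no
longer matches insertion order, which is exactly the property any improved
allocator must still tolerate. Replaying the offsets and insertion numbers
from the expected `recs` array shows the divergence directly:

#include <algorithm>
#include <iostream>
#include <utility>
#include <vector>

int main() {
    // {disk offset, insertion number}, taken from the assertions above.
    std::vector<std::pair<int, int>> recs = {{1500, 2}, {1000, 3}, {1800, 4}, {1400, 5}};
    std::sort(recs.begin(), recs.end());  // disk order: ascending offset
    for (const auto& r : recs)
        std::cout << "ofs " << r.first << " -> insert #" << r.second << '\n';
    // Prints inserts 3, 5, 2, 4 -- not the insertion order 2, 3, 4, 5.
    return 0;
}
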
- {
- LocAndSize recs[] = {
- {DiskLoc(0, 1500), 300}, // 2nd insert
- {DiskLoc(0, 1000), 400}, // 3rd (1st new)
- {DiskLoc(0, 1800), 120}, // 4th
- {DiskLoc(0, 1400), 60}, // 5th
- {}
- };
- LocAndSize drecs[] = {
- {DiskLoc(0, 1460), 40},
- {DiskLoc(0, 1920), 80},
- {}
- };
- assertStateV1RS(&txn, recs, drecs, NULL, &em, md);
- ASSERT_EQUALS(md->capExtent(), DiskLoc(0, 0));
- ASSERT_EQUALS(md->capFirstNewRecord(), DiskLoc(0, 1000));
- }
+/**
+ * This tests a specially crafted set of inserts that scrambles a capped collection in a way
+ * that leaves 4 deleted records in a single extent.
+ */
+TEST(CappedRecordStoreV1Scrambler, FourDeletedRecordsInSingleExtent) {
+ OperationContextNoop txn;
+ DummyExtentManager em;
+ DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(true, 0);
+ DummyCappedDocumentDeleteCallback cb;
+ CappedRecordStoreV1 rs(&txn, &cb, "test.foo", md, &em, false);
+
+ {
+ // Starting with a single empty 1000 byte extent.
+ LocAndSize records[] = {{}};
+ LocAndSize drecs[] = {{DiskLoc(0, 1000), 1000}, {}};
+ md->setCapExtent(&txn, DiskLoc(0, 0));
+ md->setCapFirstNewRecord(&txn, DiskLoc().setInvalid()); // unlooped
+ initializeV1RS(&txn, records, drecs, NULL, &em, md);
}
- /**
- * This tests a specially crafted set of inserts that scrambles a capped collection in a way
- * that leaves 4 deleted records in a single extent.
- */
- TEST(CappedRecordStoreV1Scrambler, FourDeletedRecordsInSingleExtent) {
- OperationContextNoop txn;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData( true, 0 );
- DummyCappedDocumentDeleteCallback cb;
- CappedRecordStoreV1 rs(&txn, &cb, "test.foo", md, &em, false);
+ // This list of sizes was empirically generated to achieve this outcome. Don't think too
+ // much about them.
+ rs.insertRecord(&txn, zeros, 500 - MmapV1RecordHeader::HeaderSize, false);
+ rs.insertRecord(&txn, zeros, 300 - MmapV1RecordHeader::HeaderSize, false);
+ rs.insertRecord(&txn, zeros, 304 - MmapV1RecordHeader::HeaderSize, false);
+ rs.insertRecord(&txn, zeros, 76 - MmapV1RecordHeader::HeaderSize, false);
+ rs.insertRecord(&txn, zeros, 100 - MmapV1RecordHeader::HeaderSize, false);
+ rs.insertRecord(&txn, zeros, 96 - MmapV1RecordHeader::HeaderSize, false);
+ rs.insertRecord(&txn, zeros, 76 - MmapV1RecordHeader::HeaderSize, false);
+ rs.insertRecord(&txn, zeros, 200 - MmapV1RecordHeader::HeaderSize, false);
+ rs.insertRecord(&txn, zeros, 100 - MmapV1RecordHeader::HeaderSize, false);
+ rs.insertRecord(&txn, zeros, 100 - MmapV1RecordHeader::HeaderSize, false);
+ rs.insertRecord(&txn, zeros, 200 - MmapV1RecordHeader::HeaderSize, false);
+ rs.insertRecord(&txn, zeros, 56 - MmapV1RecordHeader::HeaderSize, false);
+ rs.insertRecord(&txn, zeros, 100 - MmapV1RecordHeader::HeaderSize, false);
+ rs.insertRecord(&txn, zeros, 96 - MmapV1RecordHeader::HeaderSize, false);
+ rs.insertRecord(&txn, zeros, 104 - MmapV1RecordHeader::HeaderSize, false);
+ rs.insertRecord(&txn, zeros, 96 - MmapV1RecordHeader::HeaderSize, false);
+ rs.insertRecord(&txn, zeros, 60 - MmapV1RecordHeader::HeaderSize, false);
+ rs.insertRecord(&txn, zeros, 60 - MmapV1RecordHeader::HeaderSize, false);
+ rs.insertRecord(&txn, zeros, 146 - MmapV1RecordHeader::HeaderSize, false);
+ rs.insertRecord(&txn, zeros, 146 - MmapV1RecordHeader::HeaderSize, false);
+ rs.insertRecord(&txn, zeros, 40 - MmapV1RecordHeader::HeaderSize, false);
+ rs.insertRecord(&txn, zeros, 40 - MmapV1RecordHeader::HeaderSize, false);
+ rs.insertRecord(&txn, zeros, 36 - MmapV1RecordHeader::HeaderSize, false);
+ rs.insertRecord(&txn, zeros, 100 - MmapV1RecordHeader::HeaderSize, false);
+ rs.insertRecord(&txn, zeros, 96 - MmapV1RecordHeader::HeaderSize, false);
+ rs.insertRecord(&txn, zeros, 200 - MmapV1RecordHeader::HeaderSize, false);
+ rs.insertRecord(&txn, zeros, 60 - MmapV1RecordHeader::HeaderSize, false);
+ rs.insertRecord(&txn, zeros, 64 - MmapV1RecordHeader::HeaderSize, false);
+
+ {
+ LocAndSize recs[] = {{DiskLoc(0, 1148), 148},
+ {DiskLoc(0, 1936), 40},
+ {DiskLoc(0, 1712), 40},
+ {DiskLoc(0, 1296), 36},
+ {DiskLoc(0, 1752), 100},
+ {DiskLoc(0, 1332), 96},
+ {DiskLoc(0, 1428), 200},
+ {DiskLoc(0, 1852), 60},
+ {DiskLoc(0, 1000), 64}, // (1st new)
+ {}};
+ LocAndSize drecs[] = {{DiskLoc(0, 1064), 84},
+ {DiskLoc(0, 1976), 24},
+ {DiskLoc(0, 1912), 24},
+ {DiskLoc(0, 1628), 84},
+ {}};
+ assertStateV1RS(&txn, recs, drecs, NULL, &em, md);
+ ASSERT_EQUALS(md->capExtent(), DiskLoc(0, 0));
+ ASSERT_EQUALS(md->capFirstNewRecord(), DiskLoc(0, 1000));
+ }
+}
- {
- // Starting with a single empty 1000 byte extent.
- LocAndSize records[] = {
- {}
- };
- LocAndSize drecs[] = {
- {DiskLoc(0, 1000), 1000},
- {}
- };
- md->setCapExtent(&txn, DiskLoc(0, 0));
- md->setCapFirstNewRecord(&txn, DiskLoc().setInvalid()); // unlooped
- initializeV1RS(&txn, records, drecs, NULL, &em, md);
+//
+// The CappedRecordStoreV1QueryStage tests some nitty-gritty capped
+// collection details. Ported and polished from pdfiletests.cpp.
+//
+
+class CollscanHelper {
+public:
+ CollscanHelper(int nExtents)
+ : md(new DummyRecordStoreV1MetaData(true, 0)), rs(&txn, &cb, ns(), md, &em, false) {
+ LocAndSize recs[] = {{}};
+ LocAndSize drecs[8];
+ ASSERT_LESS_THAN(nExtents, 8);
+ for (int j = 0; j < nExtents; ++j) {
+ drecs[j].loc = DiskLoc(j, 1000);
+ drecs[j].size = 1000;
}
+ drecs[nExtents].loc = DiskLoc();
+ drecs[nExtents].size = 0;
- // This list of sizes was empirically generated to achieve this outcome. Don't think too
- // much about them.
- rs.insertRecord(&txn, zeros, 500 - MmapV1RecordHeader::HeaderSize, false);
- rs.insertRecord(&txn, zeros, 300 - MmapV1RecordHeader::HeaderSize, false);
- rs.insertRecord(&txn, zeros, 304 - MmapV1RecordHeader::HeaderSize, false);
- rs.insertRecord(&txn, zeros, 76 - MmapV1RecordHeader::HeaderSize, false);
- rs.insertRecord(&txn, zeros, 100 - MmapV1RecordHeader::HeaderSize, false);
- rs.insertRecord(&txn, zeros, 96 - MmapV1RecordHeader::HeaderSize, false);
- rs.insertRecord(&txn, zeros, 76 - MmapV1RecordHeader::HeaderSize, false);
- rs.insertRecord(&txn, zeros, 200 - MmapV1RecordHeader::HeaderSize, false);
- rs.insertRecord(&txn, zeros, 100 - MmapV1RecordHeader::HeaderSize, false);
- rs.insertRecord(&txn, zeros, 100 - MmapV1RecordHeader::HeaderSize, false);
- rs.insertRecord(&txn, zeros, 200 - MmapV1RecordHeader::HeaderSize, false);
- rs.insertRecord(&txn, zeros, 56 - MmapV1RecordHeader::HeaderSize, false);
- rs.insertRecord(&txn, zeros, 100 - MmapV1RecordHeader::HeaderSize, false);
- rs.insertRecord(&txn, zeros, 96 - MmapV1RecordHeader::HeaderSize, false);
- rs.insertRecord(&txn, zeros, 104 - MmapV1RecordHeader::HeaderSize, false);
- rs.insertRecord(&txn, zeros, 96 - MmapV1RecordHeader::HeaderSize, false);
- rs.insertRecord(&txn, zeros, 60 - MmapV1RecordHeader::HeaderSize, false);
- rs.insertRecord(&txn, zeros, 60 - MmapV1RecordHeader::HeaderSize, false);
- rs.insertRecord(&txn, zeros, 146 - MmapV1RecordHeader::HeaderSize, false);
- rs.insertRecord(&txn, zeros, 146 - MmapV1RecordHeader::HeaderSize, false);
- rs.insertRecord(&txn, zeros, 40 - MmapV1RecordHeader::HeaderSize, false);
- rs.insertRecord(&txn, zeros, 40 - MmapV1RecordHeader::HeaderSize, false);
- rs.insertRecord(&txn, zeros, 36 - MmapV1RecordHeader::HeaderSize, false);
- rs.insertRecord(&txn, zeros, 100 - MmapV1RecordHeader::HeaderSize, false);
- rs.insertRecord(&txn, zeros, 96 - MmapV1RecordHeader::HeaderSize, false);
- rs.insertRecord(&txn, zeros, 200 - MmapV1RecordHeader::HeaderSize, false);
- rs.insertRecord(&txn, zeros, 60 - MmapV1RecordHeader::HeaderSize, false);
- rs.insertRecord(&txn, zeros, 64 - MmapV1RecordHeader::HeaderSize, false);
+ md->setCapExtent(&txn, DiskLoc(0, 0));
+ md->setCapFirstNewRecord(&txn, DiskLoc().setInvalid()); // unlooped
+ initializeV1RS(&txn, recs, drecs, NULL, &em, md);
+ }
- {
- LocAndSize recs[] = {
- {DiskLoc(0, 1148), 148},
- {DiskLoc(0, 1936), 40},
- {DiskLoc(0, 1712), 40},
- {DiskLoc(0, 1296), 36},
- {DiskLoc(0, 1752), 100},
- {DiskLoc(0, 1332), 96},
- {DiskLoc(0, 1428), 200},
- {DiskLoc(0, 1852), 60},
- {DiskLoc(0, 1000), 64}, // (1st new)
- {}
- };
- LocAndSize drecs[] = {
- {DiskLoc(0, 1064), 84},
- {DiskLoc(0, 1976), 24},
- {DiskLoc(0, 1912), 24},
- {DiskLoc(0, 1628), 84},
- {}
- };
- assertStateV1RS(&txn, recs, drecs, NULL, &em, md);
- ASSERT_EQUALS(md->capExtent(), DiskLoc(0, 0));
- ASSERT_EQUALS(md->capFirstNewRecord(), DiskLoc(0, 1000));
+ // Insert bypasses standard alloc/insert routines to use the extent we want.
+ // TODO: Directly declare resulting record store state instead of procedurally creating it
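+    // In sketch form, this helper appends to the extent's record list (offsets
+    // illustrative): an empty extent takes its first record at the start of
+    // _extentData; otherwise the record lands at lastRecord.getOfs() +
+    // lengthWithHeaders(), and prevOfs/nextOfs are stitched to keep the list intact.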
+ DiskLoc insert(const DiskLoc& ext, int i) {
+ // Copied verbatim.
+ BSONObjBuilder b;
+ b.append("a", i);
+ BSONObj o = b.done();
+ int len = o.objsize();
+ Extent* e = em.getExtent(ext);
+ e = txn.recoveryUnit()->writing(e);
+ int ofs;
+ if (e->lastRecord.isNull()) {
+ ofs = ext.getOfs() + (e->_extentData - (char*)e);
+ } else {
+ ofs = e->lastRecord.getOfs() + em.recordForV1(e->lastRecord)->lengthWithHeaders();
}
+ DiskLoc dl(ext.a(), ofs);
+ MmapV1RecordHeader* r = em.recordForV1(dl);
+ r = (MmapV1RecordHeader*)txn.recoveryUnit()->writingPtr(
+ r, MmapV1RecordHeader::HeaderSize + len);
+ r->lengthWithHeaders() = MmapV1RecordHeader::HeaderSize + len;
+ r->extentOfs() = e->myLoc.getOfs();
+ r->nextOfs() = DiskLoc::NullOfs;
+ r->prevOfs() = e->lastRecord.isNull() ? DiskLoc::NullOfs : e->lastRecord.getOfs();
+ memcpy(r->data(), o.objdata(), len);
+ if (e->firstRecord.isNull())
+ e->firstRecord = dl;
+ else
+ txn.recoveryUnit()->writingInt(em.recordForV1(e->lastRecord)->nextOfs()) = ofs;
+ e->lastRecord = dl;
+ return dl;
}
- //
- // The CappedRecordStoreV1QueryStage tests some nitty-gritty capped
- // collection details. Ported and polished from pdfiletests.cpp.
- //
-
- class CollscanHelper {
- public:
- CollscanHelper(int nExtents)
- : md(new DummyRecordStoreV1MetaData( true, 0 )),
- rs(&txn, &cb, ns(), md, &em, false)
+ // TODO: Directly assert the desired record store state instead of just walking it
+ void walkAndCount(int expectedCount) {
+ // Walk the collection going forward.
{
- LocAndSize recs[] = {
- {}
- };
- LocAndSize drecs[8];
- ASSERT_LESS_THAN(nExtents, 8);
- for (int j = 0; j < nExtents; ++j) {
- drecs[j].loc = DiskLoc(j, 1000);
- drecs[j].size = 1000;
+ CappedRecordStoreV1Iterator cursor(&txn, &rs, /*forward=*/true);
+ int resultCount = 0;
+ while (auto record = cursor.next()) {
+ ++resultCount;
}
- drecs[nExtents].loc = DiskLoc();
- drecs[nExtents].size = 0;
- md->setCapExtent(&txn, DiskLoc(0, 0));
- md->setCapFirstNewRecord(&txn, DiskLoc().setInvalid()); // unlooped
- initializeV1RS(&txn, recs, drecs, NULL, &em, md);
+ ASSERT_EQUALS(resultCount, expectedCount);
}
- // Insert bypasses standard alloc/insert routines to use the extent we want.
- // TODO: Directly declare resulting record store state instead of procedurally creating it
- DiskLoc insert( const DiskLoc& ext, int i ) {
- // Copied verbatim.
- BSONObjBuilder b;
- b.append( "a", i );
- BSONObj o = b.done();
- int len = o.objsize();
- Extent *e = em.getExtent(ext);
- e = txn.recoveryUnit()->writing(e);
- int ofs;
- if ( e->lastRecord.isNull() ) {
- ofs = ext.getOfs() + ( e->_extentData - (char *)e );
- }
- else {
- ofs = e->lastRecord.getOfs()
- + em.recordForV1(e->lastRecord)->lengthWithHeaders();
- }
- DiskLoc dl( ext.a(), ofs );
- MmapV1RecordHeader *r = em.recordForV1(dl);
- r = (MmapV1RecordHeader*) txn.recoveryUnit()->writingPtr(r, MmapV1RecordHeader::HeaderSize + len);
- r->lengthWithHeaders() = MmapV1RecordHeader::HeaderSize + len;
- r->extentOfs() = e->myLoc.getOfs();
- r->nextOfs() = DiskLoc::NullOfs;
- r->prevOfs() = e->lastRecord.isNull() ? DiskLoc::NullOfs : e->lastRecord.getOfs();
- memcpy( r->data(), o.objdata(), len );
- if ( e->firstRecord.isNull() )
- e->firstRecord = dl;
- else
- txn.recoveryUnit()->writingInt(em.recordForV1(e->lastRecord)->nextOfs()) = ofs;
- e->lastRecord = dl;
- return dl;
- }
-
- // TODO: Directly assert the desired record store state instead of just walking it
- void walkAndCount (int expectedCount) {
- // Walk the collection going forward.
- {
- CappedRecordStoreV1Iterator cursor(&txn, &rs, /*forward=*/true);
- int resultCount = 0;
- while (auto record = cursor.next()) {
- ++resultCount;
- }
-
- ASSERT_EQUALS(resultCount, expectedCount);
+ // Walk the collection going backwards.
+ {
+ CappedRecordStoreV1Iterator cursor(&txn, &rs, /*forward=*/false);
+ int resultCount = expectedCount;
+ while (auto record = cursor.next()) {
+ --resultCount;
}
- // Walk the collection going backwards.
- {
- CappedRecordStoreV1Iterator cursor(&txn, &rs, /*forward=*/false);
- int resultCount = expectedCount;
- while (auto record = cursor.next()) {
- --resultCount;
- }
-
- ASSERT_EQUALS(resultCount, 0);
- }
+ ASSERT_EQUALS(resultCount, 0);
}
+ }
- static const char *ns() { return "unittests.QueryStageCollectionScanCapped"; }
+ static const char* ns() {
+ return "unittests.QueryStageCollectionScanCapped";
+ }
- OperationContextNoop txn;
- DummyRecordStoreV1MetaData* md;
- DummyExtentManager em;
+ OperationContextNoop txn;
+ DummyRecordStoreV1MetaData* md;
+ DummyExtentManager em;
- private:
- DummyCappedDocumentDeleteCallback cb;
- CappedRecordStoreV1 rs;
- };
+private:
+ DummyCappedDocumentDeleteCallback cb;
+ CappedRecordStoreV1 rs;
+};
- TEST(CappedRecordStoreV1QueryStage, CollscanCappedBase) {
- CollscanHelper h(1);
- h.walkAndCount(0);
- }
+TEST(CappedRecordStoreV1QueryStage, CollscanCappedBase) {
+ CollscanHelper h(1);
+ h.walkAndCount(0);
+}
- TEST(CappedRecordStoreV1QueryStage, CollscanEmptyLooped) {
- CollscanHelper h(1);
- h.md->setCapFirstNewRecord( &h.txn, DiskLoc() );
- h.walkAndCount(0);
- }
+TEST(CappedRecordStoreV1QueryStage, CollscanEmptyLooped) {
+ CollscanHelper h(1);
+ h.md->setCapFirstNewRecord(&h.txn, DiskLoc());
+ h.walkAndCount(0);
+}
- TEST(CappedRecordStoreV1QueryStage, CollscanEmptyMultiExtentLooped) {
- CollscanHelper h(3);
- h.md->setCapFirstNewRecord( &h.txn, DiskLoc() );
- h.walkAndCount(0);
- }
+TEST(CappedRecordStoreV1QueryStage, CollscanEmptyMultiExtentLooped) {
+ CollscanHelper h(3);
+ h.md->setCapFirstNewRecord(&h.txn, DiskLoc());
+ h.walkAndCount(0);
+}
- TEST(CappedRecordStoreV1QueryStage, CollscanSingle) {
- CollscanHelper h(1);
+TEST(CappedRecordStoreV1QueryStage, CollscanSingle) {
+ CollscanHelper h(1);
- h.md->setCapFirstNewRecord(&h.txn, h.insert( h.md->capExtent(), 0 ));
- h.walkAndCount(1);
- }
+ h.md->setCapFirstNewRecord(&h.txn, h.insert(h.md->capExtent(), 0));
+ h.walkAndCount(1);
+}
- TEST(CappedRecordStoreV1QueryStage, CollscanNewCapFirst) {
- CollscanHelper h(1);
- DiskLoc x = h.insert(h.md->capExtent(), 0 );
- h.md->setCapFirstNewRecord( &h.txn, x );
- h.insert(h.md->capExtent(), 1 );
- h.walkAndCount(2);
- }
+TEST(CappedRecordStoreV1QueryStage, CollscanNewCapFirst) {
+ CollscanHelper h(1);
+ DiskLoc x = h.insert(h.md->capExtent(), 0);
+ h.md->setCapFirstNewRecord(&h.txn, x);
+ h.insert(h.md->capExtent(), 1);
+ h.walkAndCount(2);
+}
- TEST(CappedRecordStoreV1QueryStage, CollscanNewCapMiddle) {
- CollscanHelper h(1);
- h.insert(h.md->capExtent(), 0 );
- h.md->setCapFirstNewRecord(&h.txn, h.insert( h.md->capExtent(), 1 ) );
- h.insert( h.md->capExtent(), 2 );
- h.walkAndCount(3);
- }
+TEST(CappedRecordStoreV1QueryStage, CollscanNewCapMiddle) {
+ CollscanHelper h(1);
+ h.insert(h.md->capExtent(), 0);
+ h.md->setCapFirstNewRecord(&h.txn, h.insert(h.md->capExtent(), 1));
+ h.insert(h.md->capExtent(), 2);
+ h.walkAndCount(3);
+}
- TEST(CappedRecordStoreV1QueryStage, CollscanFirstExtent) {
- CollscanHelper h(2);
- h.insert(h.md->capExtent(), 0 );
- h.insert(h.md->lastExtent(&h.txn), 1 );
- h.md->setCapFirstNewRecord(&h.txn, h.insert( h.md->capExtent(), 2 ) );
- h.insert( h.md->capExtent(), 3 );
- h.walkAndCount(4);
- }
+TEST(CappedRecordStoreV1QueryStage, CollscanFirstExtent) {
+ CollscanHelper h(2);
+ h.insert(h.md->capExtent(), 0);
+ h.insert(h.md->lastExtent(&h.txn), 1);
+ h.md->setCapFirstNewRecord(&h.txn, h.insert(h.md->capExtent(), 2));
+ h.insert(h.md->capExtent(), 3);
+ h.walkAndCount(4);
+}
- TEST(CappedRecordStoreV1QueryStage, CollscanLastExtent) {
- CollscanHelper h(2);
- h.md->setCapExtent( &h.txn, h.md->lastExtent(&h.txn) );
- h.insert( h.md->capExtent(), 0 );
- h.insert( h.md->firstExtent(&h.txn), 1 );
- h.md->setCapFirstNewRecord( &h.txn, h.insert( h.md->capExtent(), 2 ) );
- h.insert( h.md->capExtent(), 3 );
- h.walkAndCount(4);
- }
+TEST(CappedRecordStoreV1QueryStage, CollscanLastExtent) {
+ CollscanHelper h(2);
+ h.md->setCapExtent(&h.txn, h.md->lastExtent(&h.txn));
+ h.insert(h.md->capExtent(), 0);
+ h.insert(h.md->firstExtent(&h.txn), 1);
+ h.md->setCapFirstNewRecord(&h.txn, h.insert(h.md->capExtent(), 2));
+ h.insert(h.md->capExtent(), 3);
+ h.walkAndCount(4);
+}
- TEST(CappedRecordStoreV1QueryStage, CollscanMidExtent) {
- CollscanHelper h(3);
- h.md->setCapExtent( &h.txn, h.em.getExtent(h.md->firstExtent(&h.txn))->xnext );
- h.insert( h.md->capExtent(), 0 );
- h.insert( h.md->lastExtent(&h.txn), 1 );
- h.insert( h.md->firstExtent(&h.txn), 2 );
- h.md->setCapFirstNewRecord( &h.txn, h.insert( h.md->capExtent(), 3 ) );
- h.insert( h.md->capExtent(), 4 );
- h.walkAndCount(5);
- }
+TEST(CappedRecordStoreV1QueryStage, CollscanMidExtent) {
+ CollscanHelper h(3);
+ h.md->setCapExtent(&h.txn, h.em.getExtent(h.md->firstExtent(&h.txn))->xnext);
+ h.insert(h.md->capExtent(), 0);
+ h.insert(h.md->lastExtent(&h.txn), 1);
+ h.insert(h.md->firstExtent(&h.txn), 2);
+ h.md->setCapFirstNewRecord(&h.txn, h.insert(h.md->capExtent(), 3));
+ h.insert(h.md->capExtent(), 4);
+ h.walkAndCount(5);
+}
- TEST(CappedRecordStoreV1QueryStage, CollscanAloneInExtent) {
- CollscanHelper h(3);
- h.md->setCapExtent( &h.txn, h.em.getExtent(h.md->firstExtent(&h.txn))->xnext );
- h.insert( h.md->lastExtent(&h.txn), 0 );
- h.insert( h.md->firstExtent(&h.txn), 1 );
- h.md->setCapFirstNewRecord( &h.txn, h.insert( h.md->capExtent(), 2 ) );
- h.walkAndCount(3);
- }
+TEST(CappedRecordStoreV1QueryStage, CollscanAloneInExtent) {
+ CollscanHelper h(3);
+ h.md->setCapExtent(&h.txn, h.em.getExtent(h.md->firstExtent(&h.txn))->xnext);
+ h.insert(h.md->lastExtent(&h.txn), 0);
+ h.insert(h.md->firstExtent(&h.txn), 1);
+ h.md->setCapFirstNewRecord(&h.txn, h.insert(h.md->capExtent(), 2));
+ h.walkAndCount(3);
+}
- TEST(CappedRecordStoreV1QueryStage, CollscanFirstInExtent) {
- CollscanHelper h(3);
- h.md->setCapExtent( &h.txn, h.em.getExtent(h.md->firstExtent(&h.txn))->xnext );
- h.insert( h.md->lastExtent(&h.txn), 0 );
- h.insert( h.md->firstExtent(&h.txn), 1 );
- h.md->setCapFirstNewRecord( &h.txn, h.insert( h.md->capExtent(), 2 ) );
- h.insert( h.md->capExtent(), 3 );
- h.walkAndCount(4);
- }
+TEST(CappedRecordStoreV1QueryStage, CollscanFirstInExtent) {
+ CollscanHelper h(3);
+ h.md->setCapExtent(&h.txn, h.em.getExtent(h.md->firstExtent(&h.txn))->xnext);
+ h.insert(h.md->lastExtent(&h.txn), 0);
+ h.insert(h.md->firstExtent(&h.txn), 1);
+ h.md->setCapFirstNewRecord(&h.txn, h.insert(h.md->capExtent(), 2));
+ h.insert(h.md->capExtent(), 3);
+ h.walkAndCount(4);
+}
- TEST(CappedRecordStoreV1QueryStage, CollscanLastInExtent) {
- CollscanHelper h(3);
- h.md->setCapExtent( &h.txn, h.em.getExtent(h.md->firstExtent(&h.txn))->xnext );
- h.insert( h.md->capExtent(), 0 );
- h.insert( h.md->lastExtent(&h.txn), 1 );
- h.insert( h.md->firstExtent(&h.txn), 2 );
- h.md->setCapFirstNewRecord( &h.txn, h.insert( h.md->capExtent(), 3 ) );
- h.walkAndCount(4);
- }
+TEST(CappedRecordStoreV1QueryStage, CollscanLastInExtent) {
+ CollscanHelper h(3);
+ h.md->setCapExtent(&h.txn, h.em.getExtent(h.md->firstExtent(&h.txn))->xnext);
+ h.insert(h.md->capExtent(), 0);
+ h.insert(h.md->lastExtent(&h.txn), 1);
+ h.insert(h.md->firstExtent(&h.txn), 2);
+ h.md->setCapFirstNewRecord(&h.txn, h.insert(h.md->capExtent(), 3));
+ h.walkAndCount(4);
+}
}
diff --git a/src/mongo/db/storage/mmap_v1/record_store_v1_repair_iterator.cpp b/src/mongo/db/storage/mmap_v1/record_store_v1_repair_iterator.cpp
index a4cb9977fe3..728f07d6013 100644
--- a/src/mongo/db/storage/mmap_v1/record_store_v1_repair_iterator.cpp
+++ b/src/mongo/db/storage/mmap_v1/record_store_v1_repair_iterator.cpp
@@ -38,49 +38,47 @@
namespace mongo {
- using std::endl;
-
- RecordStoreV1RepairCursor::RecordStoreV1RepairCursor(OperationContext* txn,
- const RecordStoreV1Base* recordStore)
- : _txn(txn), _recordStore(recordStore), _stage(FORWARD_SCAN) {
-
- // Position the iterator at the first record
- //
- advance();
- }
-
- boost::optional<Record> RecordStoreV1RepairCursor::next() {
- if (_currRecord.isNull()) return {};
- auto out = _currRecord.toRecordId();
- advance();
- return {{out, _recordStore->dataFor(_txn, out)}};
- }
-
- boost::optional<Record> RecordStoreV1RepairCursor::seekExact(const RecordId& id) {
- invariant(!"seekExact not supported");
- }
-
- void RecordStoreV1RepairCursor::advance() {
- const ExtentManager* em = _recordStore->_extentManager;
-
- while (true) {
- if (_currRecord.isNull()) {
-
- if (!_advanceToNextValidExtent()) {
- return;
- }
+using std::endl;
+
+RecordStoreV1RepairCursor::RecordStoreV1RepairCursor(OperationContext* txn,
+ const RecordStoreV1Base* recordStore)
+ : _txn(txn), _recordStore(recordStore), _stage(FORWARD_SCAN) {
+ // Position the iterator at the first record
+ //
+ advance();
+}
+
+boost::optional<Record> RecordStoreV1RepairCursor::next() {
+ if (_currRecord.isNull())
+ return {};
+ auto out = _currRecord.toRecordId();
+ advance();
+ return {{out, _recordStore->dataFor(_txn, out)}};
+}
+
+boost::optional<Record> RecordStoreV1RepairCursor::seekExact(const RecordId& id) {
+ invariant(!"seekExact not supported");
+}
+
+void RecordStoreV1RepairCursor::advance() {
+ const ExtentManager* em = _recordStore->_extentManager;
+
+ while (true) {
+ if (_currRecord.isNull()) {
+ if (!_advanceToNextValidExtent()) {
+ return;
+ }
- _seenInCurrentExtent.clear();
+ _seenInCurrentExtent.clear();
- // Otherwise _advanceToNextValidExtent would have returned false
- //
- invariant(!_currExtent.isNull());
+ // Otherwise _advanceToNextValidExtent would have returned false
+ //
+ invariant(!_currExtent.isNull());
- const Extent* e = em->getExtent(_currExtent, false);
- _currRecord = (FORWARD_SCAN == _stage ? e->firstRecord : e->lastRecord);
- }
- else {
- switch (_stage) {
+ const Extent* e = em->getExtent(_currExtent, false);
+ _currRecord = (FORWARD_SCAN == _stage ? e->firstRecord : e->lastRecord);
+ } else {
+ switch (_stage) {
case FORWARD_SCAN:
_currRecord = _recordStore->getNextRecordInExtent(_txn, _currRecord);
break;
@@ -90,37 +88,37 @@ namespace mongo {
default:
invariant(!"This should never be reached.");
break;
- }
- }
-
- if (_currRecord.isNull()) {
- continue;
}
+ }
- // Validate the contents of the record's disk location and deduplicate
- //
- if (!_seenInCurrentExtent.insert(_currRecord).second) {
- error() << "infinite loop in extent, seen: " << _currRecord << " before" << endl;
- _currRecord = DiskLoc();
- continue;
- }
+ if (_currRecord.isNull()) {
+ continue;
+ }
- if (_currRecord.getOfs() <= 0){
- error() << "offset is 0 for record which should be impossible" << endl;
- _currRecord = DiskLoc();
- continue;
- }
+ // Validate the contents of the record's disk location and deduplicate
+ //
+ if (!_seenInCurrentExtent.insert(_currRecord).second) {
+ error() << "infinite loop in extent, seen: " << _currRecord << " before" << endl;
+ _currRecord = DiskLoc();
+ continue;
+ }
- return;
+ if (_currRecord.getOfs() <= 0) {
+ error() << "offset is 0 for record which should be impossible" << endl;
+ _currRecord = DiskLoc();
+ continue;
}
+
+ return;
}
+}
- bool RecordStoreV1RepairCursor::_advanceToNextValidExtent() {
- const ExtentManager* em = _recordStore->_extentManager;
+bool RecordStoreV1RepairCursor::_advanceToNextValidExtent() {
+ const ExtentManager* em = _recordStore->_extentManager;
- while (true) {
- if (_currExtent.isNull()) {
- switch (_stage) {
+ while (true) {
+ if (_currExtent.isNull()) {
+ switch (_stage) {
case FORWARD_SCAN:
_currExtent = _recordStore->details()->firstExtent(_txn);
break;
@@ -130,35 +128,34 @@ namespace mongo {
default:
invariant(DONE == _stage);
return false;
- }
- }
- else {
- // If _currExtent is not NULL, then it must point to a valid extent, so no extra
- // checks here.
- //
- const Extent* e = em->getExtent(_currExtent, false);
- _currExtent = (FORWARD_SCAN == _stage ? e->xnext : e->xprev);
}
-
- bool hasNextExtent = !_currExtent.isNull();
-
- // Sanity checks for the extent's disk location
+ } else {
+ // If _currExtent is not NULL, then it must point to a valid extent, so no extra
+ // checks here.
//
- if (hasNextExtent && (!_currExtent.isValid() || (_currExtent.getOfs() < 0))) {
- error() << "Invalid extent location: " << _currExtent << endl;
+ const Extent* e = em->getExtent(_currExtent, false);
+ _currExtent = (FORWARD_SCAN == _stage ? e->xnext : e->xprev);
+ }
- // Switch the direction of scan
- //
- hasNextExtent = false;
- }
+ bool hasNextExtent = !_currExtent.isNull();
- if (hasNextExtent) {
- break;
- }
+ // Sanity checks for the extent's disk location
+ //
+ if (hasNextExtent && (!_currExtent.isValid() || (_currExtent.getOfs() < 0))) {
+ error() << "Invalid extent location: " << _currExtent << endl;
- // Swap the direction of scan and loop again
+ // Switch the direction of scan
//
- switch (_stage) {
+ hasNextExtent = false;
+ }
+
+ if (hasNextExtent) {
+ break;
+ }
+
+ // Swap the direction of scan and loop again
+ //
+ switch (_stage) {
case FORWARD_SCAN:
_stage = BACKWARD_SCAN;
break;
@@ -168,49 +165,48 @@ namespace mongo {
default:
invariant(!"This should never be reached.");
break;
- }
-
- _currExtent = DiskLoc();
}
+ _currExtent = DiskLoc();
+ }
- // Check _currExtent's contents for validity, but do not count is as failure if they
- // don't check out.
- //
- const Extent* e = em->getExtent(_currExtent, false);
- if (!e->isOk()){
- warning() << "Extent not ok magic: " << e->magic << " going to try to continue"
- << endl;
- }
-
- log() << (FORWARD_SCAN == _stage ? "FORWARD" : "BACKWARD") << " Extent loc: "
- << _currExtent << ", length: " << e->length << endl;
- return true;
+    // Check _currExtent's contents for validity, but do not count it as a failure if they
+ // don't check out.
+ //
+ const Extent* e = em->getExtent(_currExtent, false);
+ if (!e->isOk()) {
+ warning() << "Extent not ok magic: " << e->magic << " going to try to continue" << endl;
}
- void RecordStoreV1RepairCursor::invalidate(const RecordId& id) {
- // If we see this record again it probably means it was reinserted rather than an infinite
- // loop. If we do loop, we should quickly hit another seen record that hasn't been
- // invalidated.
- DiskLoc dl = DiskLoc::fromRecordId(id);
- _seenInCurrentExtent.erase(dl);
+ log() << (FORWARD_SCAN == _stage ? "FORWARD" : "BACKWARD") << " Extent loc: " << _currExtent
+ << ", length: " << e->length << endl;
- if (_currRecord == dl) {
- // The DiskLoc being invalidated is also the one pointed at by this iterator. We
- // advance the iterator so it's not pointing at invalid data.
- advance();
+ return true;
+}
- if (_currRecord == dl) {
- // Even after advancing the iterator, we're still pointing at the DiskLoc being
- // invalidated. This is expected when 'dl' is the last DiskLoc in the FORWARD scan,
- // and the initial call to getNext() moves the iterator to the first loc in the
- // BACKWARDS scan.
- advance();
- }
+void RecordStoreV1RepairCursor::invalidate(const RecordId& id) {
+ // If we see this record again it probably means it was reinserted rather than an infinite
+ // loop. If we do loop, we should quickly hit another seen record that hasn't been
+ // invalidated.
+ DiskLoc dl = DiskLoc::fromRecordId(id);
+ _seenInCurrentExtent.erase(dl);
+
+ if (_currRecord == dl) {
+ // The DiskLoc being invalidated is also the one pointed at by this iterator. We
+ // advance the iterator so it's not pointing at invalid data.
+ advance();
- invariant(_currRecord != dl);
+ if (_currRecord == dl) {
+ // Even after advancing the iterator, we're still pointing at the DiskLoc being
+ // invalidated. This is expected when 'dl' is the last DiskLoc in the FORWARD scan,
+ // and the initial call to getNext() moves the iterator to the first loc in the
+ // BACKWARDS scan.
+ advance();
}
+
+ invariant(_currRecord != dl);
}
+}
} // namespace mongo
diff --git a/src/mongo/db/storage/mmap_v1/record_store_v1_repair_iterator.h b/src/mongo/db/storage/mmap_v1/record_store_v1_repair_iterator.h
index 6b93ad5941a..def5178ad8e 100644
--- a/src/mongo/db/storage/mmap_v1/record_store_v1_repair_iterator.h
+++ b/src/mongo/db/storage/mmap_v1/record_store_v1_repair_iterator.h
@@ -35,63 +35,60 @@
namespace mongo {
+/**
+ * This iterator will go over the collection twice - once going forward (first extent -> last
+ * extent) and once backwards in an attempt to salvage potentially corrupted or unreachable
+ * records. It is used by the mongodump --repair option.
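+ *
+ * Rough traversal order (a sketch of the stages below, not a separate API):
+ *   FORWARD_SCAN:  walk firstExtent -> xnext, records via firstRecord/nextOfs
+ *   BACKWARD_SCAN: walk lastExtent -> xprev, records via lastRecord/prevOfs
+ *   DONE:          both passes exhausted; next() returns boost::none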
+ */
+class RecordStoreV1RepairCursor final : public RecordCursor {
+public:
+ RecordStoreV1RepairCursor(OperationContext* txn, const RecordStoreV1Base* recordStore);
+
+ boost::optional<Record> next() final;
+ boost::optional<Record> seekExact(const RecordId& id) final;
+ void invalidate(const RecordId& dl);
+ void savePositioned() final {
+ _txn = nullptr;
+ }
+ bool restore(OperationContext* txn) final {
+ _txn = txn;
+ return true;
+ }
+
+ // Explicitly not supporting fetcherForNext(). The expected use case for this class is a
+ // special offline operation where there are no concurrent operations, so it would be better
+ // to take the pagefault inline with the operation.
+
+private:
+ void advance();
+
/**
- * This iterator will go over the collection twice - once going forward (first extent -> last
- * extent) and once backwards in an attempt to salvage potentially corrupted or unreachable
- * records. It is used by the mongodump --repair option.
+ * Based on the direction of scan, finds the next valid (un-corrupted) extent in the chain
+ * and sets _currExtent to point to that.
+ *
+ * @return true if valid extent was found (_currExtent will not be null)
+ * false otherwise and _currExtent will be null
*/
- class RecordStoreV1RepairCursor final : public RecordCursor {
- public:
- RecordStoreV1RepairCursor(OperationContext* txn,
- const RecordStoreV1Base* recordStore);
-
- boost::optional<Record> next() final;
- boost::optional<Record> seekExact(const RecordId& id) final;
- void invalidate(const RecordId& dl);
- void savePositioned() final { _txn = nullptr; }
- bool restore(OperationContext* txn) final {
- _txn = txn;
- return true;
- }
-
- // Explicitly not supporting fetcherForNext(). The expected use case for this class is a
- // special offline operation where there are no concurrent operations, so it would be better
- // to take the pagefault inline with the operation.
-
- private:
- void advance();
-
- /**
- * Based on the direction of scan, finds the next valid (un-corrupted) extent in the chain
- * and sets _currExtent to point to that.
- *
- * @return true if valid extent was found (_currExtent will not be null)
- * false otherwise and _currExtent will be null
- */
- bool _advanceToNextValidExtent();
-
- // transactional context for read locks. Not owned by us
- OperationContext* _txn;
-
- // Reference to the owning RecordStore. The store must not be deleted while there are
- // active iterators on it.
- //
- const RecordStoreV1Base* _recordStore;
-
- DiskLoc _currExtent;
- DiskLoc _currRecord;
-
- enum Stage {
- FORWARD_SCAN = 0,
- BACKWARD_SCAN = 1,
- DONE = 2
- };
-
- Stage _stage;
-
- // Used to find cycles within an extent. Cleared after each extent has been processed.
- //
- std::set<DiskLoc> _seenInCurrentExtent;
- };
+ bool _advanceToNextValidExtent();
+
+ // transactional context for read locks. Not owned by us
+ OperationContext* _txn;
+
+ // Reference to the owning RecordStore. The store must not be deleted while there are
+ // active iterators on it.
+ //
+ const RecordStoreV1Base* _recordStore;
+
+ DiskLoc _currExtent;
+ DiskLoc _currRecord;
+
+ enum Stage { FORWARD_SCAN = 0, BACKWARD_SCAN = 1, DONE = 2 };
+
+ Stage _stage;
+
+ // Used to find cycles within an extent. Cleared after each extent has been processed.
+ //
+ std::set<DiskLoc> _seenInCurrentExtent;
+};
} // namespace mongo
diff --git a/src/mongo/db/storage/mmap_v1/record_store_v1_simple.cpp b/src/mongo/db/storage/mmap_v1/record_store_v1_simple.cpp
index 029883254bd..5948553b9af 100644
--- a/src/mongo/db/storage/mmap_v1/record_store_v1_simple.cpp
+++ b/src/mongo/db/storage/mmap_v1/record_store_v1_simple.cpp
@@ -53,447 +53,431 @@
namespace mongo {
- using std::endl;
- using std::vector;
-
- static Counter64 freelistAllocs;
- static Counter64 freelistBucketExhausted;
- static Counter64 freelistIterations;
-
- // TODO figure out what to do about these.
- static ServerStatusMetricField<Counter64> dFreelist1( "storage.freelist.search.requests",
- &freelistAllocs );
-
- static ServerStatusMetricField<Counter64> dFreelist2( "storage.freelist.search.bucketExhausted",
- &freelistBucketExhausted );
-
- static ServerStatusMetricField<Counter64> dFreelist3( "storage.freelist.search.scanned",
- &freelistIterations );
-
- SimpleRecordStoreV1::SimpleRecordStoreV1( OperationContext* txn,
- StringData ns,
- RecordStoreV1MetaData* details,
- ExtentManager* em,
- bool isSystemIndexes )
- : RecordStoreV1Base( ns, details, em, isSystemIndexes ) {
-
- invariant( !details->isCapped() );
- _normalCollection = NamespaceString::normal( ns );
- }
+using std::endl;
+using std::vector;
+
+static Counter64 freelistAllocs;
+static Counter64 freelistBucketExhausted;
+static Counter64 freelistIterations;
+
+// TODO figure out what to do about these.
+static ServerStatusMetricField<Counter64> dFreelist1("storage.freelist.search.requests",
+ &freelistAllocs);
+
+static ServerStatusMetricField<Counter64> dFreelist2("storage.freelist.search.bucketExhausted",
+ &freelistBucketExhausted);
+
+static ServerStatusMetricField<Counter64> dFreelist3("storage.freelist.search.scanned",
+ &freelistIterations);
+
+SimpleRecordStoreV1::SimpleRecordStoreV1(OperationContext* txn,
+ StringData ns,
+ RecordStoreV1MetaData* details,
+ ExtentManager* em,
+ bool isSystemIndexes)
+ : RecordStoreV1Base(ns, details, em, isSystemIndexes) {
+ invariant(!details->isCapped());
+ _normalCollection = NamespaceString::normal(ns);
+}
- SimpleRecordStoreV1::~SimpleRecordStoreV1() {
+SimpleRecordStoreV1::~SimpleRecordStoreV1() {}
+
+DiskLoc SimpleRecordStoreV1::_allocFromExistingExtents(OperationContext* txn, int lenToAllocRaw) {
+ // Slowly drain the deletedListLegacyGrabBag by popping one record off and putting it in the
+ // correct deleted list each time we try to allocate a new record. This ensures we won't
+ // orphan any data when upgrading from old versions, without needing a long upgrade phase.
+ // This is done before we try to allocate the new record so we can take advantage of the new
+ // space immediately.
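+    // One practical consequence: each allocation attempt drains at most one legacy
+    // record, so a grab bag holding N records is fully re-bucketed after N attempts.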
+ {
+ const DiskLoc head = _details->deletedListLegacyGrabBag();
+ if (!head.isNull()) {
+ _details->setDeletedListLegacyGrabBag(txn, drec(head)->nextDeleted());
+ addDeletedRec(txn, head);
+ }
}
- DiskLoc SimpleRecordStoreV1::_allocFromExistingExtents( OperationContext* txn,
- int lenToAllocRaw ) {
-
- // Slowly drain the deletedListLegacyGrabBag by popping one record off and putting it in the
- // correct deleted list each time we try to allocate a new record. This ensures we won't
- // orphan any data when upgrading from old versions, without needing a long upgrade phase.
- // This is done before we try to allocate the new record so we can take advantage of the new
- // space immediately.
- {
- const DiskLoc head = _details->deletedListLegacyGrabBag();
- if (!head.isNull()) {
- _details->setDeletedListLegacyGrabBag(txn, drec(head)->nextDeleted());
- addDeletedRec(txn, head);
+ // align size up to a multiple of 4
+ const int lenToAlloc = (lenToAllocRaw + (4 - 1)) & ~(4 - 1);
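+    // e.g. (illustrative values) 29 -> (29 + 3) & ~3 = 32, while 32 stays 32.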
+
+ freelistAllocs.increment();
+ DiskLoc loc;
+ DeletedRecord* dr = NULL;
+ {
+ int myBucket;
+ for (myBucket = bucket(lenToAlloc); myBucket < Buckets; myBucket++) {
+ // Only look at the first entry in each bucket. This works because we are either
+ // quantizing or allocating fixed-size blocks.
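+            // Sketch: a request starting at bucket(lenToAlloc) whose bucket head is
+            // null or too small simply falls through to the next-larger bucket.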
+ const DiskLoc head = _details->deletedListEntry(myBucket);
+ if (head.isNull())
+ continue;
+ DeletedRecord* const candidate = drec(head);
+ if (candidate->lengthWithHeaders() >= lenToAlloc) {
+ loc = head;
+ dr = candidate;
+ break;
}
}
- // align size up to a multiple of 4
- const int lenToAlloc = (lenToAllocRaw + (4-1)) & ~(4-1);
+ if (!dr)
+ return DiskLoc(); // no space
- freelistAllocs.increment();
- DiskLoc loc;
- DeletedRecord* dr = NULL;
- {
-
- int myBucket;
- for (myBucket = bucket(lenToAlloc); myBucket < Buckets; myBucket++) {
- // Only look at the first entry in each bucket. This works because we are either
- // quantizing or allocating fixed-size blocks.
- const DiskLoc head = _details->deletedListEntry(myBucket);
- if (head.isNull()) continue;
- DeletedRecord* const candidate = drec(head);
- if (candidate->lengthWithHeaders() >= lenToAlloc) {
- loc = head;
- dr = candidate;
- break;
- }
- }
-
- if (!dr)
- return DiskLoc(); // no space
-
- // Unlink ourself from the deleted list
- _details->setDeletedListEntry(txn, myBucket, dr->nextDeleted());
- *txn->recoveryUnit()->writing(&dr->nextDeleted()) = DiskLoc().setInvalid(); // defensive
- }
+        // Unlink ourselves from the deleted list
+ _details->setDeletedListEntry(txn, myBucket, dr->nextDeleted());
+ *txn->recoveryUnit()->writing(&dr->nextDeleted()) = DiskLoc().setInvalid(); // defensive
+ }
- invariant( dr->extentOfs() < loc.getOfs() );
+ invariant(dr->extentOfs() < loc.getOfs());
- // Split the deleted record if it has at least as much left over space as our smallest
- // allocation size. Otherwise, just take the whole DeletedRecord.
- const int remainingLength = dr->lengthWithHeaders() - lenToAlloc;
- if (remainingLength >= bucketSizes[0]) {
- txn->recoveryUnit()->writingInt(dr->lengthWithHeaders()) = lenToAlloc;
- const DiskLoc newDelLoc = DiskLoc(loc.a(), loc.getOfs() + lenToAlloc);
- DeletedRecord* newDel = txn->recoveryUnit()->writing(drec(newDelLoc));
- newDel->extentOfs() = dr->extentOfs();
- newDel->lengthWithHeaders() = remainingLength;
- newDel->nextDeleted().Null();
+ // Split the deleted record if it has at least as much left over space as our smallest
+ // allocation size. Otherwise, just take the whole DeletedRecord.
+ const int remainingLength = dr->lengthWithHeaders() - lenToAlloc;
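+    // Illustrative numbers: a 148-byte deleted record serving a 64-byte allocation
+    // leaves an 84-byte remainder, which is re-bucketed via addDeletedRec() below.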
+ if (remainingLength >= bucketSizes[0]) {
+ txn->recoveryUnit()->writingInt(dr->lengthWithHeaders()) = lenToAlloc;
+ const DiskLoc newDelLoc = DiskLoc(loc.a(), loc.getOfs() + lenToAlloc);
+ DeletedRecord* newDel = txn->recoveryUnit()->writing(drec(newDelLoc));
+ newDel->extentOfs() = dr->extentOfs();
+ newDel->lengthWithHeaders() = remainingLength;
+ newDel->nextDeleted().Null();
- addDeletedRec(txn, newDelLoc);
- }
-
- return loc;
+ addDeletedRec(txn, newDelLoc);
}
- StatusWith<DiskLoc> SimpleRecordStoreV1::allocRecord( OperationContext* txn,
- int lengthWithHeaders,
- bool enforceQuota ) {
- if (lengthWithHeaders > MaxAllowedAllocation) {
- return StatusWith<DiskLoc>(
- ErrorCodes::InvalidLength,
- str::stream() << "Attempting to allocate a record larger than maximum size: "
- << lengthWithHeaders << " > 16.5MB");
- }
+ return loc;
+}
- DiskLoc loc = _allocFromExistingExtents( txn, lengthWithHeaders );
- if ( !loc.isNull() )
- return StatusWith<DiskLoc>( loc );
+StatusWith<DiskLoc> SimpleRecordStoreV1::allocRecord(OperationContext* txn,
+ int lengthWithHeaders,
+ bool enforceQuota) {
+ if (lengthWithHeaders > MaxAllowedAllocation) {
+ return StatusWith<DiskLoc>(
+ ErrorCodes::InvalidLength,
+ str::stream() << "Attempting to allocate a record larger than maximum size: "
+ << lengthWithHeaders << " > 16.5MB");
+ }
- LOG(1) << "allocating new extent";
+ DiskLoc loc = _allocFromExistingExtents(txn, lengthWithHeaders);
+ if (!loc.isNull())
+ return StatusWith<DiskLoc>(loc);
- increaseStorageSize( txn,
- _extentManager->followupSize( lengthWithHeaders,
- _details->lastExtentSize(txn)),
- enforceQuota );
+ LOG(1) << "allocating new extent";
- loc = _allocFromExistingExtents( txn, lengthWithHeaders );
- if ( !loc.isNull() ) {
- // got on first try
- return StatusWith<DiskLoc>( loc );
- }
+ increaseStorageSize(
+ txn,
+ _extentManager->followupSize(lengthWithHeaders, _details->lastExtentSize(txn)),
+ enforceQuota);
- log() << "warning: alloc() failed after allocating new extent. "
- << "lengthWithHeaders: " << lengthWithHeaders << " last extent size:"
- << _details->lastExtentSize(txn) << "; trying again";
+ loc = _allocFromExistingExtents(txn, lengthWithHeaders);
+ if (!loc.isNull()) {
+ // got on first try
+ return StatusWith<DiskLoc>(loc);
+ }
- for ( int z = 0; z < 10 && lengthWithHeaders > _details->lastExtentSize(txn); z++ ) {
- log() << "try #" << z << endl;
+ log() << "warning: alloc() failed after allocating new extent. "
+ << "lengthWithHeaders: " << lengthWithHeaders
+ << " last extent size:" << _details->lastExtentSize(txn) << "; trying again";
- increaseStorageSize( txn,
- _extentManager->followupSize( lengthWithHeaders,
- _details->lastExtentSize(txn)),
- enforceQuota );
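+    // Bounded retry: keep growing the collection (at most 10 attempts) until the
+    // newest extent is large enough to hold the record being placed.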
+ for (int z = 0; z < 10 && lengthWithHeaders > _details->lastExtentSize(txn); z++) {
+ log() << "try #" << z << endl;
- loc = _allocFromExistingExtents( txn, lengthWithHeaders );
- if ( ! loc.isNull() )
- return StatusWith<DiskLoc>( loc );
- }
+ increaseStorageSize(
+ txn,
+ _extentManager->followupSize(lengthWithHeaders, _details->lastExtentSize(txn)),
+ enforceQuota);
- return StatusWith<DiskLoc>( ErrorCodes::InternalError, "cannot allocate space" );
+ loc = _allocFromExistingExtents(txn, lengthWithHeaders);
+ if (!loc.isNull())
+ return StatusWith<DiskLoc>(loc);
}
- Status SimpleRecordStoreV1::truncate(OperationContext* txn) {
- const DiskLoc firstExtLoc = _details->firstExtent(txn);
- if (firstExtLoc.isNull() || !firstExtLoc.isValid()) {
- // Already empty
- return Status::OK();
- }
-
- // Free all extents except the first.
- Extent* firstExt = _extentManager->getExtent(firstExtLoc);
- if (!firstExt->xnext.isNull()) {
- const DiskLoc extNextLoc = firstExt->xnext;
- const DiskLoc oldLastExtLoc = _details->lastExtent(txn);
- Extent* const nextExt = _extentManager->getExtent(extNextLoc);
+ return StatusWith<DiskLoc>(ErrorCodes::InternalError, "cannot allocate space");
+}
- // Unlink other extents;
- *txn->recoveryUnit()->writing(&nextExt->xprev) = DiskLoc();
- *txn->recoveryUnit()->writing(&firstExt->xnext) = DiskLoc();
- _details->setLastExtent(txn, firstExtLoc);
- _details->setLastExtentSize(txn, firstExt->length);
+Status SimpleRecordStoreV1::truncate(OperationContext* txn) {
+ const DiskLoc firstExtLoc = _details->firstExtent(txn);
+ if (firstExtLoc.isNull() || !firstExtLoc.isValid()) {
+ // Already empty
+ return Status::OK();
+ }
- _extentManager->freeExtents(txn, extNextLoc, oldLastExtLoc);
- }
+ // Free all extents except the first.
+ Extent* firstExt = _extentManager->getExtent(firstExtLoc);
+ if (!firstExt->xnext.isNull()) {
+ const DiskLoc extNextLoc = firstExt->xnext;
+ const DiskLoc oldLastExtLoc = _details->lastExtent(txn);
+ Extent* const nextExt = _extentManager->getExtent(extNextLoc);
- // Make the first (now only) extent a single large deleted record.
- *txn->recoveryUnit()->writing(&firstExt->firstRecord) = DiskLoc();
- *txn->recoveryUnit()->writing(&firstExt->lastRecord) = DiskLoc();
- _details->orphanDeletedList(txn);
- addDeletedRec(txn, _findFirstSpot(txn, firstExtLoc, firstExt));
+        // Unlink the other extents.
+ *txn->recoveryUnit()->writing(&nextExt->xprev) = DiskLoc();
+ *txn->recoveryUnit()->writing(&firstExt->xnext) = DiskLoc();
+ _details->setLastExtent(txn, firstExtLoc);
+ _details->setLastExtentSize(txn, firstExt->length);
- // Make stats reflect that there are now no documents in this record store.
- _details->setStats(txn, 0, 0);
-
- return Status::OK();
+ _extentManager->freeExtents(txn, extNextLoc, oldLastExtLoc);
}
- void SimpleRecordStoreV1::addDeletedRec( OperationContext* txn, const DiskLoc& dloc ) {
- DeletedRecord* d = drec( dloc );
+ // Make the first (now only) extent a single large deleted record.
+ *txn->recoveryUnit()->writing(&firstExt->firstRecord) = DiskLoc();
+ *txn->recoveryUnit()->writing(&firstExt->lastRecord) = DiskLoc();
+ _details->orphanDeletedList(txn);
+ addDeletedRec(txn, _findFirstSpot(txn, firstExtLoc, firstExt));
- int b = bucket(d->lengthWithHeaders());
- *txn->recoveryUnit()->writing(&d->nextDeleted()) = _details->deletedListEntry(b);
- _details->setDeletedListEntry(txn, b, dloc);
- }
+ // Make stats reflect that there are now no documents in this record store.
+ _details->setStats(txn, 0, 0);
- std::unique_ptr<RecordCursor> SimpleRecordStoreV1::getCursor(OperationContext* txn,
- bool forward) const {
- return stdx::make_unique<SimpleRecordStoreV1Iterator>( txn, this, forward );
- }
+ return Status::OK();
+}
- vector<std::unique_ptr<RecordCursor>> SimpleRecordStoreV1::getManyCursors(
- OperationContext* txn) const {
- vector<std::unique_ptr<RecordCursor>> cursors;
- const Extent* ext;
- for (DiskLoc extLoc = details()->firstExtent(txn); !extLoc.isNull(); extLoc = ext->xnext) {
- ext = _getExtent(txn, extLoc);
- if (ext->firstRecord.isNull())
- continue;
- cursors.push_back(
- stdx::make_unique<RecordStoreV1Base::IntraExtentIterator>(txn,
- ext->firstRecord,
- this));
- }
+void SimpleRecordStoreV1::addDeletedRec(OperationContext* txn, const DiskLoc& dloc) {
+ DeletedRecord* d = drec(dloc);
- return cursors;
- }
+ int b = bucket(d->lengthWithHeaders());
+ *txn->recoveryUnit()->writing(&d->nextDeleted()) = _details->deletedListEntry(b);
+ _details->setDeletedListEntry(txn, b, dloc);
+}
- class CompactDocWriter : public DocWriter {
- public:
- /**
- * param allocationSize - allocation size WITH header
- */
- CompactDocWriter( const MmapV1RecordHeader* rec, unsigned dataSize, size_t allocationSize )
- : _rec( rec ),
- _dataSize( dataSize ),
- _allocationSize( allocationSize ) {
- }
+std::unique_ptr<RecordCursor> SimpleRecordStoreV1::getCursor(OperationContext* txn,
+ bool forward) const {
+ return stdx::make_unique<SimpleRecordStoreV1Iterator>(txn, this, forward);
+}
- virtual ~CompactDocWriter() {}
+vector<std::unique_ptr<RecordCursor>> SimpleRecordStoreV1::getManyCursors(
+ OperationContext* txn) const {
+ vector<std::unique_ptr<RecordCursor>> cursors;
+ const Extent* ext;
+ for (DiskLoc extLoc = details()->firstExtent(txn); !extLoc.isNull(); extLoc = ext->xnext) {
+ ext = _getExtent(txn, extLoc);
+ if (ext->firstRecord.isNull())
+ continue;
+ cursors.push_back(
+ stdx::make_unique<RecordStoreV1Base::IntraExtentIterator>(txn, ext->firstRecord, this));
+ }
- virtual void writeDocument( char* buf ) const {
- memcpy( buf, _rec->data(), _dataSize );
- }
+ return cursors;
+}
- virtual size_t documentSize() const {
- return _allocationSize - MmapV1RecordHeader::HeaderSize;
- }
+class CompactDocWriter : public DocWriter {
+public:
+ /**
+     * @param allocationSize - allocation size WITH header
+ */
+ CompactDocWriter(const MmapV1RecordHeader* rec, unsigned dataSize, size_t allocationSize)
+ : _rec(rec), _dataSize(dataSize), _allocationSize(allocationSize) {}
- virtual bool addPadding() const {
- return false;
- }
+ virtual ~CompactDocWriter() {}
- private:
- const MmapV1RecordHeader* _rec;
- size_t _dataSize;
- size_t _allocationSize;
- };
+ virtual void writeDocument(char* buf) const {
+ memcpy(buf, _rec->data(), _dataSize);
+ }
- void SimpleRecordStoreV1::_compactExtent(OperationContext* txn,
- const DiskLoc extentLoc,
- int extentNumber,
- RecordStoreCompactAdaptor* adaptor,
- const CompactOptions* compactOptions,
- CompactStats* stats ) {
+ virtual size_t documentSize() const {
+ return _allocationSize - MmapV1RecordHeader::HeaderSize;
+ }
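+
+    // Note: reporting the allocation size (minus header) rather than _dataSize is
+    // how the compact path carries the requested padding into the new record.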
- log() << "compact begin extent #" << extentNumber
- << " for namespace " << _ns << " " << extentLoc;
+ virtual bool addPadding() const {
+ return false;
+ }
- unsigned oldObjSize = 0; // we'll report what the old padding was
- unsigned oldObjSizeWithPadding = 0;
+private:
+ const MmapV1RecordHeader* _rec;
+ size_t _dataSize;
+ size_t _allocationSize;
+};
- Extent* const sourceExtent = _extentManager->getExtent( extentLoc );
- sourceExtent->assertOk();
- fassert( 17437, sourceExtent->validates(extentLoc) );
+void SimpleRecordStoreV1::_compactExtent(OperationContext* txn,
+ const DiskLoc extentLoc,
+ int extentNumber,
+ RecordStoreCompactAdaptor* adaptor,
+ const CompactOptions* compactOptions,
+ CompactStats* stats) {
+ log() << "compact begin extent #" << extentNumber << " for namespace " << _ns << " "
+ << extentLoc;
+
+ unsigned oldObjSize = 0; // we'll report what the old padding was
+ unsigned oldObjSizeWithPadding = 0;
+
+ Extent* const sourceExtent = _extentManager->getExtent(extentLoc);
+ sourceExtent->assertOk();
+ fassert(17437, sourceExtent->validates(extentLoc));
+
+ {
+ // The next/prev MmapV1RecordHeader pointers within the Extent might not be in order so we first
+ // page in the whole Extent sequentially.
+ // TODO benchmark on slow storage to verify this is measurably faster.
+ log() << "compact paging in len=" << sourceExtent->length / 1000000.0 << "MB" << endl;
+ Timer t;
+ size_t length = sourceExtent->length;
+
+ touch_pages(reinterpret_cast<const char*>(sourceExtent), length);
+ int ms = t.millis();
+ if (ms > 1000)
+ log() << "compact end paging in " << ms << "ms "
+ << sourceExtent->length / 1000000.0 / t.seconds() << "MB/sec" << endl;
+ }
- {
- // The next/prev MmapV1RecordHeader pointers within the Extent might not be in order so we first
- // page in the whole Extent sequentially.
- // TODO benchmark on slow storage to verify this is measurably faster.
- log() << "compact paging in len=" << sourceExtent->length/1000000.0 << "MB" << endl;
- Timer t;
- size_t length = sourceExtent->length;
-
- touch_pages( reinterpret_cast<const char*>(sourceExtent), length );
- int ms = t.millis();
- if( ms > 1000 )
- log() << "compact end paging in " << ms << "ms "
- << sourceExtent->length/1000000.0/t.seconds() << "MB/sec" << endl;
- }
+ {
+        // Move each MmapV1RecordHeader out of this extent and insert it into the "new" extents.
+ log() << "compact copying records" << endl;
+ long long totalNetSize = 0;
+ long long nrecords = 0;
+ DiskLoc nextSourceLoc = sourceExtent->firstRecord;
+ while (!nextSourceLoc.isNull()) {
+ txn->checkForInterrupt();
- {
- // Move each MmapV1RecordHeader out of this extent and insert it in to the "new" extents.
- log() << "compact copying records" << endl;
- long long totalNetSize = 0;
- long long nrecords = 0;
- DiskLoc nextSourceLoc = sourceExtent->firstRecord;
- while (!nextSourceLoc.isNull()) {
- txn->checkForInterrupt();
-
- WriteUnitOfWork wunit(txn);
- MmapV1RecordHeader* recOld = recordFor(nextSourceLoc);
- RecordData oldData = recOld->toRecordData();
- nextSourceLoc = getNextRecordInExtent(txn, nextSourceLoc);
-
- if ( compactOptions->validateDocuments && !adaptor->isDataValid( oldData ) ) {
- // object is corrupt!
- log() << "compact removing corrupt document!";
- stats->corruptDocuments++;
- }
- else {
- // How much data is in the record. Excludes padding and MmapV1RecordHeader headers.
- const unsigned rawDataSize = adaptor->dataSize( oldData );
-
- nrecords++;
- oldObjSize += rawDataSize;
- oldObjSizeWithPadding += recOld->netLength();
-
- // Allocation sizes include the headers and possibly some padding.
- const unsigned minAllocationSize = rawDataSize + MmapV1RecordHeader::HeaderSize;
- unsigned allocationSize = minAllocationSize;
- switch( compactOptions->paddingMode ) {
- case CompactOptions::NONE: // default padding
+ WriteUnitOfWork wunit(txn);
+ MmapV1RecordHeader* recOld = recordFor(nextSourceLoc);
+ RecordData oldData = recOld->toRecordData();
+ nextSourceLoc = getNextRecordInExtent(txn, nextSourceLoc);
+
+ if (compactOptions->validateDocuments && !adaptor->isDataValid(oldData)) {
+ // object is corrupt!
+ log() << "compact removing corrupt document!";
+ stats->corruptDocuments++;
+ } else {
+ // How much data is in the record. Excludes padding and MmapV1RecordHeader headers.
+ const unsigned rawDataSize = adaptor->dataSize(oldData);
+
+ nrecords++;
+ oldObjSize += rawDataSize;
+ oldObjSizeWithPadding += recOld->netLength();
+
+ // Allocation sizes include the headers and possibly some padding.
+ const unsigned minAllocationSize = rawDataSize + MmapV1RecordHeader::HeaderSize;
+ unsigned allocationSize = minAllocationSize;
+ switch (compactOptions->paddingMode) {
+ case CompactOptions::NONE: // default padding
if (shouldPadInserts()) {
allocationSize = quantizeAllocationSpace(minAllocationSize);
}
break;
- case CompactOptions::PRESERVE: // keep original padding
+ case CompactOptions::PRESERVE: // keep original padding
allocationSize = recOld->lengthWithHeaders();
break;
- case CompactOptions::MANUAL: // user specified how much padding to use
+ case CompactOptions::MANUAL: // user specified how much padding to use
allocationSize = compactOptions->computeRecordSize(minAllocationSize);
- if (allocationSize < minAllocationSize
- || allocationSize > BSONObjMaxUserSize / 2 ) {
+ if (allocationSize < minAllocationSize ||
+ allocationSize > BSONObjMaxUserSize / 2) {
allocationSize = minAllocationSize;
}
break;
- }
- invariant(allocationSize >= minAllocationSize);
-
- // Copy the data to a new record. Because we orphaned the record freelist at the
- // start of the compact, this insert will allocate a record in a new extent.
- // See the comment in compact() for more details.
- CompactDocWriter writer( recOld, rawDataSize, allocationSize );
- StatusWith<RecordId> status = insertRecord( txn, &writer, false );
- uassertStatusOK( status.getStatus() );
- const MmapV1RecordHeader* newRec = recordFor(DiskLoc::fromRecordId(status.getValue()));
- invariant(unsigned(newRec->netLength()) >= rawDataSize);
- totalNetSize += newRec->netLength();
-
- // Tells the caller that the record has been moved, so it can do things such as
- // add it to indexes.
- adaptor->inserted(newRec->toRecordData(), status.getValue());
- }
-
- // Remove the old record from the linked list of records withing the sourceExtent.
- // The old record is not added to the freelist as we will be freeing the whole
- // extent at the end.
- *txn->recoveryUnit()->writing(&sourceExtent->firstRecord) = nextSourceLoc;
- if (nextSourceLoc.isNull()) {
- // Just moved the last record out of the extent. Mark extent as empty.
- *txn->recoveryUnit()->writing(&sourceExtent->lastRecord) = DiskLoc();
}
- else {
- MmapV1RecordHeader* newFirstRecord = recordFor(nextSourceLoc);
- txn->recoveryUnit()->writingInt(newFirstRecord->prevOfs()) = DiskLoc::NullOfs;
- }
-
- // Adjust the stats to reflect the removal of the old record. The insert above
- // handled adjusting the stats for the new record.
- _details->incrementStats(txn, -(recOld->netLength()), -1);
-
- wunit.commit();
+ invariant(allocationSize >= minAllocationSize);
+
+ // Copy the data to a new record. Because we orphaned the record freelist at the
+ // start of the compact, this insert will allocate a record in a new extent.
+ // See the comment in compact() for more details.
+ CompactDocWriter writer(recOld, rawDataSize, allocationSize);
+ StatusWith<RecordId> status = insertRecord(txn, &writer, false);
+ uassertStatusOK(status.getStatus());
+ const MmapV1RecordHeader* newRec =
+ recordFor(DiskLoc::fromRecordId(status.getValue()));
+ invariant(unsigned(newRec->netLength()) >= rawDataSize);
+ totalNetSize += newRec->netLength();
+
+ // Tells the caller that the record has been moved, so it can do things such as
+ // add it to indexes.
+ adaptor->inserted(newRec->toRecordData(), status.getValue());
}
- // The extent must now be empty.
- invariant(sourceExtent->firstRecord.isNull());
- invariant(sourceExtent->lastRecord.isNull());
+            // Remove the old record from the linked list of records within the sourceExtent.
+ // The old record is not added to the freelist as we will be freeing the whole
+ // extent at the end.
+ *txn->recoveryUnit()->writing(&sourceExtent->firstRecord) = nextSourceLoc;
+ if (nextSourceLoc.isNull()) {
+ // Just moved the last record out of the extent. Mark extent as empty.
+ *txn->recoveryUnit()->writing(&sourceExtent->lastRecord) = DiskLoc();
+ } else {
+ MmapV1RecordHeader* newFirstRecord = recordFor(nextSourceLoc);
+ txn->recoveryUnit()->writingInt(newFirstRecord->prevOfs()) = DiskLoc::NullOfs;
+ }
- // We are still the first extent, but we must not be the only extent.
- invariant( _details->firstExtent(txn) == extentLoc );
- invariant( _details->lastExtent(txn) != extentLoc );
+ // Adjust the stats to reflect the removal of the old record. The insert above
+ // handled adjusting the stats for the new record.
+ _details->incrementStats(txn, -(recOld->netLength()), -1);
- // Remove the newly emptied sourceExtent from the extent linked list and return it to
- // the extent manager.
- WriteUnitOfWork wunit(txn);
- const DiskLoc newFirst = sourceExtent->xnext;
- _details->setFirstExtent( txn, newFirst );
- *txn->recoveryUnit()->writing(&_extentManager->getExtent( newFirst )->xprev) = DiskLoc();
- _extentManager->freeExtent( txn, extentLoc );
wunit.commit();
-
- {
- const double oldPadding = oldObjSize ? double(oldObjSizeWithPadding) / oldObjSize
- : 1.0; // defining 0/0 as 1 for this.
-
- log() << "compact finished extent #" << extentNumber << " containing " << nrecords
- << " documents (" << totalNetSize / (1024*1024.0) << "MB)"
- << " oldPadding: " << oldPadding;
- }
}
- }
+ // The extent must now be empty.
+ invariant(sourceExtent->firstRecord.isNull());
+ invariant(sourceExtent->lastRecord.isNull());
- Status SimpleRecordStoreV1::compact( OperationContext* txn,
- RecordStoreCompactAdaptor* adaptor,
- const CompactOptions* options,
- CompactStats* stats ) {
-
- std::vector<DiskLoc> extents;
- for( DiskLoc extLocation = _details->firstExtent(txn);
- !extLocation.isNull();
- extLocation = _extentManager->getExtent( extLocation )->xnext ) {
- extents.push_back( extLocation );
- }
- log() << "compact " << extents.size() << " extents";
+    // The source extent must still be the first extent, but must not be the only extent.
+ invariant(_details->firstExtent(txn) == extentLoc);
+ invariant(_details->lastExtent(txn) != extentLoc);
- {
- WriteUnitOfWork wunit(txn);
- // Orphaning the deleted lists ensures that all inserts go to new extents rather than
- // the ones that existed before starting the compact. If we abort the operation before
- // completion, any free space in the old extents will be leaked and never reused unless
- // the collection is compacted again or dropped. This is considered an acceptable
- // failure mode as no data will be lost.
- log() << "compact orphan deleted lists" << endl;
- _details->orphanDeletedList(txn);
-
- // Start over from scratch with our extent sizing and growth
- _details->setLastExtentSize( txn, 0 );
-
- // create a new extent so new records go there
- increaseStorageSize( txn, _details->lastExtentSize(txn), true );
- wunit.commit();
- }
+ // Remove the newly emptied sourceExtent from the extent linked list and return it to
+ // the extent manager.
+ WriteUnitOfWork wunit(txn);
+ const DiskLoc newFirst = sourceExtent->xnext;
+ _details->setFirstExtent(txn, newFirst);
+ *txn->recoveryUnit()->writing(&_extentManager->getExtent(newFirst)->xprev) = DiskLoc();
+ _extentManager->freeExtent(txn, extentLoc);
+ wunit.commit();
- stdx::unique_lock<Client> lk(*txn->getClient());
- ProgressMeterHolder pm(*txn->setMessage_inlock("compact extent",
- "Extent Compacting Progress",
- extents.size()));
- lk.unlock();
+ {
+ const double oldPadding = oldObjSize ? double(oldObjSizeWithPadding) / oldObjSize
+ : 1.0; // defining 0/0 as 1 for this.
- // Go through all old extents and move each record to a new set of extents.
- int extentNumber = 0;
- for( std::vector<DiskLoc>::iterator it = extents.begin(); it != extents.end(); it++ ) {
- txn->checkForInterrupt();
- invariant(_details->firstExtent(txn) == *it);
- // empties and removes the first extent
- _compactExtent(txn, *it, extentNumber++, adaptor, options, stats );
- invariant(_details->firstExtent(txn) != *it);
- pm.hit();
+ log() << "compact finished extent #" << extentNumber << " containing " << nrecords
+ << " documents (" << totalNetSize / (1024 * 1024.0) << "MB)"
+ << " oldPadding: " << oldPadding;
}
+ }
+}
- invariant( _extentManager->getExtent( _details->firstExtent(txn) )->xprev.isNull() );
- invariant( _extentManager->getExtent( _details->lastExtent(txn) )->xnext.isNull() );
+Status SimpleRecordStoreV1::compact(OperationContext* txn,
+ RecordStoreCompactAdaptor* adaptor,
+ const CompactOptions* options,
+ CompactStats* stats) {
+ std::vector<DiskLoc> extents;
+ for (DiskLoc extLocation = _details->firstExtent(txn); !extLocation.isNull();
+ extLocation = _extentManager->getExtent(extLocation)->xnext) {
+ extents.push_back(extLocation);
+ }
+ log() << "compact " << extents.size() << " extents";
+
+ {
+ WriteUnitOfWork wunit(txn);
+ // Orphaning the deleted lists ensures that all inserts go to new extents rather than
+ // the ones that existed before starting the compact. If we abort the operation before
+ // completion, any free space in the old extents will be leaked and never reused unless
+ // the collection is compacted again or dropped. This is considered an acceptable
+ // failure mode as no data will be lost.
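+        // Net effect (sketch): with the free lists orphaned, _allocFromExistingExtents()
+        // finds nothing to reuse, so every insert below is served from brand-new extents.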
+ log() << "compact orphan deleted lists" << endl;
+ _details->orphanDeletedList(txn);
- // indexes will do their own progress meter
- pm.finished();
+ // Start over from scratch with our extent sizing and growth
+ _details->setLastExtentSize(txn, 0);
- return Status::OK();
+ // create a new extent so new records go there
+ increaseStorageSize(txn, _details->lastExtentSize(txn), true);
+ wunit.commit();
}
+ stdx::unique_lock<Client> lk(*txn->getClient());
+ ProgressMeterHolder pm(
+ *txn->setMessage_inlock("compact extent", "Extent Compacting Progress", extents.size()));
+ lk.unlock();
+
+ // Go through all old extents and move each record to a new set of extents.
+ int extentNumber = 0;
+ for (std::vector<DiskLoc>::iterator it = extents.begin(); it != extents.end(); it++) {
+ txn->checkForInterrupt();
+ invariant(_details->firstExtent(txn) == *it);
+ // empties and removes the first extent
+ _compactExtent(txn, *it, extentNumber++, adaptor, options, stats);
+ invariant(_details->firstExtent(txn) != *it);
+ pm.hit();
+ }
+
+ invariant(_extentManager->getExtent(_details->firstExtent(txn))->xprev.isNull());
+ invariant(_extentManager->getExtent(_details->lastExtent(txn))->xnext.isNull());
+
+ // indexes will do their own progress meter
+ pm.finished();
+
+ return Status::OK();
+}
}
diff --git a/src/mongo/db/storage/mmap_v1/record_store_v1_simple.h b/src/mongo/db/storage/mmap_v1/record_store_v1_simple.h
index a108305492a..9ab6ba86f78 100644
--- a/src/mongo/db/storage/mmap_v1/record_store_v1_simple.h
+++ b/src/mongo/db/storage/mmap_v1/record_store_v1_simple.h
@@ -36,65 +36,70 @@
namespace mongo {
- class SimpleRecordStoreV1Cursor;
-
- // used by index and original collections
- class SimpleRecordStoreV1 : public RecordStoreV1Base {
- public:
- SimpleRecordStoreV1( OperationContext* txn,
- StringData ns,
- RecordStoreV1MetaData* details,
- ExtentManager* em,
- bool isSystemIndexes );
-
- virtual ~SimpleRecordStoreV1();
-
- const char* name() const { return "SimpleRecordStoreV1"; }
-
- std::unique_ptr<RecordCursor> getCursor(OperationContext* txn, bool forward) const final;
-
- std::vector<std::unique_ptr<RecordCursor>> getManyCursors(
- OperationContext* txn) const final;
-
- virtual Status truncate(OperationContext* txn);
-
- virtual void temp_cappedTruncateAfter(OperationContext* txn, RecordId end, bool inclusive) {
- invariant(!"cappedTruncateAfter not supported");
- }
-
- virtual bool compactSupported() const { return true; }
- virtual bool compactsInPlace() const { return false; }
- virtual Status compact( OperationContext* txn,
- RecordStoreCompactAdaptor* adaptor,
- const CompactOptions* options,
- CompactStats* stats );
-
- protected:
- virtual bool isCapped() const { return false; }
- virtual bool shouldPadInserts() const {
- return !_details->isUserFlagSet(CollectionOptions::Flag_NoPadding);
- }
-
- virtual StatusWith<DiskLoc> allocRecord( OperationContext* txn,
- int lengthWithHeaders,
- bool enforceQuota );
-
- virtual void addDeletedRec(OperationContext* txn,
- const DiskLoc& dloc);
- private:
- DiskLoc _allocFromExistingExtents( OperationContext* txn,
- int lengthWithHeaders );
-
- void _compactExtent(OperationContext* txn,
- const DiskLoc diskloc,
- int extentNumber,
- RecordStoreCompactAdaptor* adaptor,
- const CompactOptions* compactOptions,
- CompactStats* stats );
-
- bool _normalCollection;
-
- friend class SimpleRecordStoreV1Iterator;
- };
-
+class SimpleRecordStoreV1Cursor;
+
+// used by index and original collections
+class SimpleRecordStoreV1 : public RecordStoreV1Base {
+public:
+ SimpleRecordStoreV1(OperationContext* txn,
+ StringData ns,
+ RecordStoreV1MetaData* details,
+ ExtentManager* em,
+ bool isSystemIndexes);
+
+ virtual ~SimpleRecordStoreV1();
+
+ const char* name() const {
+ return "SimpleRecordStoreV1";
+ }
+
+ std::unique_ptr<RecordCursor> getCursor(OperationContext* txn, bool forward) const final;
+
+ std::vector<std::unique_ptr<RecordCursor>> getManyCursors(OperationContext* txn) const final;
+
+ virtual Status truncate(OperationContext* txn);
+
+ virtual void temp_cappedTruncateAfter(OperationContext* txn, RecordId end, bool inclusive) {
+ invariant(!"cappedTruncateAfter not supported");
+ }
+
+ virtual bool compactSupported() const {
+ return true;
+ }
+ virtual bool compactsInPlace() const {
+ return false;
+ }
+ virtual Status compact(OperationContext* txn,
+ RecordStoreCompactAdaptor* adaptor,
+ const CompactOptions* options,
+ CompactStats* stats);
+
+protected:
+ virtual bool isCapped() const {
+ return false;
+ }
+ virtual bool shouldPadInserts() const {
+ return !_details->isUserFlagSet(CollectionOptions::Flag_NoPadding);
+ }
+
+ virtual StatusWith<DiskLoc> allocRecord(OperationContext* txn,
+ int lengthWithHeaders,
+ bool enforceQuota);
+
+ virtual void addDeletedRec(OperationContext* txn, const DiskLoc& dloc);
+
+private:
+ DiskLoc _allocFromExistingExtents(OperationContext* txn, int lengthWithHeaders);
+
+ void _compactExtent(OperationContext* txn,
+ const DiskLoc diskloc,
+ int extentNumber,
+ RecordStoreCompactAdaptor* adaptor,
+ const CompactOptions* compactOptions,
+ CompactStats* stats);
+
+ bool _normalCollection;
+
+ friend class SimpleRecordStoreV1Iterator;
+};
}
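
A hypothetical caller of the interface declared above, showing how the three compact-related virtuals compose (compactSupported(), compactsInPlace() and compact() are the members from this header; the driver itself is illustrative, not part of the commit):

    Status runCompact(OperationContext* txn,
                      SimpleRecordStoreV1* rs,
                      RecordStoreCompactAdaptor* adaptor,
                      const CompactOptions* options,
                      CompactStats* stats) {
        if (!rs->compactSupported())
            return Status(ErrorCodes::CommandNotSupported, "compact not supported");
        // SimpleRecordStoreV1 copies records into new extents rather than
        // compacting in place, so extra disk space is needed during the run.
        invariant(!rs->compactsInPlace());
        return rs->compact(txn, adaptor, options, stats);
    }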
diff --git a/src/mongo/db/storage/mmap_v1/record_store_v1_simple_iterator.cpp b/src/mongo/db/storage/mmap_v1/record_store_v1_simple_iterator.cpp
index ec1e51abe02..babfbcf26ea 100644
--- a/src/mongo/db/storage/mmap_v1/record_store_v1_simple_iterator.cpp
+++ b/src/mongo/db/storage/mmap_v1/record_store_v1_simple_iterator.cpp
@@ -35,100 +35,94 @@
namespace mongo {
- //
- // Regular / non-capped collection traversal
- //
-
- SimpleRecordStoreV1Iterator::SimpleRecordStoreV1Iterator(OperationContext* txn,
- const SimpleRecordStoreV1* collection,
- bool forward)
- : _txn(txn)
- , _recordStore(collection)
- , _forward(forward) {
-
- // Eagerly seek to first Record on creation since it is cheap.
- const ExtentManager* em = _recordStore->_extentManager;
- if ( _recordStore->details()->firstExtent(txn).isNull() ) {
- // nothing in the collection
- verify( _recordStore->details()->lastExtent(txn).isNull() );
+//
+// Regular / non-capped collection traversal
+//
+
+SimpleRecordStoreV1Iterator::SimpleRecordStoreV1Iterator(OperationContext* txn,
+ const SimpleRecordStoreV1* collection,
+ bool forward)
+ : _txn(txn), _recordStore(collection), _forward(forward) {
+ // Eagerly seek to first Record on creation since it is cheap.
+ const ExtentManager* em = _recordStore->_extentManager;
+ if (_recordStore->details()->firstExtent(txn).isNull()) {
+ // nothing in the collection
+ verify(_recordStore->details()->lastExtent(txn).isNull());
+ } else if (_forward) {
+ // Find a non-empty extent and start with the first record in it.
+ Extent* e = em->getExtent(_recordStore->details()->firstExtent(txn));
+
+ while (e->firstRecord.isNull() && !e->xnext.isNull()) {
+ e = em->getExtent(e->xnext);
}
- else if (_forward) {
- // Find a non-empty extent and start with the first record in it.
- Extent* e = em->getExtent( _recordStore->details()->firstExtent(txn) );
- while (e->firstRecord.isNull() && !e->xnext.isNull()) {
- e = em->getExtent( e->xnext );
- }
-
- // _curr may be set to DiskLoc() here if e->lastRecord isNull but there is no
- // valid e->xnext
- _curr = e->firstRecord;
- }
- else {
- // Walk backwards, skipping empty extents, and use the last record in the first
- // non-empty extent we see.
- Extent* e = em->getExtent( _recordStore->details()->lastExtent(txn) );
-
- // TODO ELABORATE
- // Does one of e->lastRecord.isNull(), e.firstRecord.isNull() imply the other?
- while (e->lastRecord.isNull() && !e->xprev.isNull()) {
- e = em->getExtent( e->xprev );
- }
-
- // _curr may be set to DiskLoc() here if e->lastRecord isNull but there is no
- // valid e->xprev
- _curr = e->lastRecord;
+ // _curr may be set to DiskLoc() here if e->firstRecord isNull but there is no
+ // valid e->xnext
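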
+ _curr = e->firstRecord;
+ } else {
+ // Walk backwards, skipping empty extents, and use the last record in the first
+ // non-empty extent we see.
+ Extent* e = em->getExtent(_recordStore->details()->lastExtent(txn));
+
+ // TODO ELABORATE
+ // Does one of e->lastRecord.isNull(), e->firstRecord.isNull() imply the other?
+ while (e->lastRecord.isNull() && !e->xprev.isNull()) {
+ e = em->getExtent(e->xprev);
}
- }
- boost::optional<Record> SimpleRecordStoreV1Iterator::next() {
- if (isEOF()) return {};
- auto toReturn = _curr.toRecordId();
- advance();
- return {{toReturn, _recordStore->RecordStore::dataFor(_txn, toReturn)}};
+ // _curr may be set to DiskLoc() here if e->lastRecord isNull but there is no
+ // valid e->xprev
+ _curr = e->lastRecord;
}
+}
- boost::optional<Record> SimpleRecordStoreV1Iterator::seekExact(const RecordId& id) {
- _curr = DiskLoc::fromRecordId(id);
- advance();
- return {{id, _recordStore->RecordStore::dataFor(_txn, id)}};
- }
+boost::optional<Record> SimpleRecordStoreV1Iterator::next() {
+ if (isEOF())
+ return {};
+ auto toReturn = _curr.toRecordId();
+ advance();
+ return {{toReturn, _recordStore->RecordStore::dataFor(_txn, toReturn)}};
+}
- void SimpleRecordStoreV1Iterator::advance() {
- // Move to the next thing.
- if (!isEOF()) {
- if (_forward) {
- _curr = _recordStore->getNextRecord( _txn, _curr );
- }
- else {
- _curr = _recordStore->getPrevRecord( _txn, _curr );
- }
- }
- }
+boost::optional<Record> SimpleRecordStoreV1Iterator::seekExact(const RecordId& id) {
+ _curr = DiskLoc::fromRecordId(id);
+ advance();
+ return {{id, _recordStore->RecordStore::dataFor(_txn, id)}};
+}
- void SimpleRecordStoreV1Iterator::invalidate(const RecordId& dl) {
- // Just move past the thing being deleted.
- if (dl == _curr.toRecordId()) {
- advance();
+void SimpleRecordStoreV1Iterator::advance() {
+ // Move to the next thing.
+ if (!isEOF()) {
+ if (_forward) {
+ _curr = _recordStore->getNextRecord(_txn, _curr);
+ } else {
+ _curr = _recordStore->getPrevRecord(_txn, _curr);
}
}
+}
- void SimpleRecordStoreV1Iterator::savePositioned() {
- _txn = nullptr;
+void SimpleRecordStoreV1Iterator::invalidate(const RecordId& dl) {
+ // Just move past the thing being deleted.
+ if (dl == _curr.toRecordId()) {
+ advance();
}
+}
- bool SimpleRecordStoreV1Iterator::restore(OperationContext* txn) {
- _txn = txn;
- // if the collection is dropped, then the cursor should be destroyed
- return true;
- }
+void SimpleRecordStoreV1Iterator::savePositioned() {
+ _txn = nullptr;
+}
- std::unique_ptr<RecordFetcher> SimpleRecordStoreV1Iterator::fetcherForNext() const {
- return _recordStore->_extentManager->recordNeedsFetch(_curr);
- }
+bool SimpleRecordStoreV1Iterator::restore(OperationContext* txn) {
+ _txn = txn;
+ // if the collection is dropped, then the cursor should be destroyed
+ return true;
+}
- std::unique_ptr<RecordFetcher> SimpleRecordStoreV1Iterator::fetcherForId(
- const RecordId& id) const {
- return _recordStore->_extentManager->recordNeedsFetch(DiskLoc::fromRecordId(id));
- }
+std::unique_ptr<RecordFetcher> SimpleRecordStoreV1Iterator::fetcherForNext() const {
+ return _recordStore->_extentManager->recordNeedsFetch(_curr);
+}
+
+std::unique_ptr<RecordFetcher> SimpleRecordStoreV1Iterator::fetcherForId(const RecordId& id) const {
+ return _recordStore->_extentManager->recordNeedsFetch(DiskLoc::fromRecordId(id));
+}
}
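
A minimal usage sketch for the cursor implemented above (assumes an already-constructed SimpleRecordStoreV1 'rs' and OperationContext* 'txn'; processRecord is a hypothetical callback):

    // Forward scan; the constructor has already positioned on the first record.
    auto cursor = rs.getCursor(txn, /*forward=*/true);
    while (boost::optional<Record> rec = cursor->next()) {
        processRecord(rec->id, rec->data);  // Record carries a RecordId and RecordData
    }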
diff --git a/src/mongo/db/storage/mmap_v1/record_store_v1_simple_iterator.h b/src/mongo/db/storage/mmap_v1/record_store_v1_simple_iterator.h
index c19c0c386b3..91b0088bf72 100644
--- a/src/mongo/db/storage/mmap_v1/record_store_v1_simple_iterator.h
+++ b/src/mongo/db/storage/mmap_v1/record_store_v1_simple_iterator.h
@@ -33,39 +33,41 @@
namespace mongo {
- class SimpleRecordStoreV1;
+class SimpleRecordStoreV1;
- /**
- * This class iterates over a non-capped collection identified by 'ns'.
- * The collection must exist when the constructor is called.
- *
- * If start is not DiskLoc(), the iteration begins at that DiskLoc.
- */
- class SimpleRecordStoreV1Iterator final : public RecordCursor {
- public:
- SimpleRecordStoreV1Iterator( OperationContext* txn,
- const SimpleRecordStoreV1* records,
- bool forward);
+/**
+ * This class iterates over a non-capped collection identified by 'ns'.
+ * The collection must exist when the constructor is called.
+ *
+ * If start is not DiskLoc(), the iteration begins at that DiskLoc.
+ */
+class SimpleRecordStoreV1Iterator final : public RecordCursor {
+public:
+ SimpleRecordStoreV1Iterator(OperationContext* txn,
+ const SimpleRecordStoreV1* records,
+ bool forward);
- boost::optional<Record> next() final;
- boost::optional<Record> seekExact(const RecordId& id) final;
- void savePositioned() final;
- bool restore(OperationContext* txn) final;
- void invalidate(const RecordId& dl) final;
- std::unique_ptr<RecordFetcher> fetcherForNext() const final;
- std::unique_ptr<RecordFetcher> fetcherForId(const RecordId& id) const final;
+ boost::optional<Record> next() final;
+ boost::optional<Record> seekExact(const RecordId& id) final;
+ void savePositioned() final;
+ bool restore(OperationContext* txn) final;
+ void invalidate(const RecordId& dl) final;
+ std::unique_ptr<RecordFetcher> fetcherForNext() const final;
+ std::unique_ptr<RecordFetcher> fetcherForId(const RecordId& id) const final;
- private:
- void advance();
- bool isEOF() { return _curr.isNull(); }
+private:
+ void advance();
+ bool isEOF() {
+ return _curr.isNull();
+ }
- // for getNext, not owned
- OperationContext* _txn;
+ // for getNext, not owned
+ OperationContext* _txn;
- // The result returned on the next call to getNext().
- DiskLoc _curr;
- const SimpleRecordStoreV1* const _recordStore;
- const bool _forward;
- };
+ // The result returned on the next call to getNext().
+ DiskLoc _curr;
+ const SimpleRecordStoreV1* const _recordStore;
+ const bool _forward;
+};
} // namespace mongo
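
The savePositioned()/restore() pair above exists to support lock yielding; a sketch of the expected calling pattern (yieldLocksAndWait is hypothetical):

    cursor->savePositioned();   // detaches the cursor from 'txn'
    yieldLocksAndWait();        // hypothetical: locks are released here
    if (!cursor->restore(txn)) {
        // By contract the cursor must then be destroyed (e.g. collection
        // dropped while yielded). This implementation always returns true
        // and relies on invalidate() to step past deleted records.
    }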
diff --git a/src/mongo/db/storage/mmap_v1/record_store_v1_simple_test.cpp b/src/mongo/db/storage/mmap_v1/record_store_v1_simple_test.cpp
index 21ffdf6ef2b..e4e85168b01 100644
--- a/src/mongo/db/storage/mmap_v1/record_store_v1_simple_test.cpp
+++ b/src/mongo/db/storage/mmap_v1/record_store_v1_simple_test.cpp
@@ -40,501 +40,413 @@ using namespace mongo;
namespace {
- using std::string;
-
- TEST( SimpleRecordStoreV1, quantizeAllocationSpaceSimple ) {
- ASSERT_EQUALS(RecordStoreV1Base::quantizeAllocationSpace(33), 64);
- ASSERT_EQUALS(RecordStoreV1Base::quantizeAllocationSpace(1000), 1024);
- ASSERT_EQUALS(RecordStoreV1Base::quantizeAllocationSpace(10001), 16*1024);
- ASSERT_EQUALS(RecordStoreV1Base::quantizeAllocationSpace(100000), 128*1024);
- ASSERT_EQUALS(RecordStoreV1Base::quantizeAllocationSpace(1000001), 1024*1024);
- ASSERT_EQUALS(RecordStoreV1Base::quantizeAllocationSpace(10000000), 10*1024*1024);
- ASSERT_EQUALS(RecordStoreV1Base::quantizeAllocationSpace(14*1024*1024 - 1), 14*1024*1024);
- ASSERT_EQUALS(RecordStoreV1Base::quantizeAllocationSpace(14*1024*1024), 14*1024*1024);
- ASSERT_EQUALS(RecordStoreV1Base::quantizeAllocationSpace(14*1024*1024 + 1),
- 16*1024*1024 + 512*1024);
- ASSERT_EQUALS(RecordStoreV1Base::quantizeAllocationSpace(16*1024*1024 + 512*1024),
- 16*1024*1024 + 512*1024);
- }
+using std::string;
+
+TEST(SimpleRecordStoreV1, quantizeAllocationSpaceSimple) {
+ ASSERT_EQUALS(RecordStoreV1Base::quantizeAllocationSpace(33), 64);
+ ASSERT_EQUALS(RecordStoreV1Base::quantizeAllocationSpace(1000), 1024);
+ ASSERT_EQUALS(RecordStoreV1Base::quantizeAllocationSpace(10001), 16 * 1024);
+ ASSERT_EQUALS(RecordStoreV1Base::quantizeAllocationSpace(100000), 128 * 1024);
+ ASSERT_EQUALS(RecordStoreV1Base::quantizeAllocationSpace(1000001), 1024 * 1024);
+ ASSERT_EQUALS(RecordStoreV1Base::quantizeAllocationSpace(10000000), 10 * 1024 * 1024);
+ ASSERT_EQUALS(RecordStoreV1Base::quantizeAllocationSpace(14 * 1024 * 1024 - 1),
+ 14 * 1024 * 1024);
+ ASSERT_EQUALS(RecordStoreV1Base::quantizeAllocationSpace(14 * 1024 * 1024), 14 * 1024 * 1024);
+ ASSERT_EQUALS(RecordStoreV1Base::quantizeAllocationSpace(14 * 1024 * 1024 + 1),
+ 16 * 1024 * 1024 + 512 * 1024);
+ ASSERT_EQUALS(RecordStoreV1Base::quantizeAllocationSpace(16 * 1024 * 1024 + 512 * 1024),
+ 16 * 1024 * 1024 + 512 * 1024);
+}
- TEST( SimpleRecordStoreV1, quantizeAllocationMinMaxBound ) {
- const int maxSize = RecordStoreV1Base::MaxAllowedAllocation;
- ASSERT_EQUALS(RecordStoreV1Base::quantizeAllocationSpace(1), 32);
- ASSERT_EQUALS(RecordStoreV1Base::quantizeAllocationSpace(maxSize), maxSize);
- }
+TEST(SimpleRecordStoreV1, quantizeAllocationMinMaxBound) {
+ const int maxSize = RecordStoreV1Base::MaxAllowedAllocation;
+ ASSERT_EQUALS(RecordStoreV1Base::quantizeAllocationSpace(1), 32);
+ ASSERT_EQUALS(RecordStoreV1Base::quantizeAllocationSpace(maxSize), maxSize);
+}
- /**
- * Tests quantization of sizes around all valid bucket sizes.
- */
- TEST( SimpleRecordStoreV1, quantizeAroundBucketSizes ) {
- for (int bucket = 0; bucket < RecordStoreV1Base::Buckets - 2; bucket++) {
- const int size = RecordStoreV1Base::bucketSizes[bucket];
- const int nextSize = RecordStoreV1Base::bucketSizes[bucket + 1];
-
- // size - 1 is quantized to size.
- ASSERT_EQUALS( size,
- RecordStoreV1Base::quantizeAllocationSpace( size - 1 ) );
-
- // size is quantized to size.
- ASSERT_EQUALS( size,
- RecordStoreV1Base::quantizeAllocationSpace( size ) );
-
- // size + 1 is quantized to nextSize (if it is a valid allocation)
- if (size + 1 <= RecordStoreV1Base::MaxAllowedAllocation) {
- ASSERT_EQUALS( nextSize,
- RecordStoreV1Base::quantizeAllocationSpace( size + 1 ) );
- }
+/**
+ * Tests quantization of sizes around all valid bucket sizes.
+ */
+TEST(SimpleRecordStoreV1, quantizeAroundBucketSizes) {
+ for (int bucket = 0; bucket < RecordStoreV1Base::Buckets - 2; bucket++) {
+ const int size = RecordStoreV1Base::bucketSizes[bucket];
+ const int nextSize = RecordStoreV1Base::bucketSizes[bucket + 1];
+
+ // size - 1 is quantized to size.
+ ASSERT_EQUALS(size, RecordStoreV1Base::quantizeAllocationSpace(size - 1));
+
+ // size is quantized to size.
+ ASSERT_EQUALS(size, RecordStoreV1Base::quantizeAllocationSpace(size));
+
+ // size + 1 is quantized to nextSize (if it is a valid allocation)
+ if (size + 1 <= RecordStoreV1Base::MaxAllowedAllocation) {
+ ASSERT_EQUALS(nextSize, RecordStoreV1Base::quantizeAllocationSpace(size + 1));
}
}
+}
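
The three tests above pin the quantization rule to the bucket table: round a request up to the smallest bucket that fits. A re-implementation sketch consistent with them (hypothetical; the real quantizeAllocationSpace may differ in detail):

    // Assumes bucketSizes[] is sorted ascending and requests never exceed
    // MaxAllowedAllocation, as quantizeAllocationMinMaxBound exercises.
    int quantizeSketch(int bytes) {
        invariant(bytes >= 1 && bytes <= RecordStoreV1Base::MaxAllowedAllocation);
        for (int i = 0; i < RecordStoreV1Base::Buckets; i++) {
            if (RecordStoreV1Base::bucketSizes[i] >= bytes)
                return RecordStoreV1Base::bucketSizes[i];
        }
        return RecordStoreV1Base::MaxAllowedAllocation;
    }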
+
+BSONObj docForRecordSize(int size) {
+ BSONObjBuilder b;
+ b.append("_id", 5);
+ b.append("x", string(size - MmapV1RecordHeader::HeaderSize - 22, 'x'));
+ BSONObj x = b.obj();
+ ASSERT_EQUALS(MmapV1RecordHeader::HeaderSize + x.objsize(), size);
+ return x;
+}
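
For readers checking docForRecordSize()'s constant 22: it is the BSON overhead of { _id: 5, x: "..." } under the standard layout (worked bytes):

    //  4      object length (int32)
    //  9      "_id" element: type byte + "_id\0" + int32 value
    //  8 + n  "x" element: type byte + "x\0" + int32 length + n chars + '\0'
    //  1      EOO terminator
    // => objsize() == 22 + n, so a record of 'size' bytes needs
    //    n = size - MmapV1RecordHeader::HeaderSize - 22 payload characters.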
+
+class BsonDocWriter : public DocWriter {
+public:
+ BsonDocWriter(const BSONObj& obj, bool padding) : _obj(obj), _padding(padding) {}
- BSONObj docForRecordSize( int size ) {
- BSONObjBuilder b;
- b.append( "_id", 5 );
- b.append( "x", string( size - MmapV1RecordHeader::HeaderSize - 22, 'x' ) );
- BSONObj x = b.obj();
- ASSERT_EQUALS( MmapV1RecordHeader::HeaderSize + x.objsize(), size );
- return x;
+ virtual void writeDocument(char* buf) const {
+ memcpy(buf, _obj.objdata(), _obj.objsize());
+ }
+ virtual size_t documentSize() const {
+ return _obj.objsize();
+ }
+ virtual bool addPadding() const {
+ return _padding;
}
- class BsonDocWriter : public DocWriter {
- public:
- BsonDocWriter(const BSONObj& obj, bool padding) : _obj(obj), _padding(padding) {}
+private:
+ BSONObj _obj;
+ bool _padding;
+};
- virtual void writeDocument(char* buf) const { memcpy(buf, _obj.objdata(), _obj.objsize()); }
- virtual size_t documentSize() const { return _obj.objsize(); }
- virtual bool addPadding() const { return _padding; }
+/** alloc() quantizes the requested size using quantizeAllocationSpace() rules. */
+TEST(SimpleRecordStoreV1, AllocQuantized) {
+ OperationContextNoop txn;
+ DummyExtentManager em;
+ DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(false, 0);
- private:
- BSONObj _obj;
- bool _padding;
- };
+ string myns = "test.AllocQuantized";
+ SimpleRecordStoreV1 rs(&txn, myns, md, &em, false);
- /** alloc() quantizes the requested size using quantizeAllocationSpace() rules. */
- TEST(SimpleRecordStoreV1, AllocQuantized) {
- OperationContextNoop txn;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData( false, 0 );
+ BSONObj obj = docForRecordSize(300);
+ StatusWith<RecordId> result = rs.insertRecord(&txn, obj.objdata(), obj.objsize(), false);
+ ASSERT(result.isOK());
- string myns = "test.AllocQuantized";
- SimpleRecordStoreV1 rs( &txn, myns, md, &em, false );
+ // The length of the allocated record is quantized.
+ ASSERT_EQUALS(512, rs.dataFor(&txn, result.getValue()).size() + MmapV1RecordHeader::HeaderSize);
+}
- BSONObj obj = docForRecordSize( 300 );
- StatusWith<RecordId> result = rs.insertRecord( &txn, obj.objdata(), obj.objsize(), false);
- ASSERT( result.isOK() );
+TEST(SimpleRecordStoreV1, AllocNonQuantized) {
+ OperationContextNoop txn;
+ DummyExtentManager em;
+ DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(false, 0);
+ md->setUserFlag(&txn, CollectionOptions::Flag_NoPadding);
- // The length of the allocated record is quantized.
- ASSERT_EQUALS( 512 , rs.dataFor( &txn, result.getValue() ).size() + MmapV1RecordHeader::HeaderSize );
- }
+ string myns = "test.AllocQuantized";
+ SimpleRecordStoreV1 rs(&txn, myns, md, &em, false);
- TEST(SimpleRecordStoreV1, AllocNonQuantized) {
- OperationContextNoop txn;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData( false, 0 );
- md->setUserFlag(&txn, CollectionOptions::Flag_NoPadding);
+ BSONObj obj = docForRecordSize(300);
+ StatusWith<RecordId> result = rs.insertRecord(&txn, obj.objdata(), obj.objsize(), false);
+ ASSERT(result.isOK());
- string myns = "test.AllocQuantized";
- SimpleRecordStoreV1 rs( &txn, myns, md, &em, false );
+ // The length of the allocated record is not quantized.
+ ASSERT_EQUALS(300, rs.dataFor(&txn, result.getValue()).size() + MmapV1RecordHeader::HeaderSize);
+}
- BSONObj obj = docForRecordSize( 300 );
- StatusWith<RecordId> result = rs.insertRecord( &txn, obj.objdata(), obj.objsize(), false);
- ASSERT( result.isOK() );
+TEST(SimpleRecordStoreV1, AllocNonQuantizedStillAligned) {
+ OperationContextNoop txn;
+ DummyExtentManager em;
+ DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(false, 0);
+ md->setUserFlag(&txn, CollectionOptions::Flag_NoPadding);
- // The length of the allocated record is quantized.
- ASSERT_EQUALS( 300 , rs.dataFor( &txn, result.getValue() ).size() + MmapV1RecordHeader::HeaderSize );
- }
+ string myns = "test.AllocQuantized";
+ SimpleRecordStoreV1 rs(&txn, myns, md, &em, false);
- TEST(SimpleRecordStoreV1, AllocNonQuantizedStillAligned) {
- OperationContextNoop txn;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData( false, 0 );
- md->setUserFlag(&txn, CollectionOptions::Flag_NoPadding);
+ BSONObj obj = docForRecordSize(298);
+ StatusWith<RecordId> result = rs.insertRecord(&txn, obj.objdata(), obj.objsize(), false);
+ ASSERT(result.isOK());
- string myns = "test.AllocQuantized";
- SimpleRecordStoreV1 rs( &txn, myns, md, &em, false );
+ // The length of the allocated record is not quantized, only aligned up to 4 bytes.
+ ASSERT_EQUALS(300, rs.dataFor(&txn, result.getValue()).size() + MmapV1RecordHeader::HeaderSize);
+}
- BSONObj obj = docForRecordSize( 298 );
- StatusWith<RecordId> result = rs.insertRecord( &txn, obj.objdata(), obj.objsize(), false);
- ASSERT( result.isOK() );
+/** alloc() quantizes the requested size if DocWriter::addPadding() returns true. */
+TEST(SimpleRecordStoreV1, AllocQuantizedWithDocWriter) {
+ OperationContextNoop txn;
+ DummyExtentManager em;
+ DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(false, 0);
- // The length of the allocated record is quantized.
- ASSERT_EQUALS( 300 , rs.dataFor( &txn, result.getValue() ).size() + MmapV1RecordHeader::HeaderSize );
- }
+ string myns = "test.AllocQuantized";
+ SimpleRecordStoreV1 rs(&txn, myns, md, &em, false);
- /** alloc() quantizes the requested size if DocWriter::addPadding() returns true. */
- TEST(SimpleRecordStoreV1, AllocQuantizedWithDocWriter) {
- OperationContextNoop txn;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData( false, 0 );
+ BsonDocWriter docWriter(docForRecordSize(300), true);
+ StatusWith<RecordId> result = rs.insertRecord(&txn, &docWriter, false);
+ ASSERT(result.isOK());
- string myns = "test.AllocQuantized";
- SimpleRecordStoreV1 rs( &txn, myns, md, &em, false );
+ // The length of the allocated record is quantized.
+ ASSERT_EQUALS(512, rs.dataFor(&txn, result.getValue()).size() + MmapV1RecordHeader::HeaderSize);
+}
- BsonDocWriter docWriter(docForRecordSize( 300 ), true);
- StatusWith<RecordId> result = rs.insertRecord(&txn, &docWriter, false);
- ASSERT( result.isOK() );
+/**
+ * alloc() does not quantize records if DocWriter::addPadding() returns false
+ */
+TEST(SimpleRecordStoreV1, AllocNonQuantizedDocWriter) {
+ OperationContextNoop txn;
+ DummyExtentManager em;
+ DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(false, 0);
- // The length of the allocated record is quantized.
- ASSERT_EQUALS( 512 , rs.dataFor( &txn, result.getValue() ).size() + MmapV1RecordHeader::HeaderSize );
- }
+ string myns = "test.AllocIndexNamespaceNotQuantized";
+ SimpleRecordStoreV1 rs(&txn, myns + "$x", md, &em, false);
- /**
- * alloc() does not quantize records if DocWriter::addPadding() returns false
- */
- TEST(SimpleRecordStoreV1, AllocNonQuantizedDocWriter) {
- OperationContextNoop txn;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData( false, 0 );
+ BsonDocWriter docWriter(docForRecordSize(300), false);
+ StatusWith<RecordId> result = rs.insertRecord(&txn, &docWriter, false);
+ ASSERT(result.isOK());
- string myns = "test.AllocIndexNamespaceNotQuantized";
- SimpleRecordStoreV1 rs( &txn, myns + "$x", md, &em, false );
+ // The length of the allocated record is not quantized.
+ ASSERT_EQUALS(300, rs.dataFor(&txn, result.getValue()).size() + MmapV1RecordHeader::HeaderSize);
+}
+
+/** alloc() aligns record sizes up to 4 bytes even if DocWriter::addPadding returns false. */
+TEST(SimpleRecordStoreV1, AllocAlignedDocWriter) {
+ OperationContextNoop txn;
+ DummyExtentManager em;
+ DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(false, 0);
- BsonDocWriter docWriter(docForRecordSize( 300 ), false);
- StatusWith<RecordId> result = rs.insertRecord(&txn, &docWriter, false);
- ASSERT( result.isOK() );
+ string myns = "test.AllocIndexNamespaceNotQuantized";
+ SimpleRecordStoreV1 rs(&txn, myns + "$x", md, &em, false);
- // The length of the allocated record is not quantized.
- ASSERT_EQUALS( 300, rs.dataFor( &txn, result.getValue() ).size() + MmapV1RecordHeader::HeaderSize );
+ BsonDocWriter docWriter(docForRecordSize(298), false);
+ StatusWith<RecordId> result = rs.insertRecord(&txn, &docWriter, false);
+ ASSERT(result.isOK());
+ ASSERT_EQUALS(300, rs.dataFor(&txn, result.getValue()).size() + MmapV1RecordHeader::HeaderSize);
+}
+/**
+ * alloc() with quantized size doesn't split if there is not enough room left over.
+ */
+TEST(SimpleRecordStoreV1, AllocUseQuantizedDeletedRecordWithoutSplit) {
+ OperationContextNoop txn;
+ DummyExtentManager em;
+ DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(false, 0);
+ SimpleRecordStoreV1 rs(&txn, "test.foo", md, &em, false);
+
+ {
+ LocAndSize drecs[] = {{DiskLoc(0, 1000), 512 + 31}, {}};
+ initializeV1RS(&txn, NULL, drecs, NULL, &em, md);
}
- /** alloc() aligns record sizes up to 4 bytes even if DocWriter::addPadding returns false. */
- TEST(SimpleRecordStoreV1, AllocAlignedDocWriter) {
- OperationContextNoop txn;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData( false, 0 );
+ BsonDocWriter docWriter(docForRecordSize(300), true);
+ StatusWith<RecordId> actualLocation = rs.insertRecord(&txn, &docWriter, false);
+ ASSERT_OK(actualLocation.getStatus());
- string myns = "test.AllocIndexNamespaceNotQuantized";
- SimpleRecordStoreV1 rs( &txn, myns + "$x", md, &em, false );
+ {
+ LocAndSize recs[] = {{DiskLoc(0, 1000), 512 + 31}, {}};
+ LocAndSize drecs[] = {{}};
+ assertStateV1RS(&txn, recs, drecs, NULL, &em, md);
+ }
+}
- BsonDocWriter docWriter(docForRecordSize( 298 ), false);
- StatusWith<RecordId> result = rs.insertRecord(&txn, &docWriter, false);
- ASSERT( result.isOK() );
+/**
+ * alloc() with quantized size splits if enough room is left over.
+ */
+TEST(SimpleRecordStoreV1, AllocUseQuantizedDeletedRecordWithSplit) {
+ OperationContextNoop txn;
+ DummyExtentManager em;
+ DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(false, 0);
+ SimpleRecordStoreV1 rs(&txn, "test.foo", md, &em, false);
+
+ {
+ LocAndSize drecs[] = {{DiskLoc(0, 1000), 512 + 32}, {}};
+ initializeV1RS(&txn, NULL, drecs, NULL, &em, md);
+ }
+
+ BsonDocWriter docWriter(docForRecordSize(300), true);
+ StatusWith<RecordId> actualLocation = rs.insertRecord(&txn, &docWriter, false);
+ ASSERT_OK(actualLocation.getStatus());
- ASSERT_EQUALS( 300, rs.dataFor( &txn, result.getValue() ).size() + MmapV1RecordHeader::HeaderSize );
+ {
+ LocAndSize recs[] = {{DiskLoc(0, 1000), 512}, {}};
+ LocAndSize drecs[] = {{DiskLoc(0, 1512), 32}, {}};
+ assertStateV1RS(&txn, recs, drecs, NULL, &em, md);
}
- /**
- * alloc() with quantized size doesn't split if enough room left over.
- */
- TEST(SimpleRecordStoreV1, AllocUseQuantizedDeletedRecordWithoutSplit) {
- OperationContextNoop txn;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData( false, 0 );
- SimpleRecordStoreV1 rs( &txn, "test.foo", md, &em, false );
-
- {
- LocAndSize drecs[] = {
- {DiskLoc(0, 1000), 512 + 31},
- {}
- };
- initializeV1RS(&txn, NULL, drecs, NULL, &em, md);
- }
+}
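
Worked numbers for the two tests above (32 bytes is the smallest bucket size, per quantizeAllocationMinMaxBound, and is the apparent split floor):

    // Request: docForRecordSize(300) -> quantized allocation of 512 bytes.
    // WithoutSplit: deleted record of 512 + 31 = 543 bytes.
    //   leftover 543 - 512 = 31 < 32   -> no split; the record keeps all 543.
    // WithSplit:    deleted record of 512 + 32 = 544 bytes.
    //   leftover 544 - 512 = 32 >= 32  -> split; the record gets 512 and a new
    //   32-byte deleted record appears at offset 1000 + 512 = 1512.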
- BsonDocWriter docWriter(docForRecordSize( 300 ), true);
- StatusWith<RecordId> actualLocation = rs.insertRecord(&txn, &docWriter, false);
- ASSERT_OK( actualLocation.getStatus() );
-
- {
- LocAndSize recs[] = {
- {DiskLoc(0, 1000), 512 + 31},
- {}
- };
- LocAndSize drecs[] = {
- {}
- };
- assertStateV1RS(&txn, recs, drecs, NULL, &em, md);
- }
+/**
+ * alloc() with non-quantized size doesn't split if there is not enough room left over.
+ */
+TEST(SimpleRecordStoreV1, AllocUseNonQuantizedDeletedRecordWithoutSplit) {
+ OperationContextNoop txn;
+ DummyExtentManager em;
+ DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(false, 0);
+ SimpleRecordStoreV1 rs(&txn, "test.foo", md, &em, false);
+
+ {
+ LocAndSize drecs[] = {{DiskLoc(0, 1000), 331}, {}};
+ initializeV1RS(&txn, NULL, drecs, NULL, &em, md);
}
- /**
- * alloc() with quantized size splits if enough room left over.
- */
- TEST(SimpleRecordStoreV1, AllocUseQuantizedDeletedRecordWithSplit) {
- OperationContextNoop txn;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData( false, 0 );
- SimpleRecordStoreV1 rs( &txn, "test.foo", md, &em, false );
-
- {
- LocAndSize drecs[] = {
- {DiskLoc(0, 1000), 512 + 32},
- {}
- };
- initializeV1RS(&txn, NULL, drecs, NULL, &em, md);
- }
+ BsonDocWriter docWriter(docForRecordSize(300), false);
+ StatusWith<RecordId> actualLocation = rs.insertRecord(&txn, &docWriter, false);
+ ASSERT_OK(actualLocation.getStatus());
- BsonDocWriter docWriter(docForRecordSize( 300 ), true);
- StatusWith<RecordId> actualLocation = rs.insertRecord(&txn, &docWriter, false);
- ASSERT_OK( actualLocation.getStatus() );
-
- {
- LocAndSize recs[] = {
- {DiskLoc(0, 1000), 512},
- {}
- };
- LocAndSize drecs[] = {
- {DiskLoc(0, 1512), 32},
- {}
- };
- assertStateV1RS(&txn, recs, drecs, NULL, &em, md);
- }
+ {
+ LocAndSize recs[] = {{DiskLoc(0, 1000), 331}, {}};
+ LocAndSize drecs[] = {{}};
+ assertStateV1RS(&txn, recs, drecs, NULL, &em, md);
}
+}
- /**
- * alloc() with non quantized size doesn't split if enough room left over.
- */
- TEST(SimpleRecordStoreV1, AllocUseNonQuantizedDeletedRecordWithoutSplit) {
- OperationContextNoop txn;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData( false, 0 );
- SimpleRecordStoreV1 rs( &txn, "test.foo", md, &em, false );
-
- {
- LocAndSize drecs[] = {
- {DiskLoc(0, 1000), 331},
- {}
- };
- initializeV1RS(&txn, NULL, drecs, NULL, &em, md);
- }
-
- BsonDocWriter docWriter(docForRecordSize( 300 ), false);
- StatusWith<RecordId> actualLocation = rs.insertRecord(&txn, &docWriter, false);
- ASSERT_OK( actualLocation.getStatus() );
-
- {
- LocAndSize recs[] = {
- {DiskLoc(0, 1000), 331},
- {}
- };
- LocAndSize drecs[] = {
- {}
- };
- assertStateV1RS(&txn, recs, drecs, NULL, &em, md);
- }
+/**
+ * alloc() with non-quantized size splits if enough room is left over.
+ */
+TEST(SimpleRecordStoreV1, AllocUseNonQuantizedDeletedRecordWithSplit) {
+ OperationContextNoop txn;
+ DummyExtentManager em;
+ DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(false, 0);
+ SimpleRecordStoreV1 rs(&txn, "test.foo", md, &em, false);
+
+ {
+ LocAndSize drecs[] = {{DiskLoc(0, 1000), 332}, {}};
+ initializeV1RS(&txn, NULL, drecs, NULL, &em, md);
}
- /**
- * alloc() with non quantized size splits if enough room left over.
- */
- TEST(SimpleRecordStoreV1, AllocUseNonQuantizedDeletedRecordWithSplit) {
- OperationContextNoop txn;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData( false, 0 );
- SimpleRecordStoreV1 rs( &txn, "test.foo", md, &em, false );
-
- {
- LocAndSize drecs[] = {
- {DiskLoc(0, 1000), 332},
- {}
- };
- initializeV1RS(&txn, NULL, drecs, NULL, &em, md);
- }
+ BsonDocWriter docWriter(docForRecordSize(300), false);
+ StatusWith<RecordId> actualLocation = rs.insertRecord(&txn, &docWriter, false);
+ ASSERT_OK(actualLocation.getStatus());
- BsonDocWriter docWriter(docForRecordSize( 300 ), false);
- StatusWith<RecordId> actualLocation = rs.insertRecord(&txn, &docWriter, false);
- ASSERT_OK( actualLocation.getStatus() );
-
- {
- LocAndSize recs[] = {
- {DiskLoc(0, 1000), 300},
- {}
- };
- LocAndSize drecs[] = {
- {DiskLoc(0, 1300), 32},
- {}
- };
- assertStateV1RS(&txn, recs, drecs, NULL, &em, md);
- }
+ {
+ LocAndSize recs[] = {{DiskLoc(0, 1000), 300}, {}};
+ LocAndSize drecs[] = {{DiskLoc(0, 1300), 32}, {}};
+ assertStateV1RS(&txn, recs, drecs, NULL, &em, md);
}
+}
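
The same accounting for the non-quantized pair above (addPadding() == false, so the 300-byte request is used as-is):

    // WithoutSplit: drec of 331 -> leftover 31 < 32   -> record keeps 331 bytes.
    // WithSplit:    drec of 332 -> leftover 32 >= 32  -> record gets 300 and a
    //               32-byte deleted record is created at offset 1300.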
- /**
- * alloc() will use from the legacy grab bag if it can.
- */
- TEST(SimpleRecordStoreV1, GrabBagIsUsed) {
- OperationContextNoop txn;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData( false, 0 );
- SimpleRecordStoreV1 rs( &txn, "test.foo", md, &em, false );
-
- {
- LocAndSize drecs[] = {
- {}
- };
- LocAndSize grabBag[] = {
- {DiskLoc(0, 1000), 4*1024*1024},
- {DiskLoc(1, 1000), 4*1024*1024},
- {}
- };
- initializeV1RS(&txn, NULL, drecs, grabBag, &em, md);
- }
+/**
+ * alloc() will allocate from the legacy grab bag if it can.
+ */
+TEST(SimpleRecordStoreV1, GrabBagIsUsed) {
+ OperationContextNoop txn;
+ DummyExtentManager em;
+ DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(false, 0);
+ SimpleRecordStoreV1 rs(&txn, "test.foo", md, &em, false);
+
+ {
+ LocAndSize drecs[] = {{}};
+ LocAndSize grabBag[] = {
+ {DiskLoc(0, 1000), 4 * 1024 * 1024}, {DiskLoc(1, 1000), 4 * 1024 * 1024}, {}};
+ initializeV1RS(&txn, NULL, drecs, grabBag, &em, md);
+ }
- BsonDocWriter docWriter(docForRecordSize( 256 ), false);
- StatusWith<RecordId> actualLocation = rs.insertRecord(&txn, &docWriter, false);
- ASSERT_OK( actualLocation.getStatus() );
-
- {
- LocAndSize recs[] = {
- {DiskLoc(0, 1000), 256},
- {}
- };
- LocAndSize drecs[] = {
- {DiskLoc(0, 1256), 4*1024*1024 - 256},
- {}
- };
- LocAndSize grabBag[] = {
- {DiskLoc(1, 1000), 4*1024*1024},
- {}
- };
- assertStateV1RS(&txn, recs, drecs, grabBag, &em, md);
- }
+ BsonDocWriter docWriter(docForRecordSize(256), false);
+ StatusWith<RecordId> actualLocation = rs.insertRecord(&txn, &docWriter, false);
+ ASSERT_OK(actualLocation.getStatus());
+
+ {
+ LocAndSize recs[] = {{DiskLoc(0, 1000), 256}, {}};
+ LocAndSize drecs[] = {{DiskLoc(0, 1256), 4 * 1024 * 1024 - 256}, {}};
+ LocAndSize grabBag[] = {{DiskLoc(1, 1000), 4 * 1024 * 1024}, {}};
+ assertStateV1RS(&txn, recs, drecs, grabBag, &em, md);
}
+}
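
Tracing GrabBagIsUsed above: the legacy grab bag holds whole extents outside the bucketed deleted lists, and alloc() pops one entry per allocation attempt before retrying (worked state, per the asserts):

    // before: drecs = {}               grabBag = {{0:1000, 4MB}, {1:1000, 4MB}}
    // insert 256 bytes:
    //   pop {0:1000, 4MB} into the deleted lists, then carve the record:
    //     recs    = {{0:1000, 256}}
    //     drecs   = {{0:1256, 4MB - 256}}
    //     grabBag = {{1:1000, 4MB}}
    // The two "popped even if..." tests below show the pop happens on every
    // attempt, whether or not the popped entry ends up satisfying the request.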
- /**
- * alloc() will pull from the legacy grab bag even if it isn't needed.
- */
- TEST(SimpleRecordStoreV1, GrabBagIsPoppedEvenIfUnneeded) {
- OperationContextNoop txn;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData( false, 0 );
- SimpleRecordStoreV1 rs( &txn, "test.foo", md, &em, false );
-
- {
- LocAndSize drecs[] = {
- {DiskLoc(0, 1000), 1000},
- {}
- };
- LocAndSize grabBag[] = {
- {DiskLoc(1, 1000), 4*1024*1024},
- {DiskLoc(2, 1000), 4*1024*1024},
- {}
- };
- initializeV1RS(&txn, NULL, drecs, grabBag, &em, md);
- }
+/**
+ * alloc() will pull from the legacy grab bag even if it isn't needed.
+ */
+TEST(SimpleRecordStoreV1, GrabBagIsPoppedEvenIfUnneeded) {
+ OperationContextNoop txn;
+ DummyExtentManager em;
+ DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(false, 0);
+ SimpleRecordStoreV1 rs(&txn, "test.foo", md, &em, false);
+
+ {
+ LocAndSize drecs[] = {{DiskLoc(0, 1000), 1000}, {}};
+ LocAndSize grabBag[] = {
+ {DiskLoc(1, 1000), 4 * 1024 * 1024}, {DiskLoc(2, 1000), 4 * 1024 * 1024}, {}};
+ initializeV1RS(&txn, NULL, drecs, grabBag, &em, md);
+ }
- BsonDocWriter docWriter(docForRecordSize( 1000 ), false);
- StatusWith<RecordId> actualLocation = rs.insertRecord(&txn, &docWriter, false);
- ASSERT_OK( actualLocation.getStatus() );
-
- {
- LocAndSize recs[] = {
- {DiskLoc(0, 1000), 1000},
- {}
- };
- LocAndSize drecs[] = {
- {DiskLoc(1, 1000), 4*1024*1024},
- {}
- };
- LocAndSize grabBag[] = {
- {DiskLoc(2, 1000), 4*1024*1024},
- {}
- };
- assertStateV1RS(&txn, recs, drecs, grabBag, &em, md);
- }
+ BsonDocWriter docWriter(docForRecordSize(1000), false);
+ StatusWith<RecordId> actualLocation = rs.insertRecord(&txn, &docWriter, false);
+ ASSERT_OK(actualLocation.getStatus());
+
+ {
+ LocAndSize recs[] = {{DiskLoc(0, 1000), 1000}, {}};
+ LocAndSize drecs[] = {{DiskLoc(1, 1000), 4 * 1024 * 1024}, {}};
+ LocAndSize grabBag[] = {{DiskLoc(2, 1000), 4 * 1024 * 1024}, {}};
+ assertStateV1RS(&txn, recs, drecs, grabBag, &em, md);
}
+}
- /**
- * alloc() will pull from the legacy grab bag even if it can't be used
- */
- TEST(SimpleRecordStoreV1, GrabBagIsPoppedEvenIfUnusable) {
- OperationContextNoop txn;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData( false, 0 );
- SimpleRecordStoreV1 rs( &txn, "test.foo", md, &em, false );
-
- {
- LocAndSize drecs[] = {
- {DiskLoc(0, 1000), 8*1024*1024},
- {}
- };
- LocAndSize grabBag[] = {
- {DiskLoc(1, 1000), 4*1024*1024},
- {DiskLoc(2, 1000), 4*1024*1024},
- {}
- };
- initializeV1RS(&txn, NULL, drecs, grabBag, &em, md);
- }
+/**
+ * alloc() will pull from the legacy grab bag even if it can't be used.
+ */
+TEST(SimpleRecordStoreV1, GrabBagIsPoppedEvenIfUnusable) {
+ OperationContextNoop txn;
+ DummyExtentManager em;
+ DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(false, 0);
+ SimpleRecordStoreV1 rs(&txn, "test.foo", md, &em, false);
+
+ {
+ LocAndSize drecs[] = {{DiskLoc(0, 1000), 8 * 1024 * 1024}, {}};
+ LocAndSize grabBag[] = {
+ {DiskLoc(1, 1000), 4 * 1024 * 1024}, {DiskLoc(2, 1000), 4 * 1024 * 1024}, {}};
+ initializeV1RS(&txn, NULL, drecs, grabBag, &em, md);
+ }
- BsonDocWriter docWriter(docForRecordSize( 8*1024*1024 ), false);
- StatusWith<RecordId> actualLocation = rs.insertRecord(&txn, &docWriter, false);
- ASSERT_OK( actualLocation.getStatus() );
-
- {
- LocAndSize recs[] = {
- {DiskLoc(0, 1000), 8*1024*1024},
- {}
- };
- LocAndSize drecs[] = {
- {DiskLoc(1, 1000), 4*1024*1024},
- {}
- };
- LocAndSize grabBag[] = {
- {DiskLoc(2, 1000), 4*1024*1024},
- {}
- };
- assertStateV1RS(&txn, recs, drecs, grabBag, &em, md);
- }
+ BsonDocWriter docWriter(docForRecordSize(8 * 1024 * 1024), false);
+ StatusWith<RecordId> actualLocation = rs.insertRecord(&txn, &docWriter, false);
+ ASSERT_OK(actualLocation.getStatus());
+
+ {
+ LocAndSize recs[] = {{DiskLoc(0, 1000), 8 * 1024 * 1024}, {}};
+ LocAndSize drecs[] = {{DiskLoc(1, 1000), 4 * 1024 * 1024}, {}};
+ LocAndSize grabBag[] = {{DiskLoc(2, 1000), 4 * 1024 * 1024}, {}};
+ assertStateV1RS(&txn, recs, drecs, grabBag, &em, md);
}
+}
+
+// -----------------
+
+TEST(SimpleRecordStoreV1, FullSimple1) {
+ OperationContextNoop txn;
+ DummyExtentManager em;
+ DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(false, 0);
+ SimpleRecordStoreV1 rs(&txn, "test.foo", md, &em, false);
- // -----------------
-
- TEST( SimpleRecordStoreV1, FullSimple1 ) {
- OperationContextNoop txn;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData( false, 0 );
- SimpleRecordStoreV1 rs( &txn,
- "test.foo",
- md,
- &em,
- false );
-
-
- ASSERT_EQUALS( 0, md->numRecords() );
- StatusWith<RecordId> result = rs.insertRecord( &txn, "abc", 4, 1000 );
- ASSERT_TRUE( result.isOK() );
- ASSERT_EQUALS( 1, md->numRecords() );
- RecordData recordData = rs.dataFor( &txn, result.getValue() );
- ASSERT_EQUALS( string("abc"), string(recordData.data()) );
+
+ ASSERT_EQUALS(0, md->numRecords());
+ StatusWith<RecordId> result = rs.insertRecord(&txn, "abc", 4, 1000);
+ ASSERT_TRUE(result.isOK());
+ ASSERT_EQUALS(1, md->numRecords());
+ RecordData recordData = rs.dataFor(&txn, result.getValue());
+ ASSERT_EQUALS(string("abc"), string(recordData.data()));
+}
+
+// -----------------
+
+TEST(SimpleRecordStoreV1, Truncate) {
+ OperationContextNoop txn;
+ DummyExtentManager em;
+ DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(false, 0);
+ SimpleRecordStoreV1 rs(&txn, "test.foo", md, &em, false);
+
+ {
+ LocAndSize recs[] = {{DiskLoc(0, 1000), 100},
+ {DiskLoc(0, 1100), 100},
+ {DiskLoc(0, 1300), 100},
+ {DiskLoc(2, 1100), 100},
+ {}};
+ LocAndSize drecs[] = {
+ {DiskLoc(0, 1200), 100}, {DiskLoc(2, 1000), 100}, {DiskLoc(1, 1000), 1000}, {}};
+
+ initializeV1RS(&txn, recs, drecs, NULL, &em, md);
+
+ ASSERT_EQUALS(em.getExtent(DiskLoc(0, 0))->length, em.minSize());
}
- // -----------------
-
- TEST( SimpleRecordStoreV1, Truncate ) {
- OperationContextNoop txn;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData( false, 0 );
- SimpleRecordStoreV1 rs( &txn, "test.foo", md, &em, false );
-
- {
- LocAndSize recs[] = {
- {DiskLoc(0, 1000), 100},
- {DiskLoc(0, 1100), 100},
- {DiskLoc(0, 1300), 100},
- {DiskLoc(2, 1100), 100},
- {}
- };
- LocAndSize drecs[] = {
- {DiskLoc(0, 1200), 100},
- {DiskLoc(2, 1000), 100},
- {DiskLoc(1, 1000), 1000},
- {}
- };
-
- initializeV1RS(&txn, recs, drecs, NULL, &em, md);
-
- ASSERT_EQUALS(em.getExtent(DiskLoc(0, 0))->length, em.minSize());
- }
+ rs.truncate(&txn);
- rs.truncate(&txn);
-
- {
- LocAndSize recs[] = {
- {}
- };
- LocAndSize drecs[] = {
- // One extent filled with a single deleted record.
- {DiskLoc(0, Extent::HeaderSize()), em.minSize() - Extent::HeaderSize()},
- {}
- };
- assertStateV1RS(&txn, recs, drecs, NULL, &em, md);
- }
+ {
+ LocAndSize recs[] = {{}};
+ LocAndSize drecs[] = {
+ // One extent filled with a single deleted record.
+ {DiskLoc(0, Extent::HeaderSize()), em.minSize() - Extent::HeaderSize()},
+ {}};
+ assertStateV1RS(&txn, recs, drecs, NULL, &em, md);
}
}
+}
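
The final assert above encodes the truncate() contract: every extent but the first is freed, and the survivor's usable space becomes one deleted record (worked sizes):

    // recs  = {}   // no live records remain
    // drecs = {{DiskLoc(0, Extent::HeaderSize()),
    //           em.minSize() - Extent::HeaderSize()}}
    // i.e. a single minSize() extent whose non-header bytes form one
    // deleted record, immediately reusable by later inserts.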
diff --git a/src/mongo/db/storage/mmap_v1/record_store_v1_test_help.cpp b/src/mongo/db/storage/mmap_v1/record_store_v1_test_help.cpp
index 7bfaee1867e..12801124b95 100644
--- a/src/mongo/db/storage/mmap_v1/record_store_v1_test_help.cpp
+++ b/src/mongo/db/storage/mmap_v1/record_store_v1_test_help.cpp
@@ -47,631 +47,609 @@
namespace mongo {
- using std::numeric_limits;
-
- DummyRecordStoreV1MetaData::DummyRecordStoreV1MetaData( bool capped, int userFlags ) {
- _dataSize = 0;
- _numRecords = 0;
- _capped = capped;
- _userFlags = userFlags;
- _lastExtentSize = 0;
- _paddingFactor = 1;
- _maxCappedDocs = numeric_limits<long long>::max();
- _capFirstNewRecord.setInvalid();
- if ( _capped ) {
- // copied from NamespaceDetails::NamespaceDetails()
- setDeletedListEntry( NULL, 1, DiskLoc().setInvalid() );
- }
- }
-
- const DiskLoc& DummyRecordStoreV1MetaData::capExtent() const {
- return _capExtent;
- }
-
- void DummyRecordStoreV1MetaData::setCapExtent( OperationContext* txn,
- const DiskLoc& loc ) {
- _capExtent = loc;
- }
-
- const DiskLoc& DummyRecordStoreV1MetaData::capFirstNewRecord() const {
- return _capFirstNewRecord;
- }
-
- void DummyRecordStoreV1MetaData::setCapFirstNewRecord( OperationContext* txn,
- const DiskLoc& loc ) {
- _capFirstNewRecord = loc;
- }
-
- long long DummyRecordStoreV1MetaData::dataSize() const {
- return _dataSize;
+using std::numeric_limits;
+
+DummyRecordStoreV1MetaData::DummyRecordStoreV1MetaData(bool capped, int userFlags) {
+ _dataSize = 0;
+ _numRecords = 0;
+ _capped = capped;
+ _userFlags = userFlags;
+ _lastExtentSize = 0;
+ _paddingFactor = 1;
+ _maxCappedDocs = numeric_limits<long long>::max();
+ _capFirstNewRecord.setInvalid();
+ if (_capped) {
+ // copied from NamespaceDetails::NamespaceDetails()
+ setDeletedListEntry(NULL, 1, DiskLoc().setInvalid());
}
+}
- long long DummyRecordStoreV1MetaData::numRecords() const {
- return _numRecords;
- }
+const DiskLoc& DummyRecordStoreV1MetaData::capExtent() const {
+ return _capExtent;
+}
- void DummyRecordStoreV1MetaData::incrementStats( OperationContext* txn,
- long long dataSizeIncrement,
- long long numRecordsIncrement ) {
- _dataSize += dataSizeIncrement;
- _numRecords += numRecordsIncrement;
- }
+void DummyRecordStoreV1MetaData::setCapExtent(OperationContext* txn, const DiskLoc& loc) {
+ _capExtent = loc;
+}
- void DummyRecordStoreV1MetaData::setStats( OperationContext* txn,
- long long dataSize,
- long long numRecords ) {
- _dataSize = dataSize;
- _numRecords = numRecords;
- }
+const DiskLoc& DummyRecordStoreV1MetaData::capFirstNewRecord() const {
+ return _capFirstNewRecord;
+}
- namespace {
- DiskLoc myNull;
- }
+void DummyRecordStoreV1MetaData::setCapFirstNewRecord(OperationContext* txn, const DiskLoc& loc) {
+ _capFirstNewRecord = loc;
+}
- DiskLoc DummyRecordStoreV1MetaData::deletedListEntry( int bucket ) const {
- invariant( bucket >= 0 );
- if ( static_cast<size_t>( bucket ) >= _deletedLists.size() )
- return myNull;
- return _deletedLists[bucket];
- }
+long long DummyRecordStoreV1MetaData::dataSize() const {
+ return _dataSize;
+}
- void DummyRecordStoreV1MetaData::setDeletedListEntry( OperationContext* txn,
- int bucket,
- const DiskLoc& loc ) {
- invariant( bucket >= 0 );
- invariant( bucket < 1000 );
- while ( static_cast<size_t>( bucket ) >= _deletedLists.size() )
- _deletedLists.push_back( DiskLoc() );
- _deletedLists[bucket] = loc;
- }
+long long DummyRecordStoreV1MetaData::numRecords() const {
+ return _numRecords;
+}
- DiskLoc DummyRecordStoreV1MetaData::deletedListLegacyGrabBag() const {
- return _deletedListLegacyGrabBag;
- }
+void DummyRecordStoreV1MetaData::incrementStats(OperationContext* txn,
+ long long dataSizeIncrement,
+ long long numRecordsIncrement) {
+ _dataSize += dataSizeIncrement;
+ _numRecords += numRecordsIncrement;
+}
- void DummyRecordStoreV1MetaData::setDeletedListLegacyGrabBag(OperationContext* txn,
- const DiskLoc& loc) {
- _deletedListLegacyGrabBag = loc;
- }
+void DummyRecordStoreV1MetaData::setStats(OperationContext* txn,
+ long long dataSize,
+ long long numRecords) {
+ _dataSize = dataSize;
+ _numRecords = numRecords;
+}
- void DummyRecordStoreV1MetaData::orphanDeletedList(OperationContext* txn) {
- // They will be recreated on demand.
- _deletedLists.clear();
- }
+namespace {
+DiskLoc myNull;
+}
- const DiskLoc& DummyRecordStoreV1MetaData::firstExtent(OperationContext* txn) const {
- return _firstExtent;
- }
+DiskLoc DummyRecordStoreV1MetaData::deletedListEntry(int bucket) const {
+ invariant(bucket >= 0);
+ if (static_cast<size_t>(bucket) >= _deletedLists.size())
+ return myNull;
+ return _deletedLists[bucket];
+}
- void DummyRecordStoreV1MetaData::setFirstExtent( OperationContext* txn,
- const DiskLoc& loc ) {
- _firstExtent = loc;
- }
+void DummyRecordStoreV1MetaData::setDeletedListEntry(OperationContext* txn,
+ int bucket,
+ const DiskLoc& loc) {
+ invariant(bucket >= 0);
+ invariant(bucket < 1000);
+ while (static_cast<size_t>(bucket) >= _deletedLists.size())
+ _deletedLists.push_back(DiskLoc());
+ _deletedLists[bucket] = loc;
+}
- const DiskLoc& DummyRecordStoreV1MetaData::lastExtent(OperationContext* txn) const {
- return _lastExtent;
- }
+DiskLoc DummyRecordStoreV1MetaData::deletedListLegacyGrabBag() const {
+ return _deletedListLegacyGrabBag;
+}
- void DummyRecordStoreV1MetaData::setLastExtent( OperationContext* txn,
- const DiskLoc& loc ) {
- _lastExtent = loc;
- }
+void DummyRecordStoreV1MetaData::setDeletedListLegacyGrabBag(OperationContext* txn,
+ const DiskLoc& loc) {
+ _deletedListLegacyGrabBag = loc;
+}
- bool DummyRecordStoreV1MetaData::isCapped() const {
- return _capped;
- }
+void DummyRecordStoreV1MetaData::orphanDeletedList(OperationContext* txn) {
+ // They will be recreated on demand.
+ _deletedLists.clear();
+}
- bool DummyRecordStoreV1MetaData::isUserFlagSet( int flag ) const {
- return _userFlags & flag;
- }
+const DiskLoc& DummyRecordStoreV1MetaData::firstExtent(OperationContext* txn) const {
+ return _firstExtent;
+}
- bool DummyRecordStoreV1MetaData::setUserFlag( OperationContext* txn, int flag ) {
- if ( ( _userFlags & flag ) == flag )
- return false;
+void DummyRecordStoreV1MetaData::setFirstExtent(OperationContext* txn, const DiskLoc& loc) {
+ _firstExtent = loc;
+}
- _userFlags |= flag;
- return true;
+const DiskLoc& DummyRecordStoreV1MetaData::lastExtent(OperationContext* txn) const {
+ return _lastExtent;
+}
- }
- bool DummyRecordStoreV1MetaData::clearUserFlag( OperationContext* txn, int flag ) {
- if ( ( _userFlags & flag ) == 0 )
- return false;
+void DummyRecordStoreV1MetaData::setLastExtent(OperationContext* txn, const DiskLoc& loc) {
+ _lastExtent = loc;
+}
- _userFlags &= ~flag;
- return true;
+bool DummyRecordStoreV1MetaData::isCapped() const {
+ return _capped;
+}
- }
- bool DummyRecordStoreV1MetaData::replaceUserFlags( OperationContext* txn, int flags ) {
- if ( _userFlags == flags )
- return false;
- _userFlags = flags;
- return true;
- }
+bool DummyRecordStoreV1MetaData::isUserFlagSet(int flag) const {
+ return _userFlags & flag;
+}
+bool DummyRecordStoreV1MetaData::setUserFlag(OperationContext* txn, int flag) {
+ if ((_userFlags & flag) == flag)
+ return false;
- int DummyRecordStoreV1MetaData::lastExtentSize(OperationContext* txn) const {
- return _lastExtentSize;
- }
+ _userFlags |= flag;
+ return true;
+}
+bool DummyRecordStoreV1MetaData::clearUserFlag(OperationContext* txn, int flag) {
+ if ((_userFlags & flag) == 0)
+ return false;
- void DummyRecordStoreV1MetaData::setLastExtentSize( OperationContext* txn, int newMax ) {
- _lastExtentSize = newMax;
- }
+ _userFlags &= ~flag;
+ return true;
+}
+bool DummyRecordStoreV1MetaData::replaceUserFlags(OperationContext* txn, int flags) {
+ if (_userFlags == flags)
+ return false;
+ _userFlags = flags;
+ return true;
+}
- long long DummyRecordStoreV1MetaData::maxCappedDocs() const {
- return _maxCappedDocs;
- }
- // -----------------------------------------
+int DummyRecordStoreV1MetaData::lastExtentSize(OperationContext* txn) const {
+ return _lastExtentSize;
+}
- DummyExtentManager::~DummyExtentManager() {
- for ( size_t i = 0; i < _extents.size(); i++ ) {
- if ( _extents[i].data )
- free( _extents[i].data );
- }
- }
+void DummyRecordStoreV1MetaData::setLastExtentSize(OperationContext* txn, int newMax) {
+ _lastExtentSize = newMax;
+}
- Status DummyExtentManager::init(OperationContext* txn) {
- return Status::OK();
- }
+long long DummyRecordStoreV1MetaData::maxCappedDocs() const {
+ return _maxCappedDocs;
+}
- int DummyExtentManager::numFiles() const {
- return static_cast<int>( _extents.size() );
- }
+// -----------------------------------------
- long long DummyExtentManager::fileSize() const {
- invariant( false );
- return -1;
+DummyExtentManager::~DummyExtentManager() {
+ for (size_t i = 0; i < _extents.size(); i++) {
+ if (_extents[i].data)
+ free(_extents[i].data);
}
+}
- DiskLoc DummyExtentManager::allocateExtent( OperationContext* txn,
- bool capped,
- int size,
- bool enforceQuota ) {
- size = quantizeExtentSize( size );
+Status DummyExtentManager::init(OperationContext* txn) {
+ return Status::OK();
+}
- ExtentInfo info;
- info.data = static_cast<char*>( mongoMalloc( size ) );
- info.length = size;
+int DummyExtentManager::numFiles() const {
+ return static_cast<int>(_extents.size());
+}
- DiskLoc loc( _extents.size(), 0 );
- _extents.push_back( info );
+long long DummyExtentManager::fileSize() const {
+ invariant(false);
+ return -1;
+}
- Extent* e = getExtent( loc, false );
- e->magic = Extent::extentSignature;
- e->myLoc = loc;
- e->xnext.Null();
- e->xprev.Null();
- e->length = size;
- e->firstRecord.Null();
- e->lastRecord.Null();
+DiskLoc DummyExtentManager::allocateExtent(OperationContext* txn,
+ bool capped,
+ int size,
+ bool enforceQuota) {
+ size = quantizeExtentSize(size);
+
+ ExtentInfo info;
+ info.data = static_cast<char*>(mongoMalloc(size));
+ info.length = size;
+
+ DiskLoc loc(_extents.size(), 0);
+ _extents.push_back(info);
+
+ Extent* e = getExtent(loc, false);
+ e->magic = Extent::extentSignature;
+ e->myLoc = loc;
+ e->xnext.Null();
+ e->xprev.Null();
+ e->length = size;
+ e->firstRecord.Null();
+ e->lastRecord.Null();
+
+ return loc;
+}
- return loc;
+void DummyExtentManager::freeExtents(OperationContext* txn, DiskLoc firstExt, DiskLoc lastExt) {
+ // XXX
+}
- }
+void DummyExtentManager::freeExtent(OperationContext* txn, DiskLoc extent) {
+ // XXX
+}
+void DummyExtentManager::freeListStats(OperationContext* txn,
+ int* numExtents,
+ int64_t* totalFreeSizeBytes) const {
+ invariant(false);
+}
- void DummyExtentManager::freeExtents( OperationContext* txn,
- DiskLoc firstExt, DiskLoc lastExt ) {
- // XXX
- }
+std::unique_ptr<RecordFetcher> DummyExtentManager::recordNeedsFetch(const DiskLoc& loc) const {
+ return {};
+}
- void DummyExtentManager::freeExtent( OperationContext* txn, DiskLoc extent ) {
- // XXX
- }
- void DummyExtentManager::freeListStats(OperationContext* txn,
- int* numExtents,
- int64_t* totalFreeSizeBytes) const {
- invariant(false);
- }
+MmapV1RecordHeader* DummyExtentManager::recordForV1(const DiskLoc& loc) const {
+ if (static_cast<size_t>(loc.a()) >= _extents.size())
+ return NULL;
+ if (static_cast<size_t>(loc.getOfs()) >= _extents[loc.a()].length)
+ return NULL;
+ char* root = _extents[loc.a()].data;
+ return reinterpret_cast<MmapV1RecordHeader*>(root + loc.getOfs());
+}
- std::unique_ptr<RecordFetcher> DummyExtentManager::recordNeedsFetch(const DiskLoc& loc) const {
- return {};
- }
+Extent* DummyExtentManager::extentForV1(const DiskLoc& loc) const {
+ invariant(false);
+}
- MmapV1RecordHeader* DummyExtentManager::recordForV1( const DiskLoc& loc ) const {
- if ( static_cast<size_t>( loc.a() ) >= _extents.size() )
- return NULL;
- if ( static_cast<size_t>( loc.getOfs() ) >= _extents[loc.a()].length )
- return NULL;
- char* root = _extents[loc.a()].data;
- return reinterpret_cast<MmapV1RecordHeader*>( root + loc.getOfs() );
- }
+DiskLoc DummyExtentManager::extentLocForV1(const DiskLoc& loc) const {
+ return DiskLoc(loc.a(), 0);
+}
- Extent* DummyExtentManager::extentForV1( const DiskLoc& loc ) const {
- invariant( false );
- }
+Extent* DummyExtentManager::getExtent(const DiskLoc& loc, bool doSanityCheck) const {
+ invariant(!loc.isNull());
+ invariant(static_cast<size_t>(loc.a()) < _extents.size());
+ invariant(loc.getOfs() == 0);
+ Extent* ext = reinterpret_cast<Extent*>(_extents[loc.a()].data);
+ if (doSanityCheck)
+ ext->assertOk();
+ return ext;
+}
- DiskLoc DummyExtentManager::extentLocForV1( const DiskLoc& loc ) const {
- return DiskLoc( loc.a(), 0 );
- }
+int DummyExtentManager::maxSize() const {
+ return 1024 * 1024 * 64;
+}
- Extent* DummyExtentManager::getExtent( const DiskLoc& loc, bool doSanityCheck ) const {
- invariant( !loc.isNull() );
- invariant( static_cast<size_t>( loc.a() ) < _extents.size() );
- invariant( loc.getOfs() == 0 );
- Extent* ext = reinterpret_cast<Extent*>( _extents[loc.a()].data );
- if (doSanityCheck)
- ext->assertOk();
- return ext;
- }
+DummyExtentManager::CacheHint* DummyExtentManager::cacheHint(const DiskLoc& extentLoc,
+ const HintType& hint) {
+ return new CacheHint();
+}
- int DummyExtentManager::maxSize() const {
- return 1024 * 1024 * 64;
- }
+namespace {
+void accumulateExtentSizeRequirements(const LocAndSize* las, std::map<int, size_t>* sizes) {
+ if (!las)
+ return;
- DummyExtentManager::CacheHint* DummyExtentManager::cacheHint( const DiskLoc& extentLoc, const HintType& hint ) {
- return new CacheHint();
- }
+ while (!las->loc.isNull()) {
+ // We require passed-in offsets to be >= 1000 to leave room for Extent headers.
+ invariant(Extent::HeaderSize() < 1000);
+ invariant(las->loc.getOfs() >= 1000);
-namespace {
- void accumulateExtentSizeRequirements(const LocAndSize* las, std::map<int, size_t>* sizes) {
- if (!las)
- return;
-
- while (!las->loc.isNull()) {
- // We require passed in offsets to be > 1000 to leave room for Extent headers.
- invariant(Extent::HeaderSize() < 1000);
- invariant(las->loc.getOfs() >= 1000);
-
- const size_t end = las->loc.getOfs() + las->size;
- size_t& sizeNeeded = (*sizes)[las->loc.a()];
- sizeNeeded = std::max(sizeNeeded, end);
- las++;
- }
+ const size_t end = las->loc.getOfs() + las->size;
+ size_t& sizeNeeded = (*sizes)[las->loc.a()];
+ sizeNeeded = std::max(sizeNeeded, end);
+ las++;
}
+}
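
A worked call for the helper above (hypothetical inputs):

    // LocAndSize las[] = {{DiskLoc(0, 1000), 512}, {DiskLoc(0, 1600), 100},
    //                     {DiskLoc(1, 1000), 64}, {}};
    // accumulateExtentSizeRequirements(las, &sizes) yields
    //   sizes[0] = max(1000 + 512, 1600 + 100) = 1700
    //   sizes[1] = 1000 + 64 = 1064
    // so initializeV1RS() can size each file's extent to cover every record
    // placed in it.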
- void printRecList(OperationContext* txn,
- const ExtentManager* em,
- const RecordStoreV1MetaData* md) {
- log() << " *** BEGIN ACTUAL RECORD LIST *** ";
- DiskLoc extLoc = md->firstExtent(txn);
- std::set<DiskLoc> seenLocs;
- while (!extLoc.isNull()) {
- Extent* ext = em->getExtent(extLoc, true);
- DiskLoc actualLoc = ext->firstRecord;
- while (!actualLoc.isNull()) {
- const MmapV1RecordHeader* actualRec = em->recordForV1(actualLoc);
- const int actualSize = actualRec->lengthWithHeaders();
-
- log() << "loc: " << actualLoc // <--hex
- << " (" << actualLoc.getOfs() << ")"
- << " size: " << actualSize
- << " prev: " << actualRec->prevOfs()
- << " next: " << actualRec->nextOfs()
- << (actualLoc == md->capFirstNewRecord() ? " (CAP_FIRST_NEW)" : "")
- ;
-
- const bool foundCycle = !seenLocs.insert(actualLoc).second;
- invariant(!foundCycle);
-
- const int nextOfs = actualRec->nextOfs();
- actualLoc = (nextOfs == DiskLoc::NullOfs ? DiskLoc()
- : DiskLoc(actualLoc.a(), nextOfs));
- }
- extLoc = ext->xnext;
+void printRecList(OperationContext* txn, const ExtentManager* em, const RecordStoreV1MetaData* md) {
+ log() << " *** BEGIN ACTUAL RECORD LIST *** ";
+ DiskLoc extLoc = md->firstExtent(txn);
+ std::set<DiskLoc> seenLocs;
+ while (!extLoc.isNull()) {
+ Extent* ext = em->getExtent(extLoc, true);
+ DiskLoc actualLoc = ext->firstRecord;
+ while (!actualLoc.isNull()) {
+ const MmapV1RecordHeader* actualRec = em->recordForV1(actualLoc);
+ const int actualSize = actualRec->lengthWithHeaders();
+
+ log() << "loc: " << actualLoc // <--hex
+ << " (" << actualLoc.getOfs() << ")"
+ << " size: " << actualSize << " prev: " << actualRec->prevOfs()
+ << " next: " << actualRec->nextOfs()
+ << (actualLoc == md->capFirstNewRecord() ? " (CAP_FIRST_NEW)" : "");
+
+ const bool foundCycle = !seenLocs.insert(actualLoc).second;
+ invariant(!foundCycle);
+
+ const int nextOfs = actualRec->nextOfs();
+ actualLoc = (nextOfs == DiskLoc::NullOfs ? DiskLoc() : DiskLoc(actualLoc.a(), nextOfs));
}
- log() << " *** END ACTUAL RECORD LIST *** ";
+ extLoc = ext->xnext;
}
+ log() << " *** END ACTUAL RECORD LIST *** ";
+}
- void printDRecList(const ExtentManager* em, const RecordStoreV1MetaData* md) {
- log() << " *** BEGIN ACTUAL DELETED RECORD LIST *** ";
- std::set<DiskLoc> seenLocs;
- for (int bucketIdx = 0; bucketIdx < RecordStoreV1Base::Buckets; bucketIdx++) {
- DiskLoc actualLoc = md->deletedListEntry(bucketIdx);
- while (!actualLoc.isNull()) {
- const DeletedRecord* actualDrec = &em->recordForV1(actualLoc)->asDeleted();
- const int actualSize = actualDrec->lengthWithHeaders();
+void printDRecList(const ExtentManager* em, const RecordStoreV1MetaData* md) {
+ log() << " *** BEGIN ACTUAL DELETED RECORD LIST *** ";
+ std::set<DiskLoc> seenLocs;
+ for (int bucketIdx = 0; bucketIdx < RecordStoreV1Base::Buckets; bucketIdx++) {
+ DiskLoc actualLoc = md->deletedListEntry(bucketIdx);
+ while (!actualLoc.isNull()) {
+ const DeletedRecord* actualDrec = &em->recordForV1(actualLoc)->asDeleted();
+ const int actualSize = actualDrec->lengthWithHeaders();
- log() << "loc: " << actualLoc // <--hex
- << " (" << actualLoc.getOfs() << ")"
- << " size: " << actualSize
- << " bucket: " << bucketIdx
- << " next: " << actualDrec->nextDeleted();
+ log() << "loc: " << actualLoc // <--hex
+ << " (" << actualLoc.getOfs() << ")"
+ << " size: " << actualSize << " bucket: " << bucketIdx
+ << " next: " << actualDrec->nextDeleted();
- const bool foundCycle = !seenLocs.insert(actualLoc).second;
- invariant(!foundCycle);
+ const bool foundCycle = !seenLocs.insert(actualLoc).second;
+ invariant(!foundCycle);
- actualLoc = actualDrec->nextDeleted();
- }
-
- // Only print bucket 0 in capped collections since it contains all deleted records
- if (md->isCapped())
- break;
+ actualLoc = actualDrec->nextDeleted();
}
- log() << " *** END ACTUAL DELETED RECORD LIST *** ";
+
+ // Only print bucket 0 in capped collections since it contains all deleted records
+ if (md->isCapped())
+ break;
}
+ log() << " *** END ACTUAL DELETED RECORD LIST *** ";
+}
}
- void initializeV1RS(OperationContext* txn,
- const LocAndSize* records,
- const LocAndSize* drecs,
- const LocAndSize* legacyGrabBag,
- DummyExtentManager* em,
- DummyRecordStoreV1MetaData* md) {
- invariant(records || drecs); // if both are NULL nothing is being created...
-
- // Need to start with a blank slate
- invariant(em->numFiles() == 0);
- invariant(md->firstExtent(txn).isNull());
-
- // pre-allocate extents (even extents that aren't part of this RS)
- {
- typedef std::map<int, size_t> ExtentSizes;
- ExtentSizes extentSizes;
- accumulateExtentSizeRequirements(records, &extentSizes);
- accumulateExtentSizeRequirements(drecs, &extentSizes);
- accumulateExtentSizeRequirements(legacyGrabBag, &extentSizes);
- invariant(!extentSizes.empty());
-
- const int maxExtent = extentSizes.rbegin()->first;
- for (int i = 0; i <= maxExtent; i++) {
- const size_t size = extentSizes.count(i) ? extentSizes[i] : 0;
- const DiskLoc loc = em->allocateExtent(txn, md->isCapped(), size, 0);
-
- // This function and assertState depend on these details of DummyExtentManager
- invariant(loc.a() == i);
- invariant(loc.getOfs() == 0);
- }
-
- // link together extents that should be part of this RS
- md->setFirstExtent(txn, DiskLoc(extentSizes.begin()->first, 0));
- md->setLastExtent(txn, DiskLoc(extentSizes.rbegin()->first, 0));
- for (ExtentSizes::iterator it = extentSizes.begin();
- boost::next(it) != extentSizes.end(); /* ++it */ ) {
- const int a = it->first;
- ++it;
- const int b = it->first;
- em->getExtent(DiskLoc(a, 0))->xnext = DiskLoc(b, 0);
- em->getExtent(DiskLoc(b, 0))->xprev = DiskLoc(a, 0);
- }
+void initializeV1RS(OperationContext* txn,
+ const LocAndSize* records,
+ const LocAndSize* drecs,
+ const LocAndSize* legacyGrabBag,
+ DummyExtentManager* em,
+ DummyRecordStoreV1MetaData* md) {
+ invariant(records || drecs); // if both are NULL nothing is being created...
+
+ // Need to start with a blank slate
+ invariant(em->numFiles() == 0);
+ invariant(md->firstExtent(txn).isNull());
+
+ // pre-allocate extents (even extents that aren't part of this RS)
+ {
+ typedef std::map<int, size_t> ExtentSizes;
+ ExtentSizes extentSizes;
+ accumulateExtentSizeRequirements(records, &extentSizes);
+ accumulateExtentSizeRequirements(drecs, &extentSizes);
+ accumulateExtentSizeRequirements(legacyGrabBag, &extentSizes);
+ invariant(!extentSizes.empty());
+
+ const int maxExtent = extentSizes.rbegin()->first;
+ for (int i = 0; i <= maxExtent; i++) {
+ const size_t size = extentSizes.count(i) ? extentSizes[i] : 0;
+ const DiskLoc loc = em->allocateExtent(txn, md->isCapped(), size, 0);
+
+ // This function and assertState depend on these details of DummyExtentManager
+ invariant(loc.a() == i);
+ invariant(loc.getOfs() == 0);
+ }
- // This signals "done allocating new extents".
- if (md->isCapped())
- md->setDeletedListEntry(txn, 1, DiskLoc());
+ // link together extents that should be part of this RS
+ md->setFirstExtent(txn, DiskLoc(extentSizes.begin()->first, 0));
+ md->setLastExtent(txn, DiskLoc(extentSizes.rbegin()->first, 0));
+ for (ExtentSizes::iterator it = extentSizes.begin(); boost::next(it) != extentSizes.end();
+ /* ++it */) {
+ const int a = it->first;
+ ++it;
+ const int b = it->first;
+ em->getExtent(DiskLoc(a, 0))->xnext = DiskLoc(b, 0);
+ em->getExtent(DiskLoc(b, 0))->xprev = DiskLoc(a, 0);
}
- if (records && !records[0].loc.isNull()) {
- int recIdx = 0;
- DiskLoc extLoc = md->firstExtent(txn);
- while (!extLoc.isNull()) {
- Extent* ext = em->getExtent(extLoc);
- int prevOfs = DiskLoc::NullOfs;
- while (extLoc.a() == records[recIdx].loc.a()) { // for all records in this extent
- const DiskLoc loc = records[recIdx].loc;
- const int size = records[recIdx].size;;
- invariant(size >= MmapV1RecordHeader::HeaderSize);
+ // This signals "done allocating new extents".
+ if (md->isCapped())
+ md->setDeletedListEntry(txn, 1, DiskLoc());
+ }
- md->incrementStats(txn, size - MmapV1RecordHeader::HeaderSize, 1);
+ if (records && !records[0].loc.isNull()) {
+ int recIdx = 0;
+ DiskLoc extLoc = md->firstExtent(txn);
+ while (!extLoc.isNull()) {
+ Extent* ext = em->getExtent(extLoc);
+ int prevOfs = DiskLoc::NullOfs;
+ while (extLoc.a() == records[recIdx].loc.a()) { // for all records in this extent
+ const DiskLoc loc = records[recIdx].loc;
+            const int size = records[recIdx].size;
+ invariant(size >= MmapV1RecordHeader::HeaderSize);
- if (ext->firstRecord.isNull())
- ext->firstRecord = loc;
+ md->incrementStats(txn, size - MmapV1RecordHeader::HeaderSize, 1);
- MmapV1RecordHeader* rec = em->recordForV1(loc);
- rec->lengthWithHeaders() = size;
- rec->extentOfs() = 0;
+ if (ext->firstRecord.isNull())
+ ext->firstRecord = loc;
- rec->prevOfs() = prevOfs;
- prevOfs = loc.getOfs();
+ MmapV1RecordHeader* rec = em->recordForV1(loc);
+ rec->lengthWithHeaders() = size;
+ rec->extentOfs() = 0;
- const DiskLoc nextLoc = records[recIdx + 1].loc;
- if (nextLoc.a() == loc.a()) { // if next is in same extent
- rec->nextOfs() = nextLoc.getOfs();
- }
- else {
- rec->nextOfs() = DiskLoc::NullOfs;
- ext->lastRecord = loc;
- }
+ rec->prevOfs() = prevOfs;
+ prevOfs = loc.getOfs();
- recIdx++;
+ const DiskLoc nextLoc = records[recIdx + 1].loc;
+ if (nextLoc.a() == loc.a()) { // if next is in same extent
+ rec->nextOfs() = nextLoc.getOfs();
+ } else {
+ rec->nextOfs() = DiskLoc::NullOfs;
+ ext->lastRecord = loc;
}
- extLoc = ext->xnext;
+
+ recIdx++;
}
- invariant(records[recIdx].loc.isNull());
+ extLoc = ext->xnext;
}
-
- if (drecs && !drecs[0].loc.isNull()) {
- int drecIdx = 0;
- DiskLoc* prevNextPtr = NULL;
- int lastBucket = -1;
- while (!drecs[drecIdx].loc.isNull()) {
- const DiskLoc loc = drecs[drecIdx].loc;
- const int size = drecs[drecIdx].size;
- invariant(size >= MmapV1RecordHeader::HeaderSize);
- const int bucket = RecordStoreV1Base::bucket(size);
-
- if (md->isCapped()) {
- // All drecs form a single list in bucket 0
- if (prevNextPtr == NULL) {
- md->setDeletedListEntry(txn, 0, loc);
- }
- else {
- *prevNextPtr = loc;
- }
-
- if (loc.a() < md->capExtent().a()
- && drecs[drecIdx + 1].loc.a() == md->capExtent().a()) {
- // Bucket 1 is known as cappedLastDelRecLastExtent
- md->setDeletedListEntry(txn, 1, loc);
- }
- }
- else if (bucket != lastBucket) {
- invariant(bucket > lastBucket); // if this fails, drecs weren't sorted by bucket
- md->setDeletedListEntry(txn, bucket, loc);
- lastBucket = bucket;
- }
- else {
+ invariant(records[recIdx].loc.isNull());
+ }
+
+ if (drecs && !drecs[0].loc.isNull()) {
+ int drecIdx = 0;
+ DiskLoc* prevNextPtr = NULL;
+ int lastBucket = -1;
+ while (!drecs[drecIdx].loc.isNull()) {
+ const DiskLoc loc = drecs[drecIdx].loc;
+ const int size = drecs[drecIdx].size;
+ invariant(size >= MmapV1RecordHeader::HeaderSize);
+ const int bucket = RecordStoreV1Base::bucket(size);
+
+ if (md->isCapped()) {
+ // All drecs form a single list in bucket 0
+ if (prevNextPtr == NULL) {
+ md->setDeletedListEntry(txn, 0, loc);
+ } else {
*prevNextPtr = loc;
}
- DeletedRecord* drec = &em->recordForV1(loc)->asDeleted();
- drec->lengthWithHeaders() = size;
- drec->extentOfs() = 0;
- drec->nextDeleted() = DiskLoc();
- prevNextPtr = &drec->nextDeleted();
-
- drecIdx++;
+ if (loc.a() < md->capExtent().a() &&
+ drecs[drecIdx + 1].loc.a() == md->capExtent().a()) {
+ // Bucket 1 is known as cappedLastDelRecLastExtent
+ md->setDeletedListEntry(txn, 1, loc);
+ }
+ } else if (bucket != lastBucket) {
+ invariant(bucket > lastBucket); // if this fails, drecs weren't sorted by bucket
+ md->setDeletedListEntry(txn, bucket, loc);
+ lastBucket = bucket;
+ } else {
+ *prevNextPtr = loc;
}
- }
- if (legacyGrabBag && !legacyGrabBag[0].loc.isNull()) {
- invariant(!md->isCapped()); // capped should have an empty legacy grab bag.
+ DeletedRecord* drec = &em->recordForV1(loc)->asDeleted();
+ drec->lengthWithHeaders() = size;
+ drec->extentOfs() = 0;
+ drec->nextDeleted() = DiskLoc();
+ prevNextPtr = &drec->nextDeleted();
- int grabBagIdx = 0;
- DiskLoc* prevNextPtr = NULL;
- while (!legacyGrabBag[grabBagIdx].loc.isNull()) {
- const DiskLoc loc = legacyGrabBag[grabBagIdx].loc;
- const int size = legacyGrabBag[grabBagIdx].size;
- invariant(size >= MmapV1RecordHeader::HeaderSize);
+ drecIdx++;
+ }
+ }
- if (grabBagIdx == 0) {
- md->setDeletedListLegacyGrabBag(txn, loc);
- }
- else {
- *prevNextPtr = loc;
- }
+ if (legacyGrabBag && !legacyGrabBag[0].loc.isNull()) {
+ invariant(!md->isCapped()); // capped should have an empty legacy grab bag.
- DeletedRecord* drec = &em->recordForV1(loc)->asDeleted();
- drec->lengthWithHeaders() = size;
- drec->extentOfs() = 0;
- drec->nextDeleted() = DiskLoc();
- prevNextPtr = &drec->nextDeleted();
+ int grabBagIdx = 0;
+ DiskLoc* prevNextPtr = NULL;
+ while (!legacyGrabBag[grabBagIdx].loc.isNull()) {
+ const DiskLoc loc = legacyGrabBag[grabBagIdx].loc;
+ const int size = legacyGrabBag[grabBagIdx].size;
+ invariant(size >= MmapV1RecordHeader::HeaderSize);
- grabBagIdx++;
+ if (grabBagIdx == 0) {
+ md->setDeletedListLegacyGrabBag(txn, loc);
+ } else {
+ *prevNextPtr = loc;
}
- }
- // Make sure we set everything up as requested.
- assertStateV1RS(txn, records, drecs, legacyGrabBag, em, md);
+ DeletedRecord* drec = &em->recordForV1(loc)->asDeleted();
+ drec->lengthWithHeaders() = size;
+ drec->extentOfs() = 0;
+ drec->nextDeleted() = DiskLoc();
+ prevNextPtr = &drec->nextDeleted();
+
+ grabBagIdx++;
+ }
}
- void assertStateV1RS(OperationContext* txn,
- const LocAndSize* records,
- const LocAndSize* drecs,
- const LocAndSize* legacyGrabBag,
- const ExtentManager* em,
- const DummyRecordStoreV1MetaData* md) {
- invariant(records || drecs); // if both are NULL nothing is being asserted...
-
- try {
- if (records) {
- long long dataSize = 0;
- long long numRecs = 0;
-
- int recIdx = 0;
-
- DiskLoc extLoc = md->firstExtent(txn);
- while (!extLoc.isNull()) { // for each Extent
- Extent* ext = em->getExtent(extLoc, true);
- int expectedPrevOfs = DiskLoc::NullOfs;
- DiskLoc actualLoc = ext->firstRecord;
- while (!actualLoc.isNull()) { // for each MmapV1RecordHeader in this Extent
- const MmapV1RecordHeader* actualRec = em->recordForV1(actualLoc);
- const int actualSize = actualRec->lengthWithHeaders();
-
- dataSize += actualSize - MmapV1RecordHeader::HeaderSize;
- numRecs += 1;
-
- ASSERT_EQUALS(actualLoc, records[recIdx].loc);
- ASSERT_EQUALS(actualSize, records[recIdx].size);
-
- ASSERT_EQUALS(actualRec->extentOfs(), extLoc.getOfs());
- ASSERT_EQUALS(actualRec->prevOfs(), expectedPrevOfs);
- expectedPrevOfs = actualLoc.getOfs();
-
- recIdx++;
- const int nextOfs = actualRec->nextOfs();
- actualLoc = (nextOfs == DiskLoc::NullOfs ? DiskLoc()
- : DiskLoc(actualLoc.a(), nextOfs));
- }
+ // Make sure we set everything up as requested.
+ assertStateV1RS(txn, records, drecs, legacyGrabBag, em, md);
+}
- if (ext->xnext.isNull()) {
- ASSERT_EQUALS(md->lastExtent(txn), extLoc);
- }
+void assertStateV1RS(OperationContext* txn,
+ const LocAndSize* records,
+ const LocAndSize* drecs,
+ const LocAndSize* legacyGrabBag,
+ const ExtentManager* em,
+ const DummyRecordStoreV1MetaData* md) {
+ invariant(records || drecs); // if both are NULL nothing is being asserted...
- extLoc = ext->xnext;
- }
+ try {
+ if (records) {
+ long long dataSize = 0;
+ long long numRecs = 0;
- // both the expected and actual record lists must be done at this point
- ASSERT_EQUALS(records[recIdx].loc, DiskLoc());
+ int recIdx = 0;
- ASSERT_EQUALS(dataSize, md->dataSize());
- ASSERT_EQUALS(numRecs, md->numRecords());
- }
+ DiskLoc extLoc = md->firstExtent(txn);
+ while (!extLoc.isNull()) { // for each Extent
+ Extent* ext = em->getExtent(extLoc, true);
+ int expectedPrevOfs = DiskLoc::NullOfs;
+ DiskLoc actualLoc = ext->firstRecord;
+ while (!actualLoc.isNull()) { // for each MmapV1RecordHeader in this Extent
+ const MmapV1RecordHeader* actualRec = em->recordForV1(actualLoc);
+ const int actualSize = actualRec->lengthWithHeaders();
- if (drecs) {
- int drecIdx = 0;
- for (int bucketIdx = 0; bucketIdx < RecordStoreV1Base::Buckets; bucketIdx++) {
- DiskLoc actualLoc = md->deletedListEntry(bucketIdx);
-
- if (md->isCapped() && bucketIdx == 1) {
- // In capped collections, the 2nd bucket (index 1) points to the drec before
- // the first drec in the capExtent. If the capExtent is the first Extent,
- // it should be Null.
-
- if (md->capExtent() == md->firstExtent(txn)) {
- ASSERT_EQUALS(actualLoc, DiskLoc());
- }
- else {
- ASSERT_NOT_EQUALS(actualLoc.a(), md->capExtent().a());
- const DeletedRecord* actualDrec =
- &em->recordForV1(actualLoc)->asDeleted();
- ASSERT_EQUALS(actualDrec->nextDeleted().a(), md->capExtent().a());
- }
-
- // Don't do normal checking of bucket 1 in capped collections. Checking
- // other buckets to verify that they are Null.
- continue;
- }
+ dataSize += actualSize - MmapV1RecordHeader::HeaderSize;
+ numRecs += 1;
- while (!actualLoc.isNull()) {
- const DeletedRecord* actualDrec = &em->recordForV1(actualLoc)->asDeleted();
- const int actualSize = actualDrec->lengthWithHeaders();
+ ASSERT_EQUALS(actualLoc, records[recIdx].loc);
+ ASSERT_EQUALS(actualSize, records[recIdx].size);
+
+ ASSERT_EQUALS(actualRec->extentOfs(), extLoc.getOfs());
+ ASSERT_EQUALS(actualRec->prevOfs(), expectedPrevOfs);
+ expectedPrevOfs = actualLoc.getOfs();
+
+ recIdx++;
+ const int nextOfs = actualRec->nextOfs();
+ actualLoc =
+ (nextOfs == DiskLoc::NullOfs ? DiskLoc() : DiskLoc(actualLoc.a(), nextOfs));
+ }
+
+ if (ext->xnext.isNull()) {
+ ASSERT_EQUALS(md->lastExtent(txn), extLoc);
+ }
- ASSERT_EQUALS(actualLoc, drecs[drecIdx].loc);
- ASSERT_EQUALS(actualSize, drecs[drecIdx].size);
+ extLoc = ext->xnext;
+ }
- // Make sure the drec is correct
- ASSERT_EQUALS(actualDrec->extentOfs(), 0);
+            // Both the expected and actual record lists must be exhausted at this point
+ ASSERT_EQUALS(records[recIdx].loc, DiskLoc());
- // in capped collections all drecs are linked into a single list in bucket 0
- ASSERT_EQUALS(bucketIdx, md->isCapped()
- ? 0
- : RecordStoreV1Base::bucket(actualSize));
+ ASSERT_EQUALS(dataSize, md->dataSize());
+ ASSERT_EQUALS(numRecs, md->numRecords());
+ }
- drecIdx++;
- actualLoc = actualDrec->nextDeleted();
+ if (drecs) {
+ int drecIdx = 0;
+ for (int bucketIdx = 0; bucketIdx < RecordStoreV1Base::Buckets; bucketIdx++) {
+ DiskLoc actualLoc = md->deletedListEntry(bucketIdx);
+
+ if (md->isCapped() && bucketIdx == 1) {
+ // In capped collections, the 2nd bucket (index 1) points to the drec before
+ // the first drec in the capExtent. If the capExtent is the first Extent,
+ // it should be Null.
+
+ if (md->capExtent() == md->firstExtent(txn)) {
+ ASSERT_EQUALS(actualLoc, DiskLoc());
+ } else {
+ ASSERT_NOT_EQUALS(actualLoc.a(), md->capExtent().a());
+ const DeletedRecord* actualDrec = &em->recordForV1(actualLoc)->asDeleted();
+ ASSERT_EQUALS(actualDrec->nextDeleted().a(), md->capExtent().a());
}
+
+                // Don't do normal checking of bucket 1 in capped collections. Keep checking
+                // the other buckets to verify that they are Null.
+ continue;
}
- // both the expected and actual deleted lists must be done at this point
- ASSERT_EQUALS(drecs[drecIdx].loc, DiskLoc());
- }
- if (legacyGrabBag) {
- int grabBagIdx = 0;
- DiskLoc actualLoc = md->deletedListLegacyGrabBag();
while (!actualLoc.isNull()) {
const DeletedRecord* actualDrec = &em->recordForV1(actualLoc)->asDeleted();
const int actualSize = actualDrec->lengthWithHeaders();
- ASSERT_EQUALS(actualLoc, legacyGrabBag[grabBagIdx].loc);
- ASSERT_EQUALS(actualSize, legacyGrabBag[grabBagIdx].size);
+ ASSERT_EQUALS(actualLoc, drecs[drecIdx].loc);
+ ASSERT_EQUALS(actualSize, drecs[drecIdx].size);
+
+ // Make sure the drec is correct
+ ASSERT_EQUALS(actualDrec->extentOfs(), 0);
+
+ // in capped collections all drecs are linked into a single list in bucket 0
+ ASSERT_EQUALS(bucketIdx,
+ md->isCapped() ? 0 : RecordStoreV1Base::bucket(actualSize));
- grabBagIdx++;
+ drecIdx++;
actualLoc = actualDrec->nextDeleted();
}
-
- // both the expected and actual deleted lists must be done at this point
- ASSERT_EQUALS(legacyGrabBag[grabBagIdx].loc, DiskLoc());
- }
- else {
- // Unless a test is actually using the grabBag it should be empty
- ASSERT_EQUALS(md->deletedListLegacyGrabBag(), DiskLoc());
}
+            // Both the expected and actual deleted lists must be exhausted at this point
+ ASSERT_EQUALS(drecs[drecIdx].loc, DiskLoc());
}
- catch (...) {
- // If a test fails, provide extra info to make debugging easier
- printRecList(txn, em, md);
- printDRecList(em, md);
- throw;
+
+ if (legacyGrabBag) {
+ int grabBagIdx = 0;
+ DiskLoc actualLoc = md->deletedListLegacyGrabBag();
+ while (!actualLoc.isNull()) {
+ const DeletedRecord* actualDrec = &em->recordForV1(actualLoc)->asDeleted();
+ const int actualSize = actualDrec->lengthWithHeaders();
+
+ ASSERT_EQUALS(actualLoc, legacyGrabBag[grabBagIdx].loc);
+ ASSERT_EQUALS(actualSize, legacyGrabBag[grabBagIdx].size);
+
+ grabBagIdx++;
+ actualLoc = actualDrec->nextDeleted();
+ }
+
+            // Both the expected and actual deleted lists must be exhausted at this point
+ ASSERT_EQUALS(legacyGrabBag[grabBagIdx].loc, DiskLoc());
+ } else {
+ // Unless a test is actually using the grabBag it should be empty
+ ASSERT_EQUALS(md->deletedListLegacyGrabBag(), DiskLoc());
}
+ } catch (...) {
+ // If a test fails, provide extra info to make debugging easier
+ printRecList(txn, em, md);
+ printDRecList(em, md);
+ throw;
}
}
+}
diff --git a/src/mongo/db/storage/mmap_v1/record_store_v1_test_help.h b/src/mongo/db/storage/mmap_v1/record_store_v1_test_help.h
index f37969c1ca6..0a038f9e9f3 100644
--- a/src/mongo/db/storage/mmap_v1/record_store_v1_test_help.h
+++ b/src/mongo/db/storage/mmap_v1/record_store_v1_test_help.h
@@ -37,169 +37,162 @@
namespace mongo {
- class DummyRecordStoreV1MetaData : public RecordStoreV1MetaData {
- public:
- DummyRecordStoreV1MetaData( bool capped, int userFlags );
- virtual ~DummyRecordStoreV1MetaData(){}
+class DummyRecordStoreV1MetaData : public RecordStoreV1MetaData {
+public:
+ DummyRecordStoreV1MetaData(bool capped, int userFlags);
+ virtual ~DummyRecordStoreV1MetaData() {}
- virtual const DiskLoc& capExtent() const;
- virtual void setCapExtent( OperationContext* txn, const DiskLoc& loc );
+ virtual const DiskLoc& capExtent() const;
+ virtual void setCapExtent(OperationContext* txn, const DiskLoc& loc);
- virtual const DiskLoc& capFirstNewRecord() const;
- virtual void setCapFirstNewRecord( OperationContext* txn, const DiskLoc& loc );
+ virtual const DiskLoc& capFirstNewRecord() const;
+ virtual void setCapFirstNewRecord(OperationContext* txn, const DiskLoc& loc);
- virtual long long dataSize() const;
- virtual long long numRecords() const;
+ virtual long long dataSize() const;
+ virtual long long numRecords() const;
- virtual void incrementStats( OperationContext* txn,
- long long dataSizeIncrement,
- long long numRecordsIncrement );
+ virtual void incrementStats(OperationContext* txn,
+ long long dataSizeIncrement,
+ long long numRecordsIncrement);
- virtual void setStats( OperationContext* txn,
- long long dataSize,
- long long numRecords );
+ virtual void setStats(OperationContext* txn, long long dataSize, long long numRecords);
- virtual DiskLoc deletedListEntry( int bucket ) const;
- virtual void setDeletedListEntry( OperationContext* txn,
- int bucket,
- const DiskLoc& loc );
+ virtual DiskLoc deletedListEntry(int bucket) const;
+ virtual void setDeletedListEntry(OperationContext* txn, int bucket, const DiskLoc& loc);
- virtual DiskLoc deletedListLegacyGrabBag() const;
- virtual void setDeletedListLegacyGrabBag(OperationContext* txn, const DiskLoc& loc);
+ virtual DiskLoc deletedListLegacyGrabBag() const;
+ virtual void setDeletedListLegacyGrabBag(OperationContext* txn, const DiskLoc& loc);
- virtual void orphanDeletedList(OperationContext* txn);
+ virtual void orphanDeletedList(OperationContext* txn);
- virtual const DiskLoc& firstExtent( OperationContext* txn ) const;
- virtual void setFirstExtent( OperationContext* txn, const DiskLoc& loc );
+ virtual const DiskLoc& firstExtent(OperationContext* txn) const;
+ virtual void setFirstExtent(OperationContext* txn, const DiskLoc& loc);
- virtual const DiskLoc& lastExtent( OperationContext* txn ) const;
- virtual void setLastExtent( OperationContext* txn, const DiskLoc& loc );
+ virtual const DiskLoc& lastExtent(OperationContext* txn) const;
+ virtual void setLastExtent(OperationContext* txn, const DiskLoc& loc);
- virtual bool isCapped() const;
+ virtual bool isCapped() const;
- virtual bool isUserFlagSet( int flag ) const;
- virtual int userFlags() const { return _userFlags; }
- virtual bool setUserFlag( OperationContext* txn, int flag );
- virtual bool clearUserFlag( OperationContext* txn, int flag );
- virtual bool replaceUserFlags( OperationContext* txn, int flags );
+ virtual bool isUserFlagSet(int flag) const;
+ virtual int userFlags() const {
+ return _userFlags;
+ }
+ virtual bool setUserFlag(OperationContext* txn, int flag);
+ virtual bool clearUserFlag(OperationContext* txn, int flag);
+ virtual bool replaceUserFlags(OperationContext* txn, int flags);
- virtual int lastExtentSize( OperationContext* txn ) const;
- virtual void setLastExtentSize( OperationContext* txn, int newMax );
+ virtual int lastExtentSize(OperationContext* txn) const;
+ virtual void setLastExtentSize(OperationContext* txn, int newMax);
- virtual long long maxCappedDocs() const;
+ virtual long long maxCappedDocs() const;
- protected:
+protected:
+ DiskLoc _capExtent;
+ DiskLoc _capFirstNewRecord;
- DiskLoc _capExtent;
- DiskLoc _capFirstNewRecord;
+ long long _dataSize;
+ long long _numRecords;
- long long _dataSize;
- long long _numRecords;
+ DiskLoc _firstExtent;
+ DiskLoc _lastExtent;
- DiskLoc _firstExtent;
- DiskLoc _lastExtent;
+ bool _capped;
+ int _userFlags;
+ long long _maxCappedDocs;
- bool _capped;
- int _userFlags;
- long long _maxCappedDocs;
+ int _lastExtentSize;
+ double _paddingFactor;
- int _lastExtentSize;
- double _paddingFactor;
+ std::vector<DiskLoc> _deletedLists;
+ DiskLoc _deletedListLegacyGrabBag;
+};
- std::vector<DiskLoc> _deletedLists;
- DiskLoc _deletedListLegacyGrabBag;
- };
+class DummyExtentManager : public ExtentManager {
+public:
+ virtual ~DummyExtentManager();
- class DummyExtentManager : public ExtentManager {
- public:
- virtual ~DummyExtentManager();
+ virtual Status init(OperationContext* txn);
- virtual Status init(OperationContext* txn);
+ virtual int numFiles() const;
+ virtual long long fileSize() const;
- virtual int numFiles() const;
- virtual long long fileSize() const;
+ virtual DiskLoc allocateExtent(OperationContext* txn, bool capped, int size, bool enforceQuota);
- virtual DiskLoc allocateExtent( OperationContext* txn,
- bool capped,
- int size,
- bool enforceQuota );
+ virtual void freeExtents(OperationContext* txn, DiskLoc firstExt, DiskLoc lastExt);
- virtual void freeExtents( OperationContext* txn,
- DiskLoc firstExt, DiskLoc lastExt );
+ virtual void freeExtent(OperationContext* txn, DiskLoc extent);
- virtual void freeExtent( OperationContext* txn, DiskLoc extent );
+ virtual void freeListStats(OperationContext* txn,
+ int* numExtents,
+ int64_t* totalFreeSizeBytes) const;
- virtual void freeListStats(OperationContext* txn,
- int* numExtents,
- int64_t* totalFreeSizeBytes) const;
+ virtual MmapV1RecordHeader* recordForV1(const DiskLoc& loc) const;
- virtual MmapV1RecordHeader* recordForV1( const DiskLoc& loc ) const;
+ virtual std::unique_ptr<RecordFetcher> recordNeedsFetch(const DiskLoc& loc) const final;
- virtual std::unique_ptr<RecordFetcher> recordNeedsFetch( const DiskLoc& loc ) const final;
+ virtual Extent* extentForV1(const DiskLoc& loc) const;
- virtual Extent* extentForV1( const DiskLoc& loc ) const;
+ virtual DiskLoc extentLocForV1(const DiskLoc& loc) const;
- virtual DiskLoc extentLocForV1( const DiskLoc& loc ) const;
+ virtual Extent* getExtent(const DiskLoc& loc, bool doSanityCheck = true) const;
- virtual Extent* getExtent( const DiskLoc& loc, bool doSanityCheck = true ) const;
+ virtual int maxSize() const;
- virtual int maxSize() const;
+ virtual CacheHint* cacheHint(const DiskLoc& extentLoc, const HintType& hint);
- virtual CacheHint* cacheHint( const DiskLoc& extentLoc, const HintType& hint );
+protected:
+ struct ExtentInfo {
+ char* data;
+ size_t length;
+ };
- protected:
- struct ExtentInfo {
- char* data;
- size_t length;
- };
+ std::vector<ExtentInfo> _extents;
+};
- std::vector<ExtentInfo> _extents;
- };
-
- struct LocAndSize {
- DiskLoc loc;
- int size; // with headers
- };
+struct LocAndSize {
+ DiskLoc loc;
+ int size; // with headers
+};
- /**
- * Creates a V1 storage/mmap_v1 with the passed in records and DeletedRecords (drecs).
- *
- * List of LocAndSize are terminated by a Null DiskLoc. Passing a NULL pointer is shorthand for
- * an empty list. Each extent gets it's own DiskLoc file number. DiskLoc Offsets must be > 1000.
- *
- * records must be sorted by extent/file. offsets within an extent can be in any order.
- *
- * In a simple RS, drecs must be grouped into size-buckets, but the ordering within the size
- * buckets is up to you.
- *
- * In a capped collection, all drecs form a single list and must be grouped by extent, with each
- * extent having at least one drec. capFirstNewRecord() and capExtent() *must* be correctly set
- * on md before calling.
- *
- * You are responsible for ensuring the records and drecs don't overlap.
- *
- * ExtentManager and MetaData must both be empty.
- */
- void initializeV1RS(OperationContext* txn,
- const LocAndSize* records,
- const LocAndSize* drecs,
- const LocAndSize* legacyGrabBag,
- DummyExtentManager* em,
- DummyRecordStoreV1MetaData* md);
-
- /**
- * Asserts that the V1RecordStore defined by md has the passed in records and drecs in the
- * correct order.
- *
- * List of LocAndSize are terminated by a Null DiskLoc. Passing a NULL pointer means don't check
- * that list.
- */
- void assertStateV1RS(OperationContext* txn,
- const LocAndSize* records,
- const LocAndSize* drecs,
- const LocAndSize* legacyGrabBag,
- const ExtentManager* em,
- const DummyRecordStoreV1MetaData* md);
+/**
+ * Creates a V1 record store (storage/mmap_v1) with the passed-in records and DeletedRecords
+ * (drecs).
+ *
+ * Lists of LocAndSize are terminated by a Null DiskLoc. Passing a NULL pointer is shorthand for
+ * an empty list. Each extent gets its own DiskLoc file number. DiskLoc offsets must be > 1000.
+ *
+ * Records must be sorted by extent/file. Offsets within an extent can be in any order.
+ *
+ * In a simple RS, drecs must be grouped into size-buckets, but the ordering within the size
+ * buckets is up to you.
+ *
+ * In a capped collection, all drecs form a single list and must be grouped by extent, with each
+ * extent having at least one drec. capFirstNewRecord() and capExtent() *must* be correctly set
+ * on md before calling.
+ *
+ * You are responsible for ensuring the records and drecs don't overlap.
+ *
+ * ExtentManager and MetaData must both be empty.
+ */
+void initializeV1RS(OperationContext* txn,
+ const LocAndSize* records,
+ const LocAndSize* drecs,
+ const LocAndSize* legacyGrabBag,
+ DummyExtentManager* em,
+ DummyRecordStoreV1MetaData* md);
+
+/**
+ * Asserts that the V1RecordStore defined by md has the passed-in records and drecs in the
+ * correct order.
+ *
+ * Lists of LocAndSize are terminated by a Null DiskLoc. Passing a NULL pointer means don't check
+ * that list.
+ */
+void assertStateV1RS(OperationContext* txn,
+ const LocAndSize* records,
+ const LocAndSize* drecs,
+ const LocAndSize* legacyGrabBag,
+ const ExtentManager* em,
+ const DummyRecordStoreV1MetaData* md);
} // namespace mongo
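// A minimal usage sketch for the two helpers above (illustrative only -- the
// locations, sizes, and use of OperationContextNoop are assumptions, not part
// of the header):
//
//     OperationContextNoop txn;
//     DummyExtentManager em;
//     DummyRecordStoreV1MetaData md(false /*capped*/, 0 /*userFlags*/);
//
//     const LocAndSize records[] = {
//         {DiskLoc(0, 1000), 100},  // one record in extent/file 0
//         {DiskLoc(), 0},           // a Null DiskLoc terminates the list
//     };
//     const LocAndSize drecs[] = {
//         {DiskLoc(0, 1100), 900},  // the remaining free space in the extent
//         {DiskLoc(), 0},
//     };
//     initializeV1RS(&txn, records, drecs, NULL, &em, &md);
//
//     // ... run the code under test against em/md ...
//
//     // Re-assert the same lists (nothing changed in this sketch); NULL for
//     // the grab bag makes assertStateV1RS() verify that it is empty.
//     assertStateV1RS(&txn, records, drecs, NULL, &em, &md);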
diff --git a/src/mongo/db/storage/mmap_v1/repair_database.cpp b/src/mongo/db/storage/mmap_v1/repair_database.cpp
index aa83636ae6b..6db0a4e15da 100644
--- a/src/mongo/db/storage/mmap_v1/repair_database.cpp
+++ b/src/mongo/db/storage/mmap_v1/repair_database.cpp
@@ -55,427 +55,415 @@
namespace mongo {
- using std::unique_ptr;
- using std::endl;
- using std::map;
- using std::string;
- using std::stringstream;
- using std::vector;
-
- typedef boost::filesystem::path Path;
-
- // inheritable class to implement an operation that may be applied to all
- // files in a database using _applyOpToDataFiles()
- class FileOp {
- public:
- virtual ~FileOp() {}
- // Return true if file exists and operation successful
- virtual bool apply( const boost::filesystem::path &p ) = 0;
- virtual const char * op() const = 0;
- };
-
- void _applyOpToDataFiles(const string& database, FileOp &fo, bool afterAllocator = false,
- const string& path = storageGlobalParams.dbpath);
-
- void _deleteDataFiles(const std::string& database) {
- if (storageGlobalParams.directoryperdb) {
- FileAllocator::get()->waitUntilFinished();
- MONGO_ASSERT_ON_EXCEPTION_WITH_MSG(
- boost::filesystem::remove_all(
- boost::filesystem::path(storageGlobalParams.dbpath) / database),
- "delete data files with a directoryperdb");
- return;
- }
- class : public FileOp {
- virtual bool apply( const boost::filesystem::path &p ) {
- return boost::filesystem::remove( p );
- }
- virtual const char * op() const {
- return "remove";
- }
- } deleter;
- _applyOpToDataFiles( database, deleter, true );
+using std::unique_ptr;
+using std::endl;
+using std::map;
+using std::string;
+using std::stringstream;
+using std::vector;
+
+typedef boost::filesystem::path Path;
+
+// inheritable class to implement an operation that may be applied to all
+// files in a database using _applyOpToDataFiles()
+class FileOp {
+public:
+ virtual ~FileOp() {}
+    // Return true if the file exists and the operation succeeded
+ virtual bool apply(const boost::filesystem::path& p) = 0;
+ virtual const char* op() const = 0;
+};
+
+void _applyOpToDataFiles(const string& database,
+ FileOp& fo,
+ bool afterAllocator = false,
+ const string& path = storageGlobalParams.dbpath);
+
+void _deleteDataFiles(const std::string& database) {
+ if (storageGlobalParams.directoryperdb) {
+ FileAllocator::get()->waitUntilFinished();
+ MONGO_ASSERT_ON_EXCEPTION_WITH_MSG(
+ boost::filesystem::remove_all(boost::filesystem::path(storageGlobalParams.dbpath) /
+ database),
+ "delete data files with a directoryperdb");
+ return;
}
-
- void boostRenameWrapper( const Path &from, const Path &to ) {
- try {
- boost::filesystem::rename( from, to );
+ class : public FileOp {
+ virtual bool apply(const boost::filesystem::path& p) {
+ return boost::filesystem::remove(p);
}
- catch ( const boost::filesystem::filesystem_error & ) {
- // boost rename doesn't work across partitions
- boost::filesystem::copy_file( from, to);
- boost::filesystem::remove( from );
+ virtual const char* op() const {
+ return "remove";
}
}
+ deleter;
+ _applyOpToDataFiles(database, deleter, true);
+}
- // back up original database files to 'temp' dir
- void _renameForBackup( const std::string& database, const Path &reservedPath ) {
- Path newPath( reservedPath );
- if (storageGlobalParams.directoryperdb)
- newPath /= database;
- class Renamer : public FileOp {
- public:
- Renamer( const Path &newPath ) : newPath_( newPath ) {}
- private:
- const boost::filesystem::path &newPath_;
- virtual bool apply( const Path &p ) {
- if ( !boost::filesystem::exists( p ) )
- return false;
- boostRenameWrapper( p, newPath_ / ( p.leaf().string() + ".bak" ) );
- return true;
- }
- virtual const char * op() const {
- return "renaming";
- }
- } renamer( newPath );
- _applyOpToDataFiles( database, renamer, true );
+void boostRenameWrapper(const Path& from, const Path& to) {
+ try {
+ boost::filesystem::rename(from, to);
+ } catch (const boost::filesystem::filesystem_error&) {
+ // boost rename doesn't work across partitions
+ boost::filesystem::copy_file(from, to);
+ boost::filesystem::remove(from);
}
+}
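// Note: boost::filesystem::rename() throws a filesystem_error when the source
// and destination sit on different partitions (EXDEV on POSIX), so the catch
// above falls back to copy-then-remove, e.g. when --repairpath is on another
// volume than --dbpath.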
- intmax_t dbSize( const string& database ) {
- class SizeAccumulator : public FileOp {
- public:
- SizeAccumulator() : totalSize_( 0 ) {}
- intmax_t size() const {
- return totalSize_;
- }
- private:
- virtual bool apply( const boost::filesystem::path &p ) {
- if ( !boost::filesystem::exists( p ) )
- return false;
- totalSize_ += boost::filesystem::file_size( p );
- return true;
- }
- virtual const char *op() const {
- return "checking size";
- }
- intmax_t totalSize_;
- };
- SizeAccumulator sa;
- _applyOpToDataFiles( database, sa );
- return sa.size();
- }
+// back up original database files to 'temp' dir
+void _renameForBackup(const std::string& database, const Path& reservedPath) {
+ Path newPath(reservedPath);
+ if (storageGlobalParams.directoryperdb)
+ newPath /= database;
+ class Renamer : public FileOp {
+ public:
+ Renamer(const Path& newPath) : newPath_(newPath) {}
- // move temp files to standard data dir
- void _replaceWithRecovered( const string& database, const char *reservedPathString ) {
- Path newPath(storageGlobalParams.dbpath);
- if (storageGlobalParams.directoryperdb)
- newPath /= database;
- class Replacer : public FileOp {
- public:
- Replacer( const Path &newPath ) : newPath_( newPath ) {}
- private:
- const boost::filesystem::path &newPath_;
- virtual bool apply( const Path &p ) {
- if ( !boost::filesystem::exists( p ) )
- return false;
- boostRenameWrapper( p, newPath_ / p.leaf() );
- return true;
- }
- virtual const char * op() const {
- return "renaming";
- }
- } replacer( newPath );
- _applyOpToDataFiles( database, replacer, true, reservedPathString );
- }
+ private:
+ const boost::filesystem::path& newPath_;
+ virtual bool apply(const Path& p) {
+ if (!boost::filesystem::exists(p))
+ return false;
+ boostRenameWrapper(p, newPath_ / (p.leaf().string() + ".bak"));
+ return true;
+ }
+ virtual const char* op() const {
+ return "renaming";
+ }
+ } renamer(newPath);
+ _applyOpToDataFiles(database, renamer, true);
+}
- // generate a directory name for storing temp data files
- Path uniqueReservedPath( const char *prefix ) {
- Path repairPath = Path(storageGlobalParams.repairpath);
- Path reservedPath;
- int i = 0;
- bool exists = false;
- do {
- stringstream ss;
- ss << prefix << "_repairDatabase_" << i++;
- reservedPath = repairPath / ss.str();
- MONGO_ASSERT_ON_EXCEPTION( exists = boost::filesystem::exists( reservedPath ) );
+intmax_t dbSize(const string& database) {
+ class SizeAccumulator : public FileOp {
+ public:
+ SizeAccumulator() : totalSize_(0) {}
+ intmax_t size() const {
+ return totalSize_;
}
- while ( exists );
- return reservedPath;
- }
- void _applyOpToDataFiles( const string& database, FileOp &fo, bool afterAllocator, const string& path ) {
- if ( afterAllocator )
- FileAllocator::get()->waitUntilFinished();
- string c = database;
- c += '.';
- boost::filesystem::path p(path);
- if (storageGlobalParams.directoryperdb)
- p /= database;
- boost::filesystem::path q;
- q = p / (c+"ns");
- bool ok = false;
- MONGO_ASSERT_ON_EXCEPTION( ok = fo.apply( q ) );
- if ( ok ) {
- LOG(2) << fo.op() << " file " << q.string() << endl;
+ private:
+ virtual bool apply(const boost::filesystem::path& p) {
+ if (!boost::filesystem::exists(p))
+ return false;
+ totalSize_ += boost::filesystem::file_size(p);
+ return true;
}
- int i = 0;
- int extra = 10; // should not be necessary, this is defensive in case there are missing files
- while ( 1 ) {
- verify( i <= DiskLoc::MaxFiles );
- stringstream ss;
- ss << c << i;
- q = p / ss.str();
- MONGO_ASSERT_ON_EXCEPTION( ok = fo.apply(q) );
- if ( ok ) {
- if ( extra != 10 ) {
- LOG(1) << fo.op() << " file " << q.string() << endl;
- log() << " _applyOpToDataFiles() warning: extra == " << extra << endl;
- }
- }
- else if ( --extra <= 0 )
- break;
- i++;
+ virtual const char* op() const {
+ return "checking size";
}
- }
+ intmax_t totalSize_;
+ };
+ SizeAccumulator sa;
+ _applyOpToDataFiles(database, sa);
+ return sa.size();
+}
- class RepairFileDeleter {
+// move temp files to standard data dir
+void _replaceWithRecovered(const string& database, const char* reservedPathString) {
+ Path newPath(storageGlobalParams.dbpath);
+ if (storageGlobalParams.directoryperdb)
+ newPath /= database;
+ class Replacer : public FileOp {
public:
- RepairFileDeleter( OperationContext* txn,
- const string& dbName,
- const string& pathString,
- const Path& path )
- : _txn(txn),
- _dbName( dbName ),
- _pathString( pathString ),
- _path( path ),
- _success( false ) {
+ Replacer(const Path& newPath) : newPath_(newPath) {}
+
+ private:
+ const boost::filesystem::path& newPath_;
+ virtual bool apply(const Path& p) {
+ if (!boost::filesystem::exists(p))
+ return false;
+ boostRenameWrapper(p, newPath_ / p.leaf());
+ return true;
}
+ virtual const char* op() const {
+ return "renaming";
+ }
+ } replacer(newPath);
+ _applyOpToDataFiles(database, replacer, true, reservedPathString);
+}
+
+// generate a directory name for storing temp data files
+Path uniqueReservedPath(const char* prefix) {
+ Path repairPath = Path(storageGlobalParams.repairpath);
+ Path reservedPath;
+ int i = 0;
+ bool exists = false;
+ do {
+ stringstream ss;
+ ss << prefix << "_repairDatabase_" << i++;
+ reservedPath = repairPath / ss.str();
+ MONGO_ASSERT_ON_EXCEPTION(exists = boost::filesystem::exists(reservedPath));
+ } while (exists);
+ return reservedPath;
+}
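// For illustration: with --repairpath /data/repair and a prefix of "backup",
// this probes /data/repair/backup_repairDatabase_0, ..._1, ... until it finds
// a directory name that does not exist yet.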
- ~RepairFileDeleter() {
- if ( _success )
- return;
+void _applyOpToDataFiles(const string& database,
+ FileOp& fo,
+ bool afterAllocator,
+ const string& path) {
+ if (afterAllocator)
+ FileAllocator::get()->waitUntilFinished();
+ string c = database;
+ c += '.';
+ boost::filesystem::path p(path);
+ if (storageGlobalParams.directoryperdb)
+ p /= database;
+ boost::filesystem::path q;
+ q = p / (c + "ns");
+ bool ok = false;
+ MONGO_ASSERT_ON_EXCEPTION(ok = fo.apply(q));
+ if (ok) {
+ LOG(2) << fo.op() << " file " << q.string() << endl;
+ }
+ int i = 0;
+    int extra = 10;  // should not be necessary; this is defensive in case there are missing files
+ while (1) {
+ verify(i <= DiskLoc::MaxFiles);
+ stringstream ss;
+ ss << c << i;
+ q = p / ss.str();
+ MONGO_ASSERT_ON_EXCEPTION(ok = fo.apply(q));
+ if (ok) {
+ if (extra != 10) {
+ LOG(1) << fo.op() << " file " << q.string() << endl;
+ log() << " _applyOpToDataFiles() warning: extra == " << extra << endl;
+ }
+ } else if (--extra <= 0)
+ break;
+ i++;
+ }
+}
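// Note: this visits the MMAPv1 file set of a database -- "<db>.ns" plus the
// numbered data files "<db>.0", "<db>.1", ... -- applying the FileOp to each;
// the `extra` counter lets the scan continue past up to nine missing files,
// stopping on the tenth miss.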
- log() << "cleaning up failed repair "
- << "db: " << _dbName << " path: " << _pathString;
+class RepairFileDeleter {
+public:
+ RepairFileDeleter(OperationContext* txn,
+ const string& dbName,
+ const string& pathString,
+ const Path& path)
+ : _txn(txn), _dbName(dbName), _pathString(pathString), _path(path), _success(false) {}
- try {
- getDur().syncDataAndTruncateJournal(_txn);
+ ~RepairFileDeleter() {
+ if (_success)
+ return;
- // need both in case journaling is disabled
- MongoFile::flushAll(true);
+ log() << "cleaning up failed repair "
+ << "db: " << _dbName << " path: " << _pathString;
- MONGO_ASSERT_ON_EXCEPTION( boost::filesystem::remove_all( _path ) );
- }
- catch ( DBException& e ) {
- error() << "RepairFileDeleter failed to cleanup: " << e;
- error() << "aborting";
- fassertFailed( 17402 );
- }
- }
+ try {
+ getDur().syncDataAndTruncateJournal(_txn);
+
+ // need both in case journaling is disabled
+ MongoFile::flushAll(true);
- void success() {
- _success = true;
+ MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::remove_all(_path));
+ } catch (DBException& e) {
+ error() << "RepairFileDeleter failed to cleanup: " << e;
+ error() << "aborting";
+ fassertFailed(17402);
}
+ }
- private:
- OperationContext* _txn;
- string _dbName;
- string _pathString;
- Path _path;
- bool _success;
- };
+ void success() {
+ _success = true;
+ }
- Status MMAPV1Engine::repairDatabase( OperationContext* txn,
- const std::string& dbName,
- bool preserveClonedFilesOnFailure,
- bool backupOriginalFiles ) {
- unique_ptr<RepairFileDeleter> repairFileDeleter;
+private:
+ OperationContext* _txn;
+ string _dbName;
+ string _pathString;
+ Path _path;
+ bool _success;
+};
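// Note: RepairFileDeleter acts as an RAII cleanup guard. Unless success() is
// called, its destructor truncates the journal, flushes the memory-mapped
// files, and removes the repair directory, so an aborted repair leaves no
// partial copies behind.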
+
+Status MMAPV1Engine::repairDatabase(OperationContext* txn,
+ const std::string& dbName,
+ bool preserveClonedFilesOnFailure,
+ bool backupOriginalFiles) {
+ unique_ptr<RepairFileDeleter> repairFileDeleter;
+
+ // Must be done before and after repair
+ getDur().syncDataAndTruncateJournal(txn);
+
+ intmax_t totalSize = dbSize(dbName);
+ intmax_t freeSize = File::freeSpace(storageGlobalParams.repairpath);
+
+ if (freeSize > -1 && freeSize < totalSize) {
+ return Status(ErrorCodes::OutOfDiskSpace,
+ str::stream()
+ << "Cannot repair database " << dbName << " having size: " << totalSize
+ << " (bytes) because free disk space is: " << freeSize << " (bytes)");
+ }
- // Must be done before and after repair
- getDur().syncDataAndTruncateJournal(txn);
+ txn->checkForInterrupt();
- intmax_t totalSize = dbSize( dbName );
- intmax_t freeSize = File::freeSpace(storageGlobalParams.repairpath);
+ Path reservedPath = uniqueReservedPath(
+ (preserveClonedFilesOnFailure || backupOriginalFiles) ? "backup" : "_tmp");
+ bool created = false;
+ MONGO_ASSERT_ON_EXCEPTION(created = boost::filesystem::create_directory(reservedPath));
+ invariant(created);
+ string reservedPathString = reservedPath.string();
- if ( freeSize > -1 && freeSize < totalSize ) {
- return Status( ErrorCodes::OutOfDiskSpace,
- str::stream() << "Cannot repair database " << dbName
- << " having size: " << totalSize
- << " (bytes) because free disk space is: " << freeSize << " (bytes)" );
- }
+ if (!preserveClonedFilesOnFailure)
+ repairFileDeleter.reset(
+ new RepairFileDeleter(txn, dbName, reservedPathString, reservedPath));
- txn->checkForInterrupt();
+ {
+ Database* originalDatabase = dbHolder().openDb(txn, dbName);
+ if (originalDatabase == NULL) {
+ return Status(ErrorCodes::NamespaceNotFound, "database does not exist to repair");
+ }
- Path reservedPath =
- uniqueReservedPath( ( preserveClonedFilesOnFailure || backupOriginalFiles ) ?
- "backup" : "_tmp" );
- bool created = false;
- MONGO_ASSERT_ON_EXCEPTION( created = boost::filesystem::create_directory( reservedPath ) );
- invariant( created );
- string reservedPathString = reservedPath.string();
+ unique_ptr<MMAPV1DatabaseCatalogEntry> dbEntry;
+ unique_ptr<Database> tempDatabase;
- if ( !preserveClonedFilesOnFailure )
- repairFileDeleter.reset( new RepairFileDeleter( txn,
- dbName,
- reservedPathString,
- reservedPath ) );
+ // Must call this before MMAPV1DatabaseCatalogEntry's destructor closes the DB files
+ ON_BLOCK_EXIT(&dur::DurableInterface::syncDataAndTruncateJournal, &getDur(), txn);
{
- Database* originalDatabase = dbHolder().openDb(txn, dbName);
- if (originalDatabase == NULL) {
- return Status(ErrorCodes::NamespaceNotFound, "database does not exist to repair");
- }
-
- unique_ptr<MMAPV1DatabaseCatalogEntry> dbEntry;
- unique_ptr<Database> tempDatabase;
+ dbEntry.reset(new MMAPV1DatabaseCatalogEntry(
+ txn, dbName, reservedPathString, storageGlobalParams.directoryperdb, true));
+ tempDatabase.reset(new Database(txn, dbName, dbEntry.get()));
+ }
- // Must call this before MMAPV1DatabaseCatalogEntry's destructor closes the DB files
- ON_BLOCK_EXIT(&dur::DurableInterface::syncDataAndTruncateJournal, &getDur(), txn);
+ map<string, CollectionOptions> namespacesToCopy;
+ {
+ string ns = dbName + ".system.namespaces";
+ OldClientContext ctx(txn, ns);
+ Collection* coll = originalDatabase->getCollection(ns);
+ if (coll) {
+ auto cursor = coll->getCursor(txn);
+ while (auto record = cursor->next()) {
+ BSONObj obj = record->data.releaseToBson();
- {
- dbEntry.reset(new MMAPV1DatabaseCatalogEntry(txn,
- dbName,
- reservedPathString,
- storageGlobalParams.directoryperdb,
- true));
- tempDatabase.reset( new Database(txn, dbName, dbEntry.get()));
- }
+ string ns = obj["name"].String();
- map<string,CollectionOptions> namespacesToCopy;
- {
- string ns = dbName + ".system.namespaces";
- OldClientContext ctx(txn, ns );
- Collection* coll = originalDatabase->getCollection( ns );
- if ( coll ) {
- auto cursor = coll->getCursor(txn);
- while (auto record = cursor->next()) {
- BSONObj obj = record->data.releaseToBson();
-
- string ns = obj["name"].String();
-
- NamespaceString nss( ns );
- if ( nss.isSystem() ) {
- if ( nss.isSystemDotIndexes() )
- continue;
- if ( nss.coll() == "system.namespaces" )
- continue;
- }
-
- if ( !nss.isNormal() )
+ NamespaceString nss(ns);
+ if (nss.isSystem()) {
+ if (nss.isSystemDotIndexes())
continue;
+ if (nss.coll() == "system.namespaces")
+ continue;
+ }
+
+ if (!nss.isNormal())
+ continue;
- CollectionOptions options;
- if ( obj["options"].isABSONObj() ) {
- Status status = options.parse( obj["options"].Obj() );
- if ( !status.isOK() )
- return status;
- }
- namespacesToCopy[ns] = options;
+ CollectionOptions options;
+ if (obj["options"].isABSONObj()) {
+ Status status = options.parse(obj["options"].Obj());
+ if (!status.isOK())
+ return status;
}
+ namespacesToCopy[ns] = options;
}
}
+ }
- for ( map<string,CollectionOptions>::const_iterator i = namespacesToCopy.begin();
- i != namespacesToCopy.end();
- ++i ) {
- string ns = i->first;
- CollectionOptions options = i->second;
-
- Collection* tempCollection = NULL;
- {
- WriteUnitOfWork wunit(txn);
- tempCollection = tempDatabase->createCollection(txn, ns, options, false);
- wunit.commit();
- }
+ for (map<string, CollectionOptions>::const_iterator i = namespacesToCopy.begin();
+ i != namespacesToCopy.end();
+ ++i) {
+ string ns = i->first;
+ CollectionOptions options = i->second;
- OldClientContext readContext(txn, ns, originalDatabase);
- Collection* originalCollection = originalDatabase->getCollection( ns );
- invariant( originalCollection );
-
- // data
-
- // TODO SERVER-14812 add a mode that drops duplicates rather than failing
- MultiIndexBlock indexer(txn, tempCollection );
- {
- vector<BSONObj> indexes;
- IndexCatalog::IndexIterator ii =
- originalCollection->getIndexCatalog()->getIndexIterator( txn, false );
- while ( ii.more() ) {
- IndexDescriptor* desc = ii.next();
- indexes.push_back( desc->infoObj() );
- }
+ Collection* tempCollection = NULL;
+ {
+ WriteUnitOfWork wunit(txn);
+ tempCollection = tempDatabase->createCollection(txn, ns, options, false);
+ wunit.commit();
+ }
- Status status = indexer.init( indexes );
- if (!status.isOK()) {
- return status;
- }
- }
+ OldClientContext readContext(txn, ns, originalDatabase);
+ Collection* originalCollection = originalDatabase->getCollection(ns);
+ invariant(originalCollection);
- auto cursor = originalCollection->getCursor(txn);
- while (auto record = cursor->next()) {
- BSONObj doc = record->data.releaseToBson();
-
- WriteUnitOfWork wunit(txn);
- StatusWith<RecordId> result = tempCollection->insertDocument(txn,
- doc,
- &indexer,
- false);
- if ( !result.isOK() )
- return result.getStatus();
-
- wunit.commit();
- txn->checkForInterrupt();
- }
-
- Status status = indexer.doneInserting();
- if (!status.isOK())
- return status;
+ // data
- {
- WriteUnitOfWork wunit(txn);
- indexer.commit();
- wunit.commit();
+ // TODO SERVER-14812 add a mode that drops duplicates rather than failing
+ MultiIndexBlock indexer(txn, tempCollection);
+ {
+ vector<BSONObj> indexes;
+ IndexCatalog::IndexIterator ii =
+ originalCollection->getIndexCatalog()->getIndexIterator(txn, false);
+ while (ii.more()) {
+ IndexDescriptor* desc = ii.next();
+ indexes.push_back(desc->infoObj());
}
+ Status status = indexer.init(indexes);
+ if (!status.isOK()) {
+ return status;
+ }
}
- getDur().syncDataAndTruncateJournal(txn);
+ auto cursor = originalCollection->getCursor(txn);
+ while (auto record = cursor->next()) {
+ BSONObj doc = record->data.releaseToBson();
- // need both in case journaling is disabled
- MongoFile::flushAll(true);
+ WriteUnitOfWork wunit(txn);
+ StatusWith<RecordId> result =
+ tempCollection->insertDocument(txn, doc, &indexer, false);
+ if (!result.isOK())
+ return result.getStatus();
+
+ wunit.commit();
+ txn->checkForInterrupt();
+ }
+
+ Status status = indexer.doneInserting();
+ if (!status.isOK())
+ return status;
- txn->checkForInterrupt();
+ {
+ WriteUnitOfWork wunit(txn);
+ indexer.commit();
+ wunit.commit();
+ }
}
- // at this point if we abort, we don't want to delete new files
- // as they might be the only copies
+ getDur().syncDataAndTruncateJournal(txn);
- if ( repairFileDeleter.get() )
- repairFileDeleter->success();
+ // need both in case journaling is disabled
+ MongoFile::flushAll(true);
- // Close the database so we can rename/delete the original data files
- dbHolder().close(txn, dbName);
+ txn->checkForInterrupt();
+ }
- if ( backupOriginalFiles ) {
- _renameForBackup( dbName, reservedPath );
- }
- else {
- // first make new directory before deleting data
- Path newDir = Path(storageGlobalParams.dbpath) / dbName;
- MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir));
+ // at this point if we abort, we don't want to delete new files
+ // as they might be the only copies
- // this deletes old files
- _deleteDataFiles( dbName );
+ if (repairFileDeleter.get())
+ repairFileDeleter->success();
- if ( !boost::filesystem::exists(newDir) ) {
- // we deleted because of directoryperdb
- // re-create
- MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir));
- }
- }
+ // Close the database so we can rename/delete the original data files
+ dbHolder().close(txn, dbName);
+
+ if (backupOriginalFiles) {
+ _renameForBackup(dbName, reservedPath);
+ } else {
+ // first make new directory before deleting data
+ Path newDir = Path(storageGlobalParams.dbpath) / dbName;
+ MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir));
- _replaceWithRecovered( dbName, reservedPathString.c_str() );
+ // this deletes old files
+ _deleteDataFiles(dbName);
- if (!backupOriginalFiles) {
- MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::remove_all(reservedPath));
+ if (!boost::filesystem::exists(newDir)) {
+ // we deleted because of directoryperdb
+ // re-create
+ MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir));
}
+ }
- // Reopen the database so it's discoverable
- dbHolder().openDb(txn, dbName);
+ _replaceWithRecovered(dbName, reservedPathString.c_str());
- return Status::OK();
+ if (!backupOriginalFiles) {
+ MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::remove_all(reservedPath));
}
+ // Reopen the database so it's discoverable
+ dbHolder().openDb(txn, dbName);
+ return Status::OK();
+}
}
diff --git a/src/mongo/db/storage/oplog_hack.cpp b/src/mongo/db/storage/oplog_hack.cpp
index b25268e4271..e42946e5611 100644
--- a/src/mongo/db/storage/oplog_hack.cpp
+++ b/src/mongo/db/storage/oplog_hack.cpp
@@ -42,40 +42,40 @@
namespace mongo {
namespace oploghack {
- StatusWith<RecordId> keyForOptime(const Timestamp& opTime) {
- // Make sure secs and inc wouldn't be negative if treated as signed. This ensures that they
- // don't sort differently when put in a RecordId. It also avoids issues with Null/Invalid
- // RecordIds
- if (opTime.getSecs() > uint32_t(std::numeric_limits<int32_t>::max()))
- return StatusWith<RecordId>(ErrorCodes::BadValue, "ts secs too high");
+StatusWith<RecordId> keyForOptime(const Timestamp& opTime) {
+ // Make sure secs and inc wouldn't be negative if treated as signed. This ensures that they
+ // don't sort differently when put in a RecordId. It also avoids issues with Null/Invalid
+    // RecordIds.
+ if (opTime.getSecs() > uint32_t(std::numeric_limits<int32_t>::max()))
+ return StatusWith<RecordId>(ErrorCodes::BadValue, "ts secs too high");
- if (opTime.getInc() > uint32_t(std::numeric_limits<int32_t>::max()))
- return StatusWith<RecordId>(ErrorCodes::BadValue, "ts inc too high");
+ if (opTime.getInc() > uint32_t(std::numeric_limits<int32_t>::max()))
+ return StatusWith<RecordId>(ErrorCodes::BadValue, "ts inc too high");
- const RecordId out = RecordId(opTime.getSecs(), opTime.getInc());
- if (out <= RecordId::min())
- return StatusWith<RecordId>(ErrorCodes::BadValue, "ts too low");
- if (out >= RecordId::max())
- return StatusWith<RecordId>(ErrorCodes::BadValue, "ts too high");
+ const RecordId out = RecordId(opTime.getSecs(), opTime.getInc());
+ if (out <= RecordId::min())
+ return StatusWith<RecordId>(ErrorCodes::BadValue, "ts too low");
+ if (out >= RecordId::max())
+ return StatusWith<RecordId>(ErrorCodes::BadValue, "ts too high");
- return StatusWith<RecordId>(out);
- }
+ return StatusWith<RecordId>(out);
+}
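// For illustration (assuming RecordId(high, low) packs (high << 32) | low, as
// the int32 range checks above imply): Timestamp(5, 1) maps to the key
// (5LL << 32) | 1, so RecordIds sort in the same order as the optimes they
// encode.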
- /**
- * data and len must be the arguments from RecordStore::insert() on an oplog collection.
- */
- StatusWith<RecordId> extractKey(const char* data, int len) {
- DEV invariant(validateBSON(data, len).isOK());
+/**
+ * data and len must be the arguments from RecordStore::insert() on an oplog collection.
+ */
+StatusWith<RecordId> extractKey(const char* data, int len) {
+ DEV invariant(validateBSON(data, len).isOK());
- const BSONObj obj(data);
- const BSONElement elem = obj["ts"];
- if (elem.eoo())
- return StatusWith<RecordId>(ErrorCodes::BadValue, "no ts field");
- if (elem.type() != bsonTimestamp)
- return StatusWith<RecordId>(ErrorCodes::BadValue, "ts must be a Timestamp");
+ const BSONObj obj(data);
+ const BSONElement elem = obj["ts"];
+ if (elem.eoo())
+ return StatusWith<RecordId>(ErrorCodes::BadValue, "no ts field");
+ if (elem.type() != bsonTimestamp)
+ return StatusWith<RecordId>(ErrorCodes::BadValue, "ts must be a Timestamp");
- return keyForOptime(elem.timestamp());
- }
+ return keyForOptime(elem.timestamp());
+}
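// For illustration: given an oplog entry shaped like
//     { ts: Timestamp(1435000000, 7), op: "i", ... }
// extractKey() returns keyForOptime(Timestamp(1435000000, 7)); a document
// without a top-level Timestamp "ts" field yields ErrorCodes::BadValue.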
} // namespace oploghack
-} // namespace mongo
+} // namespace mongo
diff --git a/src/mongo/db/storage/oplog_hack.h b/src/mongo/db/storage/oplog_hack.h
index 92138959304..11f9e07264f 100644
--- a/src/mongo/db/storage/oplog_hack.h
+++ b/src/mongo/db/storage/oplog_hack.h
@@ -32,21 +32,21 @@
#include "mongo/base/status_with.h"
namespace mongo {
- class RecordId;
- class Timestamp;
+class RecordId;
+class Timestamp;
namespace oploghack {
- /**
- * Converts Timestamp to a RecordId in an unspecified manor that is safe to use as the key to
- * in a RecordStore.
- */
- StatusWith<RecordId> keyForOptime(const Timestamp& opTime);
+/**
+ * Converts a Timestamp to a RecordId in an unspecified manner that is safe to use as the key
+ * in a RecordStore.
+ */
+StatusWith<RecordId> keyForOptime(const Timestamp& opTime);
- /**
- * data and len must be the arguments from RecordStore::insert() on an oplog collection.
- */
- StatusWith<RecordId> extractKey(const char* data, int len);
+/**
+ * data and len must be the arguments from RecordStore::insert() on an oplog collection.
+ */
+StatusWith<RecordId> extractKey(const char* data, int len);
} // namespace oploghack
} // namespace mongo
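
For context, a hedged sketch of how extractKey is meant to be driven: the caller hands it the raw BSON of an oplog entry, and the resulting RecordId sorts in "ts" order. The types (BSONObj, Timestamp, invariant) are the ones used elsewhere in this tree; the entry contents are made up:

    // Hypothetical usage; the oplog entry itself is illustrative only.
    BSONObj oplogEntry = BSON("ts" << Timestamp(1434758400, 7) << "op" << "n");
    StatusWith<RecordId> key =
        oploghack::extractKey(oplogEntry.objdata(), oplogEntry.objsize());
    invariant(key.isOK());
    // key.getValue() orders identically to the entry's "ts" field.
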
diff --git a/src/mongo/db/storage/paths.cpp b/src/mongo/db/storage/paths.cpp
index cb2913c6b06..2f6fb4d4a77 100644
--- a/src/mongo/db/storage/paths.cpp
+++ b/src/mongo/db/storage/paths.cpp
@@ -35,79 +35,79 @@
namespace mongo {
- /** from a full path */
- RelativePath RelativePath::fromFullPath(boost::filesystem::path dbp,
- boost::filesystem::path f) {
- // filesystem::path normalizes / and backslash
- std::string fullpath = f.string();
- std::string relative = str::after(fullpath, dbp.string());
- if( relative.empty() ) {
- log() << "warning file is not under db path? " << fullpath << ' ' << dbp.string();
- RelativePath rp;
- rp._p = fullpath;
- return rp;
- }
- if( str::startsWith(relative, "/") || str::startsWith(relative, "\\") ) {
- relative.erase(0, 1);
- }
+/** from a full path */
+RelativePath RelativePath::fromFullPath(boost::filesystem::path dbp, boost::filesystem::path f) {
+ // filesystem::path normalizes / and backslash
+ std::string fullpath = f.string();
+ std::string relative = str::after(fullpath, dbp.string());
+ if (relative.empty()) {
+ log() << "warning file is not under db path? " << fullpath << ' ' << dbp.string();
RelativePath rp;
- rp._p = relative;
+ rp._p = fullpath;
return rp;
}
+ if (str::startsWith(relative, "/") || str::startsWith(relative, "\\")) {
+ relative.erase(0, 1);
+ }
+ RelativePath rp;
+ rp._p = relative;
+ return rp;
+}
- dev_t getPartition(const std::string& path){
- struct stat stats;
-
- if (stat(path.c_str(), &stats) != 0){
- uasserted(13646, str::stream() << "stat() failed for file: " << path << " " << errnoWithDescription());
- }
+dev_t getPartition(const std::string& path) {
+ struct stat stats;
- return stats.st_dev;
+ if (stat(path.c_str(), &stats) != 0) {
+ uasserted(13646,
+ str::stream() << "stat() failed for file: " << path << " "
+ << errnoWithDescription());
}
- void flushMyDirectory(const boost::filesystem::path& file) {
-#ifdef __linux__ // this isn't needed elsewhere
- static bool _warnedAboutFilesystem = false;
- // if called without a fully qualified path it asserts; that makes mongoperf fail.
- // so make a warning. need a better solution longer term.
- // massert(13652, str::stream() << "Couldn't find parent dir for file: " << file.string(),);
- if (!file.has_branch_path()) {
- log() << "warning flushMyDirectory couldn't find parent dir for file: "
- << file.string();
- return;
- }
+ return stats.st_dev;
+}
+
+void flushMyDirectory(const boost::filesystem::path& file) {
+#ifdef __linux__ // this isn't needed elsewhere
+ static bool _warnedAboutFilesystem = false;
+ // If called without a fully qualified path this asserts, which makes mongoperf fail.
+ // So we log a warning instead. We need a better solution long term.
+ // massert(13652, str::stream() << "Couldn't find parent dir for file: " << file.string(),);
+ if (!file.has_branch_path()) {
+ log() << "warning flushMyDirectory couldn't find parent dir for file: " << file.string();
+ return;
+ }
- boost::filesystem::path dir = file.branch_path(); // parent_path in new boosts
+ boost::filesystem::path dir = file.branch_path(); // parent_path in new boosts
- LOG(1) << "flushing directory " << dir.string();
+ LOG(1) << "flushing directory " << dir.string();
- int fd = ::open(dir.string().c_str(), O_RDONLY); // DO NOT THROW OR ASSERT BEFORE CLOSING
- massert(13650, str::stream() << "Couldn't open directory '" << dir.string()
- << "' for flushing: " << errnoWithDescription(),
- fd >= 0);
- if (fsync(fd) != 0) {
- int e = errno;
- if (e == EINVAL) { // indicates filesystem does not support synchronization
- if (!_warnedAboutFilesystem) {
- log() << "\tWARNING: This file system is not supported. For further information"
- << " see:"
- << startupWarningsLog;
- log() << "\t\t\thttp://dochub.mongodb.org/core/unsupported-filesystems"
- << startupWarningsLog;
- log() << "\t\tPlease notify MongoDB, Inc. if an unlisted filesystem generated "
- << "this warning." << startupWarningsLog;
- _warnedAboutFilesystem = true;
- }
- }
- else {
- close(fd);
- massert(13651, str::stream() << "Couldn't fsync directory '" << dir.string()
- << "': " << errnoWithDescription(e),
- false);
+ int fd = ::open(dir.string().c_str(), O_RDONLY); // DO NOT THROW OR ASSERT BEFORE CLOSING
+ massert(13650,
+ str::stream() << "Couldn't open directory '" << dir.string()
+ << "' for flushing: " << errnoWithDescription(),
+ fd >= 0);
+ if (fsync(fd) != 0) {
+ int e = errno;
+ if (e == EINVAL) { // indicates filesystem does not support synchronization
+ if (!_warnedAboutFilesystem) {
+ log() << "\tWARNING: This file system is not supported. For further information"
+ << " see:" << startupWarningsLog;
+ log() << "\t\t\thttp://dochub.mongodb.org/core/unsupported-filesystems"
+ << startupWarningsLog;
+ log() << "\t\tPlease notify MongoDB, Inc. if an unlisted filesystem generated "
+ << "this warning." << startupWarningsLog;
+ _warnedAboutFilesystem = true;
}
+ } else {
+ close(fd);
+ massert(13651,
+ str::stream() << "Couldn't fsync directory '" << dir.string()
+ << "': " << errnoWithDescription(e),
+ false);
}
- close(fd);
-#endif
}
+ close(fd);
+#endif
+}
}
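
flushMyDirectory is the POSIX durable-update pattern: after creating or renaming a file, the parent directory's entry must itself be fsync'd or the change can vanish on a crash. A stripped-down sketch of the same pattern, returning a bool instead of using the massert/log machinery above:

    #include <cerrno>
    #include <fcntl.h>
    #include <unistd.h>

    // Minimal sketch; error handling is simplified relative to the real code.
    bool fsyncDirectory(const char* dirPath) {
        int fd = ::open(dirPath, O_RDONLY);
        if (fd < 0)
            return false;
        // EINVAL means the filesystem cannot sync a directory handle; the
        // real code warns once about the unsupported filesystem and goes on.
        bool ok = (::fsync(fd) == 0 || errno == EINVAL);
        ::close(fd);
        return ok;
    }
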
diff --git a/src/mongo/db/storage/paths.h b/src/mongo/db/storage/paths.h
index 8286c920566..e662ab41bf9 100644
--- a/src/mongo/db/storage/paths.h
+++ b/src/mongo/db/storage/paths.h
@@ -40,54 +40,61 @@
#include "mongo/db/storage_options.h"
namespace mongo {
-
- using namespace mongoutils;
-
- /** this is very much like a boost::path. however, we define a new type to get some type
- checking. if you want to say 'my param MUST be a relative path", use this.
- */
- struct RelativePath {
- std::string _p;
-
- bool empty() const { return _p.empty(); }
-
- static RelativePath fromRelativePath(const std::string& f) {
- RelativePath rp;
- rp._p = f;
- return rp;
- }
-
- /**
- * Returns path relative to 'dbpath' from a full path 'f'.
- */
- static RelativePath fromFullPath(boost::filesystem::path dbpath,
- boost::filesystem::path f);
-
- std::string toString() const { return _p; }
-
- bool operator!=(const RelativePath& r) const { return _p != r._p; }
- bool operator==(const RelativePath& r) const { return _p == r._p; }
- bool operator<(const RelativePath& r) const { return _p < r._p; }
-
- std::string asFullPath() const {
- boost::filesystem::path x(storageGlobalParams.dbpath);
- x /= _p;
- return x.string();
- }
-
- };
-
- dev_t getPartition(const std::string& path);
-
- inline bool onSamePartition(const std::string& path1, const std::string& path2){
- dev_t dev1 = getPartition(path1);
- dev_t dev2 = getPartition(path2);
-
- return dev1 == dev2;
+
+using namespace mongoutils;
+
+/** This is very much like a boost::path. However, we define a new type to get some type
+ checking. If you want to say "my param MUST be a relative path", use this.
+*/
+struct RelativePath {
+ std::string _p;
+
+ bool empty() const {
+ return _p.empty();
+ }
+
+ static RelativePath fromRelativePath(const std::string& f) {
+ RelativePath rp;
+ rp._p = f;
+ return rp;
}
- void flushMyDirectory(const boost::filesystem::path& file);
+ /**
+ * Returns path relative to 'dbpath' from a full path 'f'.
+ */
+ static RelativePath fromFullPath(boost::filesystem::path dbpath, boost::filesystem::path f);
+
+ std::string toString() const {
+ return _p;
+ }
+
+ bool operator!=(const RelativePath& r) const {
+ return _p != r._p;
+ }
+ bool operator==(const RelativePath& r) const {
+ return _p == r._p;
+ }
+ bool operator<(const RelativePath& r) const {
+ return _p < r._p;
+ }
+
+ std::string asFullPath() const {
+ boost::filesystem::path x(storageGlobalParams.dbpath);
+ x /= _p;
+ return x.string();
+ }
+};
+
+dev_t getPartition(const std::string& path);
+
+inline bool onSamePartition(const std::string& path1, const std::string& path2) {
+ dev_t dev1 = getPartition(path1);
+ dev_t dev2 = getPartition(path2);
+
+ return dev1 == dev2;
+}
- boost::filesystem::path ensureParentDirCreated(const boost::filesystem::path& p);
+void flushMyDirectory(const boost::filesystem::path& file);
+boost::filesystem::path ensureParentDirCreated(const boost::filesystem::path& p);
}
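
A typical reason to ask onSamePartition is to decide whether a file move can be a single atomic rename or must fall back to copy-and-delete across devices. A sketch of such a caller; moveFile itself is hypothetical and not part of this header:

    #include <boost/filesystem.hpp>
    #include <string>

    // Hypothetical caller: rename(2) is only atomic within one partition.
    void moveFile(const std::string& from, const std::string& to) {
        if (onSamePartition(from, to)) {
            boost::filesystem::rename(from, to);   // atomic, same device
        } else {
            boost::filesystem::copy_file(from, to);
            boost::filesystem::remove(from);       // two steps, not atomic
        }
    }
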
diff --git a/src/mongo/db/storage/record_data.h b/src/mongo/db/storage/record_data.h
index 612408f84c6..03409d911ea 100644
--- a/src/mongo/db/storage/record_data.h
+++ b/src/mongo/db/storage/record_data.h
@@ -35,58 +35,69 @@
namespace mongo {
- // TODO: Does this need to have move support?
- /**
- * A replacement for the Record class. This class represents data in a record store.
- * The _dataPtr attribute is used to manage memory ownership. If _dataPtr is NULL, then
- * the memory pointed to by _data is owned by the RecordStore. If _dataPtr is not NULL, then
- * it must point to the same array as _data.
- */
- class RecordData {
- public:
- RecordData() : _data( NULL ), _size( 0 ) {}
- RecordData(const char* data, int size): _data(data), _size(size) { }
-
- RecordData(SharedBuffer ownedData, int size)
- : _data(ownedData.get()), _size(size), _ownedData(std::move(ownedData)) {
- }
-
- const char* data() const { return _data; }
-
- int size() const { return _size; }
-
- /**
- * Returns true if this owns its own memory, and false otherwise
- */
- bool isOwned() const { return _ownedData.get(); }
-
- SharedBuffer releaseBuffer() {
- return std::move(_ownedData);
- }
-
- BSONObj toBson() const { return isOwned() ? BSONObj(_ownedData) : BSONObj(_data); }
-
- BSONObj releaseToBson() { return isOwned() ? BSONObj(releaseBuffer()) : BSONObj(_data); }
-
- // TODO uncomment once we require compilers that support overloading for rvalue this.
- // BSONObj toBson() && { return releaseToBson(); }
+// TODO: Does this need to have move support?
+/**
+ * A replacement for the Record class. This class represents data in a record store.
+ * The _ownedData attribute is used to manage memory ownership. If _ownedData is null, then
+ * the memory pointed to by _data is owned by the RecordStore. If _ownedData is not null, then
+ * it must point to the same array as _data.
+ */
+class RecordData {
+public:
+ RecordData() : _data(NULL), _size(0) {}
+ RecordData(const char* data, int size) : _data(data), _size(size) {}
- RecordData getOwned() const {
- if (isOwned()) return *this;
- auto buffer = SharedBuffer::allocate(_size);
- memcpy(buffer.get(), _data, _size);
- return RecordData(buffer, _size);
- }
+ RecordData(SharedBuffer ownedData, int size)
+ : _data(ownedData.get()), _size(size), _ownedData(std::move(ownedData)) {}
- void makeOwned() {
- if (isOwned()) return;
- *this = getOwned();
- }
+ const char* data() const {
+ return _data;
+ }
- private:
- const char* _data;
- int _size;
- SharedBuffer _ownedData;
- };
+ int size() const {
+ return _size;
+ }
-} // namespace mongo
+ /**
+ * Returns true if this owns its own memory, and false otherwise
+ */
+ bool isOwned() const {
+ return _ownedData.get();
+ }
+
+ SharedBuffer releaseBuffer() {
+ return std::move(_ownedData);
+ }
+
+ BSONObj toBson() const {
+ return isOwned() ? BSONObj(_ownedData) : BSONObj(_data);
+ }
+
+ BSONObj releaseToBson() {
+ return isOwned() ? BSONObj(releaseBuffer()) : BSONObj(_data);
+ }
+
+ // TODO uncomment once we require compilers that support overloading for rvalue this.
+ // BSONObj toBson() && { return releaseToBson(); }
+
+ RecordData getOwned() const {
+ if (isOwned())
+ return *this;
+ auto buffer = SharedBuffer::allocate(_size);
+ memcpy(buffer.get(), _data, _size);
+ return RecordData(buffer, _size);
+ }
+
+ void makeOwned() {
+ if (isOwned())
+ return;
+ *this = getOwned();
+ }
+
+private:
+ const char* _data;
+ int _size;
+ SharedBuffer _ownedData;
+};
+
+} // namespace mongo
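
The ownership contract is the point of this class: an unowned RecordData is a borrowed view into storage-engine memory, while getOwned()/makeOwned() pin a private SharedBuffer copy that survives yields. A sketch of the expected caller pattern; the raw buffer arguments are hypothetical:

    // Hypothetical demonstration; recordBytes/recordLen come from a store.
    void copyBeforeYield(const char* recordBytes, int recordLen) {
        RecordData view(recordBytes, recordLen);  // unowned view
        invariant(!view.isOwned());

        RecordData copy = view.getOwned();        // private SharedBuffer copy
        invariant(copy.isOwned());

        view.makeOwned();                         // or convert in place
    }
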
diff --git a/src/mongo/db/storage/record_fetcher.h b/src/mongo/db/storage/record_fetcher.h
index 9255e805b10..66c626ea4d5 100644
--- a/src/mongo/db/storage/record_fetcher.h
+++ b/src/mongo/db/storage/record_fetcher.h
@@ -30,26 +30,26 @@
namespace mongo {
+/**
+ * Used for yielding while data is fetched from disk.
+ *
+ * @see RecordStore::recordNeedsFetch
+ */
+class RecordFetcher {
+public:
+ virtual ~RecordFetcher() {}
+
/**
- * Used for yielding while data is fetched from disk.
- *
- * @see RecordStore::recordNeedsFetch
+ * Performs any setup which is needed prior to yielding locks.
*/
- class RecordFetcher {
- public:
- virtual ~RecordFetcher() { }
+ virtual void setup() = 0;
- /**
- * Performs any setup which is needed prior to yielding locks.
- */
- virtual void setup() = 0;
-
- /**
- * Called after locks are yielded in order to bring data into memory.
- *
- * Should not be called more than once.
- */
- virtual void fetch() = 0;
- };
+ /**
+ * Called after locks are yielded in order to bring data into memory.
+ *
+ * Should not be called more than once.
+ */
+ virtual void fetch() = 0;
+};
-} // namespace mongo
+} // namespace mongo
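
The calling protocol, pieced together from the comments here and in record_store.h below: setup() runs while still holding the lock, the lock is yielded, then fetch() triggers the page fault outside the lock. A sketch of a query-layer caller; yieldLocks()/restoreLocks() are hypothetical stand-ins for the real lock-manager calls:

    // Hypothetical caller following the RecordFetcher protocol.
    void fetchNextOutOfLock(OperationContext* txn, RecordCursor* cursor) {
        std::unique_ptr<RecordFetcher> fetcher = cursor->fetcherForNext();
        if (!fetcher)
            return;               // doc-locking engines never need this
        fetcher->setup();         // still under the lock
        cursor->savePositioned();
        yieldLocks();             // hypothetical
        fetcher->fetch();         // fault the record in without the lock
        restoreLocks();           // hypothetical
        cursor->restore(txn);
    }
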
diff --git a/src/mongo/db/storage/record_store.h b/src/mongo/db/storage/record_store.h
index 5f4285995df..1f5a3f27556 100644
--- a/src/mongo/db/storage/record_store.h
+++ b/src/mongo/db/storage/record_store.h
@@ -41,491 +41,504 @@
namespace mongo {
- class CappedDocumentDeleteCallback;
- class Collection;
- struct CompactOptions;
- struct CompactStats;
- class DocWriter;
- class MAdvise;
- class NamespaceDetails;
- class OperationContext;
- class RecordFetcher;
-
- class RecordStoreCompactAdaptor;
- class RecordStore;
-
- struct ValidateResults;
- class ValidateAdaptor;
+class CappedDocumentDeleteCallback;
+class Collection;
+struct CompactOptions;
+struct CompactStats;
+class DocWriter;
+class MAdvise;
+class NamespaceDetails;
+class OperationContext;
+class RecordFetcher;
+
+class RecordStoreCompactAdaptor;
+class RecordStore;
+
+struct ValidateResults;
+class ValidateAdaptor;
+
+/**
+ * Allows inserting a Record "in-place" without creating a copy ahead of time.
+ */
+class DocWriter {
+public:
+ virtual ~DocWriter() {}
+ virtual void writeDocument(char* buf) const = 0;
+ virtual size_t documentSize() const = 0;
+ virtual bool addPadding() const {
+ return true;
+ }
+};
+
+/**
+ * @see RecordStore::updateRecord
+ */
+class UpdateNotifier {
+public:
+ virtual ~UpdateNotifier() {}
+ virtual Status recordStoreGoingToMove(OperationContext* txn,
+ const RecordId& oldLocation,
+ const char* oldBuffer,
+ size_t oldSize) = 0;
+ virtual Status recordStoreGoingToUpdateInPlace(OperationContext* txn, const RecordId& loc) = 0;
+};
+
+/**
+ * The data items stored in a RecordStore.
+ */
+struct Record {
+ RecordId id;
+ RecordData data;
+};
+
+/**
+ * Retrieves Records from a RecordStore.
+ *
+ * A cursor is constructed with a direction flag with the following effects:
+ * - The direction that next() moves.
+ * - If a restore cannot return to the saved position, cursors will be positioned on the
+ * closest position *after* the query in the direction of the scan.
+ *
+ * A cursor is tied to a transaction, such as the OperationContext or a WriteUnitOfWork
+ * inside that context. Any cursor acquired inside a transaction is invalid outside
+ * of that transaction, instead use the save and restore methods to reestablish the cursor.
+ *
+ * Any method other than invalidate and the save methods may throw WriteConflictException. If
+ * that happens, the cursor may not be used again until it has been saved and successfully
+ * restored. If next() or restore() throw a WCE the cursor's position will be the same as before
+ * the call (strong exception guarantee). All other methods leave the cursor in a valid state
+ * but with an unspecified position (basic exception guarantee). If any exception other than
+ * WCE is thrown, the cursor must be destroyed, which is guaranteed not to leak any resources.
+ *
+ * Any returned unowned BSON is only valid until the next call to any method on this
+ * interface.
+ *
+ * Implementations may override any default implementation if they can provide a more
+ * efficient implementation.
+ */
+class RecordCursor {
+public:
+ virtual ~RecordCursor() = default;
/**
- * Allows inserting a Record "in-place" without creating a copy ahead of time.
+ * Moves forward and returns the new data or boost::none if there is no more data.
+ * Continues returning boost::none once it reaches EOF.
*/
- class DocWriter {
- public:
- virtual ~DocWriter() {}
- virtual void writeDocument( char* buf ) const = 0;
- virtual size_t documentSize() const = 0;
- virtual bool addPadding() const { return true; }
- };
+ virtual boost::optional<Record> next() = 0;
+
+ //
+ // Seeking
+ //
+ // Warning: MMAPv1 cannot detect if RecordIds are valid. Therefore callers should only pass
+ // potentially deleted RecordIds to seek methods if they know that MMAPv1 is not the current
+ // storage engine. All new storage engines must support detecting the existence of Records.
+ //
/**
- * @see RecordStore::updateRecord
+ * Seeks to a Record with the provided id.
+ *
+ * If an exact match can't be found, boost::none will be returned and the resulting position
+ * of the cursor is unspecified.
+ */
+ virtual boost::optional<Record> seekExact(const RecordId& id) = 0;
+
+ //
+ // Saving and restoring state
+ //
+
+ /**
+ * Prepares for state changes in underlying data in a way that allows the cursor's
+ * current position to be restored.
+ *
+ * It is safe to call savePositioned multiple times in a row.
+ * No other method (excluding destructor) may be called until successfully restored.
*/
- class UpdateNotifier {
- public:
- virtual ~UpdateNotifier(){}
- virtual Status recordStoreGoingToMove( OperationContext* txn,
- const RecordId& oldLocation,
- const char* oldBuffer,
- size_t oldSize ) = 0;
- virtual Status recordStoreGoingToUpdateInPlace( OperationContext* txn,
- const RecordId& loc ) = 0;
- };
+ virtual void savePositioned() = 0;
/**
- * The data items stored in a RecordStore.
+ * Prepares for state changes in underlying data without necessarily saving the current
+ * state.
+ *
+ * The cursor's position when restored is unspecified. Caller is expected to seek rather
+ * than call next() following the restore.
+ *
+ * It is safe to call saveUnpositioned multiple times in a row.
+ * No other method (excluding destructor) may be called until successfully restored.
+ */
+ virtual void saveUnpositioned() {
+ savePositioned();
+ }
+
+ /**
+ * Recovers from potential state changes in underlying data.
+ *
+ * Returns false if it is invalid to continue using this iterator. This usually means that
+ * capped deletes have caught up to the position of this iterator and continuing could
+ * result in missed data.
+ *
+ * If the former position no longer exists, but it is safe to continue iterating, the
+ * following call to next() will return the next closest position in the direction of the
+ * scan, if any.
+ *
+ * This handles restoring after either savePositioned() or saveUnpositioned().
+ */
+ virtual bool restore(OperationContext* txn) = 0;
+
+ /**
+ * Inform the cursor that this id is being invalidated.
+ * Must be called between save and restore.
+ *
+ * WARNING: Storage engines other than MMAPv1 should not depend on this being called.
+ */
+ virtual void invalidate(const RecordId& id) {}
+
+ //
+ // RecordFetchers
+ //
+ // Storage engines which do not support document-level locking hold locks at collection or
+ // database granularity. As an optimization, these locks can be yielded when a record needs
+ // to be fetched from secondary storage. If this method returns non-NULL, then it indicates
+ // that the query system layer should yield its locks, following the protocol defined by the
+ // RecordFetcher class, so that a potential page fault is triggered out of the lock.
+ //
+ // Storage engines which support document-level locking need not implement this.
+ //
+ // TODO see if these can be replaced by WriteConflictException.
+ //
+
+ /**
+ * Returns a RecordFetcher if needed for a call to next() or none if unneeded.
+ */
+ virtual std::unique_ptr<RecordFetcher> fetcherForNext() const {
+ return {};
+ }
+
+ /**
+ * Returns a RecordFetcher if needed to fetch the provided Record or none if unneeded.
+ */
+ virtual std::unique_ptr<RecordFetcher> fetcherForId(const RecordId& id) const {
+ return {};
+ }
+};
+
+/**
+ * A RecordStore provides an abstraction used for storing documents in a collection,
+ * or entries in an index. In storage engines implementing the KVEngine, record stores
+ * are also used for implementing catalogs.
+ *
+ * Many methods take an OperationContext parameter. This contains the RecoveryUnit, with
+ * all RecordStore specific transaction information, as well as the LockState. Methods that take
+ * an OperationContext may throw a WriteConflictException.
+ */
+class RecordStore {
+ MONGO_DISALLOW_COPYING(RecordStore);
+
+public:
+ RecordStore(StringData ns) : _ns(ns.toString()) {}
+
+ virtual ~RecordStore() {}
+
+ // META
+
+ // name of the RecordStore implementation
+ virtual const char* name() const = 0;
+
+ virtual const std::string& ns() const {
+ return _ns;
+ }
+
+ /**
+ * The dataSize is an approximation of the sum of the sizes (in bytes) of the
+ * documents or entries in the recordStore.
+ */
+ virtual long long dataSize(OperationContext* txn) const = 0;
+
+ /**
+ * Total number of records in the RecordStore. Implementations may need to cache this
+ * value so that this call takes constant time, as it is called often.
+ */
+ virtual long long numRecords(OperationContext* txn) const = 0;
+
+ virtual bool isCapped() const = 0;
+
+ virtual void setCappedDeleteCallback(CappedDocumentDeleteCallback*) {
+ invariant(false);
+ }
+
+ /**
+ * @param extraInfo - optional, receives more debug info
+ * @param infoLevel - optional, level of debug info to put in (higher is more)
+ * @return total estimated size (in bytes) on stable storage
+ */
+ virtual int64_t storageSize(OperationContext* txn,
+ BSONObjBuilder* extraInfo = NULL,
+ int infoLevel = 0) const = 0;
+
+ // CRUD related
+
+ /**
+ * Get the RecordData at loc, which must exist.
+ *
+ * If unowned data is returned, it is valid until the next modification of this Record or
+ * the lock on this collection is released.
+ *
+ * In general, prefer findRecord or RecordCursor::seekExact since they can tell you if a
+ * record has been removed.
*/
- struct Record {
- RecordId id;
+ virtual RecordData dataFor(OperationContext* txn, const RecordId& loc) const {
RecordData data;
- };
+ invariant(findRecord(txn, loc, &data));
+ return data;
+ }
/**
- * Retrieves Records from a RecordStore.
+ * @param out - If the record exists, the contents of this are set.
+ * @return true iff there is a Record for loc
*
- * A cursor is constructed with a direction flag with the following effects:
- * - The direction that next() moves.
- * - If a restore cannot return to the saved position, cursors will be positioned on the
- * closest position *after* the query in the direction of the scan.
+ * If unowned data is returned, it is valid until the next modification of this Record or
+ * the lock on this collection is released.
*
- * A cursor is tied to a transaction, such as the OperationContext or a WriteUnitOfWork
- * inside that context. Any cursor acquired inside a transaction is invalid outside
- * of that transaction, instead use the save and restore methods to reestablish the cursor.
+ * In general, prefer RecordCursor::seekExact since it can avoid copying data in more
+ * storage engines.
*
- * Any method other than invalidate and the save methods may throw WriteConflict exception. If
- * that happens, the cursor may not be used again until it has been saved and successfully
- * restored. If next() or restore() throw a WCE the cursor's position will be the same as before
- * the call (strong exception guarantee). All other methods leave the cursor in a valid state
- * but with an unspecified position (basic exception guarantee). If any exception other than
- * WCE is thrown, the cursor must be destroyed, which is guaranteed not to leak any resources.
+ * Warning: MMAPv1 cannot detect if RecordIds are valid. Therefore callers should only pass
+ * potentially deleted RecordIds to seek methods if they know that MMAPv1 is not the current
+ * storage engine. All new storage engines must support detecting the existence of Records.
+ */
+ virtual bool findRecord(OperationContext* txn, const RecordId& loc, RecordData* out) const {
+ auto cursor = getCursor(txn);
+ auto record = cursor->seekExact(loc);
+ if (!record)
+ return false;
+
+ record->data.makeOwned(); // Unowned data expires when cursor goes out of scope.
+ *out = std::move(record->data);
+ return true;
+ }
+
+ virtual void deleteRecord(OperationContext* txn, const RecordId& dl) = 0;
+
+ virtual StatusWith<RecordId> insertRecord(OperationContext* txn,
+ const char* data,
+ int len,
+ bool enforceQuota) = 0;
+
+ virtual StatusWith<RecordId> insertRecord(OperationContext* txn,
+ const DocWriter* doc,
+ bool enforceQuota) = 0;
+
+ /**
+ * @param notifier - Only used by record stores which do not support doc-locking.
+ * In the case of a document move, this is called after the document
+ * has been written to the new location, but before it is deleted from
+ * the old location.
+ * In the case of an in-place update, this is called just before the
+ * in-place write occurs.
+ * @return Status or RecordId; the returned RecordId may differ from oldLocation
+ */
+ virtual StatusWith<RecordId> updateRecord(OperationContext* txn,
+ const RecordId& oldLocation,
+ const char* data,
+ int len,
+ bool enforceQuota,
+ UpdateNotifier* notifier) = 0;
+
+ /**
+ * @return Returns 'false' if this record store does not implement
+ * 'updateWithDamages'. If this method returns false, 'updateWithDamages' must not be
+ * called, and all updates must be routed through 'updateRecord' above. This allows the
+ * update framework to avoid doing the work of damage tracking if the underlying record
+ * store cannot utilize that information.
+ */
+ virtual bool updateWithDamagesSupported() const = 0;
+
+ virtual Status updateWithDamages(OperationContext* txn,
+ const RecordId& loc,
+ const RecordData& oldRec,
+ const char* damageSource,
+ const mutablebson::DamageVector& damages) = 0;
+
+ /**
+ * Returns a new cursor over this record store.
*
- * Any returned unowned BSON is only valid until the next call to any method on this
- * interface.
+ * The cursor is logically positioned before the first (or last if !forward) Record in the
+ * collection so that Record will be returned on the first call to next(). Implementations
+ * are allowed to lazily seek to the first Record when next() is called rather than doing
+ * it on construction.
+ */
+ virtual std::unique_ptr<RecordCursor> getCursor(OperationContext* txn,
+ bool forward = true) const = 0;
+
+ /**
+ * Constructs a cursor over a potentially corrupted store, which can be used to salvage
+ * damaged records. The iterator might return every record in the store if all of them
+ * are reachable and not corrupted. Returns NULL if not supported.
*
- * Implementations may override any default implementation if they can provide a more
- * efficient implementation.
+ * Repair cursors are only required to support forward scanning, so it is illegal to call
+ * seekExact() on the returned cursor.
*/
- class RecordCursor {
- public:
- virtual ~RecordCursor() = default;
-
- /**
- * Moves forward and returns the new data or boost::none if there is no more data.
- * Continues returning boost::none once it reaches EOF.
- */
- virtual boost::optional<Record> next() = 0;
-
- //
- // Seeking
- //
- // Warning: MMAPv1 cannot detect if RecordIds are valid. Therefore callers should only pass
- // potentially deleted RecordIds to seek methods if they know that MMAPv1 is not the current
- // storage engine. All new storage engines must support detecting the existence of Records.
- //
-
- /**
- * Seeks to a Record with the provided id.
- *
- * If an exact match can't be found, boost::none will be returned and the resulting position
- * of the cursor is unspecified.
- */
- virtual boost::optional<Record> seekExact(const RecordId& id) = 0;
-
- //
- // Saving and restoring state
- //
-
- /**
- * Prepares for state changes in underlying data in a way that allows the cursor's
- * current position to be restored.
- *
- * It is safe to call savePositioned multiple times in a row.
- * No other method (excluding destructor) may be called until successfully restored.
- */
- virtual void savePositioned() = 0;
-
- /**
- * Prepares for state changes in underlying data without necessarily saving the current
- * state.
- *
- * The cursor's position when restored is unspecified. Caller is expected to seek rather
- * than call next() following the restore.
- *
- * It is safe to call saveUnpositioned multiple times in a row.
- * No other method (excluding destructor) may be called until successfully restored.
- */
- virtual void saveUnpositioned() { savePositioned(); }
-
- /**
- * Recovers from potential state changes in underlying data.
- *
- * Returns false if it is invalid to continue using this iterator. This usually means that
- * capped deletes have caught up to the position of this iterator and continuing could
- * result in missed data.
- *
- * If the former position no longer exists, but it is safe to continue iterating, the
- * following call to next() will return the next closest position in the direction of the
- * scan, if any.
- *
- * This handles restoring after either savePositioned() or saveUnpositioned().
- */
- virtual bool restore(OperationContext* txn) = 0;
-
- /**
- * Inform the cursor that this id is being invalidated.
- * Must be called between save and restore.
- *
- * WARNING: Storage engines other than MMAPv1 should not depend on this being called.
- */
- virtual void invalidate(const RecordId& id) {};
-
- //
- // RecordFetchers
- //
- // Storage engines which do not support document-level locking hold locks at collection or
- // database granularity. As an optimization, these locks can be yielded when a record needs
- // to be fetched from secondary storage. If this method returns non-NULL, then it indicates
- // that the query system layer should yield its locks, following the protocol defined by the
- // RecordFetcher class, so that a potential page fault is triggered out of the lock.
- //
- // Storage engines which support document-level locking need not implement this.
- //
- // TODO see if these can be replaced by WriteConflictException.
- //
-
- /**
- * Returns a RecordFetcher if needed for a call to next() or none if unneeded.
- */
- virtual std::unique_ptr<RecordFetcher> fetcherForNext() const { return {}; }
-
- /**
- * Returns a RecordFetcher if needed to fetch the provided Record or none if unneeded.
- */
- virtual std::unique_ptr<RecordFetcher> fetcherForId(const RecordId& id) const { return {}; }
- };
+ virtual std::unique_ptr<RecordCursor> getCursorForRepair(OperationContext* txn) const {
+ return {};
+ }
/**
- * A RecordStore provides an abstraction used for storing documents in a collection,
- * or entries in an index. In storage engines implementing the KVEngine, record stores
- * are also used for implementing catalogs.
+ * Returns many RecordCursors that partition the RecordStore into many disjoint sets.
+ * Iterating all returned RecordCursors is equivalent to iterating the full store.
*
- * Many methods take an OperationContext parameter. This contains the RecoveryUnit, with
- * all RecordStore specific transaction information, as well as the LockState. Methods that take
- * an OperationContext may throw a WriteConflictException.
+ * Partition cursors are only required to support forward scanning, so it is illegal to call
+ * seekExact() on any of the returned cursors.
+ *
+ * WARNING: the first call to restore() on each cursor may (but is not guaranteed to) be on
+ * a different RecoveryUnit than the initial save. This will be made more sane as part of
+ * SERVER-17364.
+ */
+ virtual std::vector<std::unique_ptr<RecordCursor>> getManyCursors(OperationContext* txn) const {
+ std::vector<std::unique_ptr<RecordCursor>> out(1);
+ out[0] = getCursor(txn);
+ return out;
+ }
+
+ // higher level
+
+
+ /**
+ * Removes all Records.
*/
- class RecordStore {
- MONGO_DISALLOW_COPYING(RecordStore);
- public:
- RecordStore( StringData ns ) : _ns(ns.toString()) { }
-
- virtual ~RecordStore() { }
-
- // META
-
- // name of the RecordStore implementation
- virtual const char* name() const = 0;
-
- virtual const std::string& ns() const { return _ns; }
-
- /**
- * The dataSize is an approximation of the sum of the sizes (in bytes) of the
- * documents or entries in the recordStore.
- */
- virtual long long dataSize(OperationContext* txn) const = 0;
-
- /**
- * Total number of record in the RecordStore. You may need to cache it, so this call
- * takes constant time, as it is called often.
- */
- virtual long long numRecords(OperationContext* txn) const = 0;
-
- virtual bool isCapped() const = 0;
-
- virtual void setCappedDeleteCallback(CappedDocumentDeleteCallback*) {invariant( false );}
-
- /**
- * @param extraInfo - optional more debug info
- * @param level - optional, level of debug info to put in (higher is more)
- * @return total estimate size (in bytes) on stable storage
- */
- virtual int64_t storageSize( OperationContext* txn,
- BSONObjBuilder* extraInfo = NULL,
- int infoLevel = 0 ) const = 0;
-
- // CRUD related
-
- /**
- * Get the RecordData at loc, which must exist.
- *
- * If unowned data is returned, it is valid until the next modification of this Record or
- * the lock on this collection is released.
- *
- * In general, prefer findRecord or RecordCursor::seekExact since they can tell you if a
- * record has been removed.
- */
- virtual RecordData dataFor(OperationContext* txn, const RecordId& loc) const {
- RecordData data;
- invariant(findRecord(txn, loc, &data));
- return data;
- }
-
- /**
- * @param out - If the record exists, the contents of this are set.
- * @return true iff there is a Record for loc
- *
- * If unowned data is returned, it is valid until the next modification of this Record or
- * the lock on this collection is released.
- *
- * In general prefer RecordCursor::seekExact since it can avoid copying data in more
- * storageEngines.
- *
- * Warning: MMAPv1 cannot detect if RecordIds are valid. Therefore callers should only pass
- * potentially deleted RecordIds to seek methods if they know that MMAPv1 is not the current
- * storage engine. All new storage engines must support detecting the existence of Records.
- */
- virtual bool findRecord(OperationContext* txn,
- const RecordId& loc,
- RecordData* out) const {
- auto cursor = getCursor(txn);
- auto record = cursor->seekExact(loc);
- if (!record) return false;
-
- record->data.makeOwned(); // Unowned data expires when cursor goes out of scope.
- *out = std::move(record->data);
- return true;
- }
-
- virtual void deleteRecord( OperationContext* txn, const RecordId& dl ) = 0;
-
- virtual StatusWith<RecordId> insertRecord( OperationContext* txn,
- const char* data,
- int len,
- bool enforceQuota ) = 0;
-
- virtual StatusWith<RecordId> insertRecord( OperationContext* txn,
- const DocWriter* doc,
- bool enforceQuota ) = 0;
-
- /**
- * @param notifier - Only used by record stores which do not support doc-locking.
- * In the case of a document move, this is called after the document
- * has been written to the new location, but before it is deleted from
- * the old location.
- * In the case of an in-place update, this is called just before the
- * in-place write occurs.
- * @return Status or RecordId, RecordId might be different
- */
- virtual StatusWith<RecordId> updateRecord( OperationContext* txn,
- const RecordId& oldLocation,
- const char* data,
- int len,
- bool enforceQuota,
- UpdateNotifier* notifier ) = 0;
-
- /**
- * @return Returns 'false' if this record store does not implement
- * 'updatewithDamages'. If this method returns false, 'updateWithDamages' must not be
- * called, and all updates must be routed through 'updateRecord' above. This allows the
- * update framework to avoid doing the work of damage tracking if the underlying record
- * store cannot utilize that information.
- */
- virtual bool updateWithDamagesSupported() const = 0;
-
- virtual Status updateWithDamages( OperationContext* txn,
- const RecordId& loc,
- const RecordData& oldRec,
- const char* damageSource,
- const mutablebson::DamageVector& damages ) = 0;
-
- /**
- * Returns a new cursor over this record store.
- *
- * The cursor is logically positioned before the first (or last if !forward) Record in the
- * collection so that Record will be returned on the first call to next(). Implementations
- * are allowed to lazily seek to the first Record when next() is called rather than doing
- * it on construction.
- */
- virtual std::unique_ptr<RecordCursor> getCursor(OperationContext* txn,
- bool forward = true) const = 0;
-
- /**
- * Constructs a cursor over a potentially corrupted store, which can be used to salvage
- * damaged records. The iterator might return every record in the store if all of them
- * are reachable and not corrupted. Returns NULL if not supported.
- *
- * Repair cursors are only required to support forward scanning, so it is illegal to call
- * seekExact() on the returned cursor.
- */
- virtual std::unique_ptr<RecordCursor> getCursorForRepair( OperationContext* txn ) const {
- return {};
- }
-
- /**
- * Returns many RecordCursors that partition the RecordStore into many disjoint sets.
- * Iterating all returned RecordCursors is equivalent to iterating the full store.
- *
- * Partition cursors are only required to support forward scanning, so it is illegal to call
- * seekExact() on any of the returned cursors.
- *
- * WARNING: the first call to restore() on each cursor may (but is not guaranteed to) be on
- * a different RecoveryUnit than the initial save. This will be made more sane as part of
- * SERVER-17364.
- */
- virtual std::vector<std::unique_ptr<RecordCursor>> getManyCursors(
- OperationContext* txn) const {
- std::vector<std::unique_ptr<RecordCursor>> out(1);
- out[0] = getCursor(txn);
- return out;
- }
-
- // higher level
-
-
- /**
- * removes all Records
- */
- virtual Status truncate( OperationContext* txn ) = 0;
-
- /**
- * Truncate documents newer than the document at 'end' from the capped
- * collection. The collection cannot be completely emptied using this
- * function. An assertion will be thrown if that is attempted.
- * @param inclusive - Truncate 'end' as well iff true
- * XXX: this will go away soon, just needed to move for now
- */
- virtual void temp_cappedTruncateAfter(OperationContext* txn,
- RecordId end,
- bool inclusive) = 0;
-
- /**
- * does this RecordStore support the compact operation?
- *
- * If you return true, you must provide implementations of all compact methods.
- */
- virtual bool compactSupported() const { return false; }
-
- /**
- * Does compact() leave RecordIds alone or can they change.
- *
- * Only called if compactSupported() returns true.
- */
- virtual bool compactsInPlace() const { invariant(false); }
-
- /**
- * Attempt to reduce the storage space used by this RecordStore.
- *
- * Only called if compactSupported() returns true.
- * No RecordStoreCompactAdaptor will be passed if compactsInPlace() returns true.
- */
- virtual Status compact( OperationContext* txn,
- RecordStoreCompactAdaptor* adaptor,
- const CompactOptions* options,
- CompactStats* stats ) {
- invariant(false);
- }
-
- /**
- * @param full - does more checks
- * @param scanData - scans each document
- * @return OK if the validate run successfully
- * OK will be returned even if corruption is found
- * deatils will be in result
- */
- virtual Status validate( OperationContext* txn,
- bool full, bool scanData,
- ValidateAdaptor* adaptor,
- ValidateResults* results, BSONObjBuilder* output ) = 0;
-
- /**
- * @param scaleSize - amount by which to scale size metrics
- * appends any custom stats from the RecordStore or other unique stats
- */
- virtual void appendCustomStats( OperationContext* txn,
- BSONObjBuilder* result,
- double scale ) const = 0;
-
- /**
- * Load all data into cache.
- * What cache depends on implementation.
- *
- * If the underlying storage engine does not support the operation,
- * returns ErrorCodes::CommandNotSupported
- *
- * @param output (optional) - where to put detailed stats
- */
- virtual Status touch( OperationContext* txn, BSONObjBuilder* output ) const {
- return Status(ErrorCodes::CommandNotSupported,
- "this storage engine does not support touch");
- }
-
- /**
- * Return the RecordId of an oplog entry as close to startingPosition as possible without
- * being higher. If there are no entries <= startingPosition, return RecordId().
- *
- * If you don't implement the oplogStartHack, just use the default implementation which
- * returns boost::none.
- */
- virtual boost::optional<RecordId> oplogStartHack(OperationContext* txn,
- const RecordId& startingPosition) const {
- return boost::none;
- }
-
- /**
- * When we write to an oplog, we call this so that if the storage engine
- * supports doc locking, it can manage the visibility of oplog entries to ensure
- * they are ordered.
- */
- virtual Status oplogDiskLocRegister( OperationContext* txn,
- const Timestamp& opTime ) {
- return Status::OK();
- }
-
- /**
- * Called after a repair operation is run with the recomputed numRecords and dataSize.
- */
- virtual void updateStatsAfterRepair(OperationContext* txn,
- long long numRecords,
- long long dataSize) = 0;
-
- protected:
- std::string _ns;
- };
-
- class RecordStoreCompactAdaptor {
- public:
- virtual ~RecordStoreCompactAdaptor(){}
- virtual bool isDataValid( const RecordData& recData ) = 0;
- virtual size_t dataSize( const RecordData& recData ) = 0;
- virtual void inserted( const RecordData& recData, const RecordId& newLocation ) = 0;
- };
-
- struct ValidateResults {
- ValidateResults() {
- valid = true;
- }
- bool valid;
- std::vector<std::string> errors;
- };
+ virtual Status truncate(OperationContext* txn) = 0;
/**
- * This is so when a RecordStore is validating all records
- * it can call back to someone to check if a record is valid.
- * The actual data contained in a Record is totally opaque to the implementation.
+ * Truncate documents newer than the document at 'end' from the capped
+ * collection. The collection cannot be completely emptied using this
+ * function. An assertion will be thrown if that is attempted.
+ * @param inclusive - Truncate 'end' as well iff true
+ * XXX: this will go away soon, just needed to move for now
*/
- class ValidateAdaptor {
- public:
- virtual ~ValidateAdaptor(){}
+ virtual void temp_cappedTruncateAfter(OperationContext* txn, RecordId end, bool inclusive) = 0;
- virtual Status validate( const RecordData& recordData, size_t* dataSize ) = 0;
- };
+ /**
+ * Does this RecordStore support the compact operation?
+ *
+ * If you return true, you must provide implementations of all compact methods.
+ */
+ virtual bool compactSupported() const {
+ return false;
+ }
+
+ /**
+ * Does compact() leave RecordIds alone, or can they change?
+ *
+ * Only called if compactSupported() returns true.
+ */
+ virtual bool compactsInPlace() const {
+ invariant(false);
+ }
+
+ /**
+ * Attempt to reduce the storage space used by this RecordStore.
+ *
+ * Only called if compactSupported() returns true.
+ * No RecordStoreCompactAdaptor will be passed if compactsInPlace() returns true.
+ */
+ virtual Status compact(OperationContext* txn,
+ RecordStoreCompactAdaptor* adaptor,
+ const CompactOptions* options,
+ CompactStats* stats) {
+ invariant(false);
+ }
+
+ /**
+ * @param full - does more checks
+ * @param scanData - scans each document
+ * @return OK if the validate ran successfully
+ * OK will be returned even if corruption is found
+ * details will be in 'results'
+ */
+ virtual Status validate(OperationContext* txn,
+ bool full,
+ bool scanData,
+ ValidateAdaptor* adaptor,
+ ValidateResults* results,
+ BSONObjBuilder* output) = 0;
+
+ /**
+ * @param scale - amount by which to scale size metrics
+ * Appends any custom stats from the RecordStore or other unique stats.
+ */
+ virtual void appendCustomStats(OperationContext* txn,
+ BSONObjBuilder* result,
+ double scale) const = 0;
+
+ /**
+ * Load all data into cache.
+ * Which cache is used depends on the implementation.
+ *
+ * If the underlying storage engine does not support the operation,
+ * returns ErrorCodes::CommandNotSupported
+ *
+ * @param output (optional) - where to put detailed stats
+ */
+ virtual Status touch(OperationContext* txn, BSONObjBuilder* output) const {
+ return Status(ErrorCodes::CommandNotSupported,
+ "this storage engine does not support touch");
+ }
+
+ /**
+ * Return the RecordId of an oplog entry as close to startingPosition as possible without
+ * being higher. If there are no entries <= startingPosition, return RecordId().
+ *
+ * If you don't implement the oplogStartHack, just use the default implementation which
+ * returns boost::none.
+ */
+ virtual boost::optional<RecordId> oplogStartHack(OperationContext* txn,
+ const RecordId& startingPosition) const {
+ return boost::none;
+ }
+
+ /**
+ * When we write to an oplog, we call this so that if the storage engine
+ * supports doc locking, it can manage the visibility of oplog entries to ensure
+ * they are ordered.
+ */
+ virtual Status oplogDiskLocRegister(OperationContext* txn, const Timestamp& opTime) {
+ return Status::OK();
+ }
+
+ /**
+ * Called after a repair operation is run with the recomputed numRecords and dataSize.
+ */
+ virtual void updateStatsAfterRepair(OperationContext* txn,
+ long long numRecords,
+ long long dataSize) = 0;
+
+protected:
+ std::string _ns;
+};
+
+class RecordStoreCompactAdaptor {
+public:
+ virtual ~RecordStoreCompactAdaptor() {}
+ virtual bool isDataValid(const RecordData& recData) = 0;
+ virtual size_t dataSize(const RecordData& recData) = 0;
+ virtual void inserted(const RecordData& recData, const RecordId& newLocation) = 0;
+};
+
+struct ValidateResults {
+ ValidateResults() {
+ valid = true;
+ }
+ bool valid;
+ std::vector<std::string> errors;
+};
+
+/**
+ * This is so when a RecordStore is validating all records
+ * it can call back to someone to check if a record is valid.
+ * The actual data contained in a Record is totally opaque to the implementation.
+ */
+class ValidateAdaptor {
+public:
+ virtual ~ValidateAdaptor() {}
+
+ virtual Status validate(const RecordData& recordData, size_t* dataSize) = 0;
+};
}
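
Taken together, the cursor contract above implies a standard scan shape: save before any point where state may change, restore afterwards, and treat a false return from restore() as "stop, capped deletes caught up". A hedged sketch under those rules; processRecord is hypothetical:

    // Hypothetical full scan obeying the RecordCursor save/restore contract.
    void scanAll(OperationContext* txn, RecordStore* rs) {
        std::unique_ptr<RecordCursor> cursor = rs->getCursor(txn);
        while (boost::optional<Record> rec = cursor->next()) {
            processRecord(rec->id, rec->data);    // hypothetical consumer

            cursor->savePositioned();             // before any yield
            // ... locks may be yielded here ...
            if (!cursor->restore(txn))
                break;                            // unsafe to continue
        }
    }
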
diff --git a/src/mongo/db/storage/record_store_test_datafor.cpp b/src/mongo/db/storage/record_store_test_datafor.cpp
index 7af9534acb5..4bb9a0fbca1 100644
--- a/src/mongo/db/storage/record_store_test_datafor.cpp
+++ b/src/mongo/db/storage/record_store_test_datafor.cpp
@@ -41,98 +41,94 @@ using std::stringstream;
namespace mongo {
- using std::unique_ptr;
+using std::unique_ptr;
- // Insert a record and verify its contents by calling dataFor()
- // on the returned RecordId.
- TEST( RecordStoreTestHarness, DataFor ) {
- unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- unique_ptr<RecordStore> rs( harnessHelper->newNonCappedRecordStore() );
+// Insert a record and verify its contents by calling dataFor()
+// on the returned RecordId.
+TEST(RecordStoreTestHarness, DataFor) {
+ unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 0, rs->numRecords( opCtx.get() ) );
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(0, rs->numRecords(opCtx.get()));
+ }
- string data = "record-";
- RecordId loc;
+ string data = "record-";
+ RecordId loc;
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- StatusWith<RecordId> res = rs->insertRecord( opCtx.get(),
- data.c_str(),
- data.size() + 1,
- false );
- ASSERT_OK( res.getStatus() );
- loc = res.getValue();
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ StatusWith<RecordId> res =
+ rs->insertRecord(opCtx.get(), data.c_str(), data.size() + 1, false);
+ ASSERT_OK(res.getStatus());
+ loc = res.getValue();
+ uow.commit();
}
+ }
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 1, rs->numRecords( opCtx.get() ) );
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(1, rs->numRecords(opCtx.get()));
+ }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- RecordData record = rs->dataFor( opCtx.get(), loc );
- ASSERT_EQUALS( data.size() + 1, static_cast<size_t>( record.size() ) );
- ASSERT_EQUALS( data, record.data() );
- }
+ RecordData record = rs->dataFor(opCtx.get(), loc);
+ ASSERT_EQUALS(data.size() + 1, static_cast<size_t>(record.size()));
+ ASSERT_EQUALS(data, record.data());
}
}
+}
- // Insert multiple records and verify their contents by calling dataFor()
- // on each of the returned RecordIds.
- TEST( RecordStoreTestHarness, DataForMultiple ) {
- unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- unique_ptr<RecordStore> rs( harnessHelper->newNonCappedRecordStore() );
+// Insert multiple records and verify their contents by calling dataFor()
+// on each of the returned RecordIds.
+TEST(RecordStoreTestHarness, DataForMultiple) {
+ unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(0, rs->numRecords(opCtx.get()));
+ }
+
+ const int nToInsert = 10;
+ RecordId locs[nToInsert];
+ for (int i = 0; i < nToInsert; i++) {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 0, rs->numRecords( opCtx.get() ) );
+ stringstream ss;
+ ss << "record----" << i;
+ string data = ss.str();
+
+ WriteUnitOfWork uow(opCtx.get());
+ StatusWith<RecordId> res =
+ rs->insertRecord(opCtx.get(), data.c_str(), data.size() + 1, false);
+ ASSERT_OK(res.getStatus());
+ locs[i] = res.getValue();
+ uow.commit();
}
+ }
- const int nToInsert = 10;
- RecordId locs[nToInsert];
- for ( int i = 0; i < nToInsert; i++ ) {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- stringstream ss;
- ss << "record----" << i;
- string data = ss.str();
-
- WriteUnitOfWork uow( opCtx.get() );
- StatusWith<RecordId> res = rs->insertRecord( opCtx.get(),
- data.c_str(),
- data.size() + 1,
- false );
- ASSERT_OK( res.getStatus() );
- locs[i] = res.getValue();
- uow.commit();
- }
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(nToInsert, rs->numRecords(opCtx.get()));
+ }
+ for (int i = 0; i < nToInsert; i++) {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( nToInsert, rs->numRecords( opCtx.get() ) );
- }
+ stringstream ss;
+ ss << "record----" << i;
+ string data = ss.str();
- for ( int i = 0; i < nToInsert; i++ ) {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- stringstream ss;
- ss << "record----" << i;
- string data = ss.str();
-
- RecordData record = rs->dataFor( opCtx.get(), locs[i] );
- ASSERT_EQUALS( data.size() + 1, static_cast<size_t>( record.size() ) );
- ASSERT_EQUALS( data, record.data() );
- }
+ RecordData record = rs->dataFor(opCtx.get(), locs[i]);
+ ASSERT_EQUALS(data.size() + 1, static_cast<size_t>(record.size()));
+ ASSERT_EQUALS(data, record.data());
}
}
+}
-} // namespace mongo
+} // namespace mongo
diff --git a/src/mongo/db/storage/record_store_test_datasize.cpp b/src/mongo/db/storage/record_store_test_datasize.cpp
index 247adb92189..b39d62d27b3 100644
--- a/src/mongo/db/storage/record_store_test_datasize.cpp
+++ b/src/mongo/db/storage/record_store_test_datasize.cpp
@@ -39,61 +39,59 @@ using std::stringstream;
namespace mongo {
- using std::unique_ptr;
+using std::unique_ptr;
- // Verify that an empty collection takes up no space.
- TEST( RecordStoreTestHarness, DataSizeEmpty ) {
- unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- unique_ptr<RecordStore> rs( harnessHelper->newNonCappedRecordStore() );
+// Verify that an empty collection takes up no space.
+TEST(RecordStoreTestHarness, DataSizeEmpty) {
+ unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 0, rs->numRecords( opCtx.get() ) );
- }
-
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( rs->dataSize( opCtx.get() ) == 0 );
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(0, rs->numRecords(opCtx.get()));
}
- // Verify that a nonempty collection takes up some space.
- TEST( RecordStoreTestHarness, DataSizeNonEmpty ) {
- unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- unique_ptr<RecordStore> rs( harnessHelper->newNonCappedRecordStore() );
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(rs->dataSize(opCtx.get()) == 0);
+ }
+}
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 0, rs->numRecords( opCtx.get() ) );
- }
+// Verify that a nonempty collection takes up some space.
+TEST(RecordStoreTestHarness, DataSizeNonEmpty) {
+ unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
- int nToInsert = 10;
- for ( int i = 0; i < nToInsert; i++ ) {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- stringstream ss;
- ss << "record " << i;
- string data = ss.str();
-
- WriteUnitOfWork uow( opCtx.get() );
- StatusWith<RecordId> res = rs->insertRecord( opCtx.get(),
- data.c_str(),
- data.size() + 1,
- false );
- ASSERT_OK( res.getStatus() );
- uow.commit();
- }
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(0, rs->numRecords(opCtx.get()));
+ }
+ int nToInsert = 10;
+ for (int i = 0; i < nToInsert; i++) {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( nToInsert, rs->numRecords( opCtx.get() ) );
+ stringstream ss;
+ ss << "record " << i;
+ string data = ss.str();
+
+ WriteUnitOfWork uow(opCtx.get());
+ StatusWith<RecordId> res =
+ rs->insertRecord(opCtx.get(), data.c_str(), data.size() + 1, false);
+ ASSERT_OK(res.getStatus());
+ uow.commit();
}
+ }
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( rs->dataSize( opCtx.get() ) > 0 );
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(nToInsert, rs->numRecords(opCtx.get()));
+ }
+
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(rs->dataSize(opCtx.get()) > 0);
}
+}
-} // namespace mongo
+} // namespace mongo
diff --git a/src/mongo/db/storage/record_store_test_deleterecord.cpp b/src/mongo/db/storage/record_store_test_deleterecord.cpp
index d2a978d733f..87249d51a17 100644
--- a/src/mongo/db/storage/record_store_test_deleterecord.cpp
+++ b/src/mongo/db/storage/record_store_test_deleterecord.cpp
@@ -41,102 +41,98 @@ using std::stringstream;
namespace mongo {
- using std::unique_ptr;
+using std::unique_ptr;
- // Insert a record and try to delete it.
- TEST( RecordStoreTestHarness, DeleteRecord ) {
- unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- unique_ptr<RecordStore> rs( harnessHelper->newNonCappedRecordStore() );
+// Insert a record and try to delete it.
+TEST(RecordStoreTestHarness, DeleteRecord) {
+ unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 0, rs->numRecords( opCtx.get() ) );
- }
-
- string data = "my record";
- RecordId loc;
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- StatusWith<RecordId> res = rs->insertRecord( opCtx.get(),
- data.c_str(),
- data.size() + 1,
- false );
- ASSERT_OK( res.getStatus() );
- loc = res.getValue();
- uow.commit();
- }
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(0, rs->numRecords(opCtx.get()));
+ }
+ string data = "my record";
+ RecordId loc;
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 1, rs->numRecords( opCtx.get() ) );
+ WriteUnitOfWork uow(opCtx.get());
+ StatusWith<RecordId> res =
+ rs->insertRecord(opCtx.get(), data.c_str(), data.size() + 1, false);
+ ASSERT_OK(res.getStatus());
+ loc = res.getValue();
+ uow.commit();
}
+ }
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- rs->deleteRecord( opCtx.get(), loc );
- uow.commit();
- }
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(1, rs->numRecords(opCtx.get()));
+ }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 0, rs->numRecords( opCtx.get() ) );
+ WriteUnitOfWork uow(opCtx.get());
+ rs->deleteRecord(opCtx.get(), loc);
+ uow.commit();
}
}
- // Insert multiple records and try to delete them.
- TEST( RecordStoreTestHarness, DeleteMultipleRecords ) {
- unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- unique_ptr<RecordStore> rs( harnessHelper->newNonCappedRecordStore() );
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(0, rs->numRecords(opCtx.get()));
+ }
+}
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 0, rs->numRecords( opCtx.get() ) );
- }
+// Insert multiple records and try to delete them.
+TEST(RecordStoreTestHarness, DeleteMultipleRecords) {
+ unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
- const int nToInsert = 10;
- RecordId locs[nToInsert];
- for ( int i = 0; i < nToInsert; i++ ) {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- stringstream ss;
- ss << "record " << i;
- string data = ss.str();
-
- WriteUnitOfWork uow( opCtx.get() );
- StatusWith<RecordId> res = rs->insertRecord( opCtx.get(),
- data.c_str(),
- data.size() + 1,
- false );
- ASSERT_OK( res.getStatus() );
- locs[i] = res.getValue();
- uow.commit();
- }
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(0, rs->numRecords(opCtx.get()));
+ }
+ const int nToInsert = 10;
+ RecordId locs[nToInsert];
+ for (int i = 0; i < nToInsert; i++) {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( nToInsert, rs->numRecords( opCtx.get() ) );
+ stringstream ss;
+ ss << "record " << i;
+ string data = ss.str();
+
+ WriteUnitOfWork uow(opCtx.get());
+ StatusWith<RecordId> res =
+ rs->insertRecord(opCtx.get(), data.c_str(), data.size() + 1, false);
+ ASSERT_OK(res.getStatus());
+ locs[i] = res.getValue();
+ uow.commit();
}
+ }
- for ( int i = 0; i < nToInsert; i++ ) {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- rs->deleteRecord( opCtx.get(), locs[i] );
- uow.commit();
- }
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(nToInsert, rs->numRecords(opCtx.get()));
+ }
+ for (int i = 0; i < nToInsert; i++) {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 0, rs->numRecords( opCtx.get() ) );
+ WriteUnitOfWork uow(opCtx.get());
+ rs->deleteRecord(opCtx.get(), locs[i]);
+ uow.commit();
}
}
-} // namespace mongo
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(0, rs->numRecords(opCtx.get()));
+ }
+}
+
+} // namespace mongo
diff --git a/src/mongo/db/storage/record_store_test_docwriter.h b/src/mongo/db/storage/record_store_test_docwriter.h
index b52cfd97335..b6032f2509a 100644
--- a/src/mongo/db/storage/record_store_test_docwriter.h
+++ b/src/mongo/db/storage/record_store_test_docwriter.h
@@ -35,26 +35,28 @@
namespace mongo {
namespace {
- class StringDocWriter : public DocWriter {
- public:
- StringDocWriter( const std::string &data, bool padding )
- : _data( data ), _padding( padding ) {
- }
+class StringDocWriter : public DocWriter {
+public:
+ StringDocWriter(const std::string& data, bool padding) : _data(data), _padding(padding) {}
- ~StringDocWriter() { }
+ ~StringDocWriter() {}
- void writeDocument( char *buf ) const {
- memcpy( buf, _data.c_str(), documentSize() );
- }
+ void writeDocument(char* buf) const {
+ memcpy(buf, _data.c_str(), documentSize());
+ }
- size_t documentSize() const { return _data.size() + 1; }
+ size_t documentSize() const {
+ return _data.size() + 1;
+ }
- bool addPadding() const { return _padding; }
+ bool addPadding() const {
+ return _padding;
+ }
- private:
- std::string _data;
- bool _padding;
- };
+private:
+ std::string _data;
+ bool _padding;
+};
-} // namespace
-} // namespace mongo
+} // namespace
+} // namespace mongo
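
StringDocWriter lets a test hand the record store a writer callback instead of a preformatted buffer: the store calls documentSize() to size its allocation, then writeDocument() to fill that allocation in place. A usage sketch, assuming opCtx and rs are set up as in the surrounding tests:

    // Mirrors the DocWriter insert path exercised later in this diff
    // (record_store_test_insertrecord.cpp).
    StringDocWriter docWriter("my record", false /* no padding */);

    WriteUnitOfWork uow(opCtx.get());
    StatusWith<RecordId> res = rs->insertRecord(opCtx.get(), &docWriter, false);
    ASSERT_OK(res.getStatus());
    uow.commit();
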
diff --git a/src/mongo/db/storage/record_store_test_harness.cpp b/src/mongo/db/storage/record_store_test_harness.cpp
index b3a4495b2e1..b448bc7d1c0 100644
--- a/src/mongo/db/storage/record_store_test_harness.cpp
+++ b/src/mongo/db/storage/record_store_test_harness.cpp
@@ -36,407 +36,395 @@
namespace mongo {
- using std::unique_ptr;
- using std::string;
+using std::unique_ptr;
+using std::string;
- TEST( RecordStoreTestHarness, Simple1 ) {
- unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- unique_ptr<RecordStore> rs( harnessHelper->newNonCappedRecordStore() );
+TEST(RecordStoreTestHarness, Simple1) {
+ unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 0, rs->numRecords( opCtx.get() ) );
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(0, rs->numRecords(opCtx.get()));
+ }
- string s = "eliot was here";
+ string s = "eliot was here";
- RecordId loc1;
+ RecordId loc1;
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- StatusWith<RecordId> res = rs->insertRecord( opCtx.get(), s.c_str(), s.size() + 1, false );
- ASSERT_OK( res.getStatus() );
- loc1 = res.getValue();
- uow.commit();
- }
-
- ASSERT_EQUALS( s, rs->dataFor( opCtx.get(), loc1 ).data() );
+ WriteUnitOfWork uow(opCtx.get());
+ StatusWith<RecordId> res =
+ rs->insertRecord(opCtx.get(), s.c_str(), s.size() + 1, false);
+ ASSERT_OK(res.getStatus());
+ loc1 = res.getValue();
+ uow.commit();
}
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( s, rs->dataFor( opCtx.get(), loc1 ).data() );
- ASSERT_EQUALS( 1, rs->numRecords( opCtx.get() ) );
-
- RecordData rd;
- ASSERT( !rs->findRecord( opCtx.get(), RecordId(111,17), &rd ) );
- ASSERT( rd.data() == NULL );
+ ASSERT_EQUALS(s, rs->dataFor(opCtx.get(), loc1).data());
+ }
- ASSERT( rs->findRecord( opCtx.get(), loc1, &rd ) );
- ASSERT_EQUALS( s, rd.data() );
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(s, rs->dataFor(opCtx.get(), loc1).data());
+ ASSERT_EQUALS(1, rs->numRecords(opCtx.get()));
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- StatusWith<RecordId> res = rs->insertRecord( opCtx.get(), s.c_str(), s.size() + 1, false );
- ASSERT_OK( res.getStatus() );
- uow.commit();
- }
+ RecordData rd;
+ ASSERT(!rs->findRecord(opCtx.get(), RecordId(111, 17), &rd));
+ ASSERT(rd.data() == NULL);
- }
+ ASSERT(rs->findRecord(opCtx.get(), loc1, &rd));
+ ASSERT_EQUALS(s, rd.data());
+ }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 2, rs->numRecords( opCtx.get() ) );
+ WriteUnitOfWork uow(opCtx.get());
+ StatusWith<RecordId> res =
+ rs->insertRecord(opCtx.get(), s.c_str(), s.size() + 1, false);
+ ASSERT_OK(res.getStatus());
+ uow.commit();
}
}
- namespace {
- class DummyDocWriter : public DocWriter {
- public:
- virtual ~DummyDocWriter(){}
- virtual void writeDocument( char* buf ) const {
- memcpy( buf, "eliot", 6 );
- }
- virtual size_t documentSize() const { return 6; }
- virtual bool addPadding() const { return false; }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(2, rs->numRecords(opCtx.get()));
+ }
+}
- };
+namespace {
+class DummyDocWriter : public DocWriter {
+public:
+ virtual ~DummyDocWriter() {}
+ virtual void writeDocument(char* buf) const {
+ memcpy(buf, "eliot", 6);
+ }
+ virtual size_t documentSize() const {
+ return 6;
+ }
+ virtual bool addPadding() const {
+ return false;
}
+};
+}
- TEST( RecordStoreTestHarness, Simple1InsertDocWroter ) {
- unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- unique_ptr<RecordStore> rs( harnessHelper->newNonCappedRecordStore() );
-
- RecordId loc1;
+TEST(RecordStoreTestHarness, Simple1InsertDocWroter) {
+ unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
-
- {
- WriteUnitOfWork uow( opCtx.get() );
- DummyDocWriter dw;
- StatusWith<RecordId> res = rs->insertRecord( opCtx.get(), &dw, false );
- ASSERT_OK( res.getStatus() );
- loc1 = res.getValue();
- uow.commit();
- }
+ RecordId loc1;
- ASSERT_EQUALS( string("eliot"), rs->dataFor( opCtx.get(), loc1 ).data() );
- }
- }
-
- TEST( RecordStoreTestHarness, Delete1 ) {
- unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- unique_ptr<RecordStore> rs( harnessHelper->newNonCappedRecordStore() );
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 0, rs->numRecords( opCtx.get() ) );
+ WriteUnitOfWork uow(opCtx.get());
+ DummyDocWriter dw;
+ StatusWith<RecordId> res = rs->insertRecord(opCtx.get(), &dw, false);
+ ASSERT_OK(res.getStatus());
+ loc1 = res.getValue();
+ uow.commit();
}
- string s = "eliot was here";
+ ASSERT_EQUALS(string("eliot"), rs->dataFor(opCtx.get(), loc1).data());
+ }
+}
- RecordId loc;
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
-
- {
- WriteUnitOfWork uow( opCtx.get() );
- StatusWith<RecordId> res = rs->insertRecord( opCtx.get(), s.c_str(), s.size() + 1, false );
- ASSERT_OK( res.getStatus() );
- loc = res.getValue();
- uow.commit();
- }
+TEST(RecordStoreTestHarness, Delete1) {
+ unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
- ASSERT_EQUALS( s, rs->dataFor( opCtx.get(), loc ).data() );
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(0, rs->numRecords(opCtx.get()));
+ }
- }
+ string s = "eliot was here";
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 1, rs->numRecords( opCtx.get() ) );
- }
+ RecordId loc;
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
-
- {
- WriteUnitOfWork uow( opCtx.get() );
- rs->deleteRecord( opCtx.get(), loc );
- uow.commit();
- }
-
- ASSERT_EQUALS( 0, rs->numRecords( opCtx.get() ) );
+ WriteUnitOfWork uow(opCtx.get());
+ StatusWith<RecordId> res =
+ rs->insertRecord(opCtx.get(), s.c_str(), s.size() + 1, false);
+ ASSERT_OK(res.getStatus());
+ loc = res.getValue();
+ uow.commit();
}
+ ASSERT_EQUALS(s, rs->dataFor(opCtx.get(), loc).data());
+ }
+
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(1, rs->numRecords(opCtx.get()));
}
- TEST( RecordStoreTestHarness, Delete2 ) {
- unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- unique_ptr<RecordStore> rs( harnessHelper->newNonCappedRecordStore() );
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 0, rs->numRecords( opCtx.get() ) );
+ WriteUnitOfWork uow(opCtx.get());
+ rs->deleteRecord(opCtx.get(), loc);
+ uow.commit();
}
- string s = "eliot was here";
+ ASSERT_EQUALS(0, rs->numRecords(opCtx.get()));
+ }
+}
- RecordId loc;
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
-
- {
- WriteUnitOfWork uow( opCtx.get() );
- StatusWith<RecordId> res = rs->insertRecord( opCtx.get(), s.c_str(), s.size() + 1, false );
- ASSERT_OK( res.getStatus() );
- res = rs->insertRecord( opCtx.get(), s.c_str(), s.size() + 1, false );
- ASSERT_OK( res.getStatus() );
- loc = res.getValue();
- uow.commit();
- }
+TEST(RecordStoreTestHarness, Delete2) {
+ unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(0, rs->numRecords(opCtx.get()));
+ }
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( s, rs->dataFor( opCtx.get(), loc ).data() );
- ASSERT_EQUALS( 2, rs->numRecords( opCtx.get() ) );
- }
+ string s = "eliot was here";
+
+ RecordId loc;
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- rs->deleteRecord( opCtx.get(), loc );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ StatusWith<RecordId> res =
+ rs->insertRecord(opCtx.get(), s.c_str(), s.size() + 1, false);
+ ASSERT_OK(res.getStatus());
+ res = rs->insertRecord(opCtx.get(), s.c_str(), s.size() + 1, false);
+ ASSERT_OK(res.getStatus());
+ loc = res.getValue();
+ uow.commit();
}
}
- TEST( RecordStoreTestHarness, Update1 ) {
- unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- unique_ptr<RecordStore> rs( harnessHelper->newNonCappedRecordStore() );
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(s, rs->dataFor(opCtx.get(), loc).data());
+ ASSERT_EQUALS(2, rs->numRecords(opCtx.get()));
+ }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 0, rs->numRecords( opCtx.get() ) );
+ WriteUnitOfWork uow(opCtx.get());
+ rs->deleteRecord(opCtx.get(), loc);
+ uow.commit();
}
+ }
+}
- string s1 = "eliot was here";
- string s2 = "eliot was here again";
+TEST(RecordStoreTestHarness, Update1) {
+ unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
- RecordId loc;
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- StatusWith<RecordId> res = rs->insertRecord( opCtx.get(),
- s1.c_str(), s1.size() + 1,
- false );
- ASSERT_OK( res.getStatus() );
- loc = res.getValue();
- uow.commit();
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(0, rs->numRecords(opCtx.get()));
+ }
- }
+ string s1 = "eliot was here";
+ string s2 = "eliot was here again";
+ RecordId loc;
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( s1, rs->dataFor( opCtx.get(), loc ).data() );
+ WriteUnitOfWork uow(opCtx.get());
+ StatusWith<RecordId> res =
+ rs->insertRecord(opCtx.get(), s1.c_str(), s1.size() + 1, false);
+ ASSERT_OK(res.getStatus());
+ loc = res.getValue();
+ uow.commit();
}
+ }
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- StatusWith<RecordId> res = rs->updateRecord( opCtx.get(), loc,
- s2.c_str(), s2.size() + 1,
- false, NULL );
- ASSERT_OK( res.getStatus() );
- loc = res.getValue();
- uow.commit();
- }
-
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(s1, rs->dataFor(opCtx.get(), loc).data());
+ }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 1, rs->numRecords( opCtx.get() ) );
- ASSERT_EQUALS( s2, rs->dataFor( opCtx.get(), loc ).data() );
+ WriteUnitOfWork uow(opCtx.get());
+ StatusWith<RecordId> res =
+ rs->updateRecord(opCtx.get(), loc, s2.c_str(), s2.size() + 1, false, NULL);
+ ASSERT_OK(res.getStatus());
+ loc = res.getValue();
+ uow.commit();
}
-
}
- TEST( RecordStoreTestHarness, UpdateInPlace1 ) {
- unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- unique_ptr<RecordStore> rs( harnessHelper->newNonCappedRecordStore() );
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(1, rs->numRecords(opCtx.get()));
+ ASSERT_EQUALS(s2, rs->dataFor(opCtx.get(), loc).data());
+ }
+}
- if (!rs->updateWithDamagesSupported())
- return;
+TEST(RecordStoreTestHarness, UpdateInPlace1) {
+ unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
- string s1 = "aaa111bbb";
- string s2 = "aaa222bbb";
+ if (!rs->updateWithDamagesSupported())
+ return;
- RecordId loc;
- const RecordData s1Rec(s1.c_str(), s1.size() + 1);
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- StatusWith<RecordId> res = rs->insertRecord( opCtx.get(),
- s1Rec.data(),
- s1Rec.size(),
- -1 );
- ASSERT_OK( res.getStatus() );
- loc = res.getValue();
- uow.commit();
- }
-
- }
+ string s1 = "aaa111bbb";
+ string s2 = "aaa222bbb";
+ RecordId loc;
+ const RecordData s1Rec(s1.c_str(), s1.size() + 1);
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( s1, rs->dataFor( opCtx.get(), loc ).data() );
+ WriteUnitOfWork uow(opCtx.get());
+ StatusWith<RecordId> res =
+ rs->insertRecord(opCtx.get(), s1Rec.data(), s1Rec.size(), -1);
+ ASSERT_OK(res.getStatus());
+ loc = res.getValue();
+ uow.commit();
}
+ }
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- const char* damageSource = "222";
- mutablebson::DamageVector dv;
- dv.push_back( mutablebson::DamageEvent() );
- dv[0].sourceOffset = 0;
- dv[0].targetOffset = 3;
- dv[0].size = 3;
- Status res = rs->updateWithDamages( opCtx.get(),
- loc,
- s1Rec,
- damageSource,
- dv );
- ASSERT_OK( res );
- uow.commit();
- }
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(s1, rs->dataFor(opCtx.get(), loc).data());
+ }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( s2, rs->dataFor( opCtx.get(), loc ).data() );
+ WriteUnitOfWork uow(opCtx.get());
+ const char* damageSource = "222";
+ mutablebson::DamageVector dv;
+ dv.push_back(mutablebson::DamageEvent());
+ dv[0].sourceOffset = 0;
+ dv[0].targetOffset = 3;
+ dv[0].size = 3;
+ Status res = rs->updateWithDamages(opCtx.get(), loc, s1Rec, damageSource, dv);
+ ASSERT_OK(res);
+ uow.commit();
}
}
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(s2, rs->dataFor(opCtx.get(), loc).data());
+ }
+}
- TEST( RecordStoreTestHarness, Truncate1 ) {
- unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- unique_ptr<RecordStore> rs( harnessHelper->newNonCappedRecordStore() );
-
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 0, rs->numRecords( opCtx.get() ) );
- }
-
- string s = "eliot was here";
- RecordId loc;
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- StatusWith<RecordId> res = rs->insertRecord( opCtx.get(), s.c_str(), s.size() + 1, false );
- ASSERT_OK( res.getStatus() );
- loc = res.getValue();
- uow.commit();
- }
+TEST(RecordStoreTestHarness, Truncate1) {
+ unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(0, rs->numRecords(opCtx.get()));
+ }
+ string s = "eliot was here";
+ RecordId loc;
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( s, rs->dataFor( opCtx.get(), loc ).data() );
+ WriteUnitOfWork uow(opCtx.get());
+ StatusWith<RecordId> res =
+ rs->insertRecord(opCtx.get(), s.c_str(), s.size() + 1, false);
+ ASSERT_OK(res.getStatus());
+ loc = res.getValue();
+ uow.commit();
}
+ }
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 1, rs->numRecords( opCtx.get() ) );
- }
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- rs->truncate( opCtx.get() );
- uow.commit();
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(s, rs->dataFor(opCtx.get(), loc).data());
+ }
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(1, rs->numRecords(opCtx.get()));
+ }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 0, rs->numRecords( opCtx.get() ) );
+ WriteUnitOfWork uow(opCtx.get());
+ rs->truncate(opCtx.get());
+ uow.commit();
}
+ }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(0, rs->numRecords(opCtx.get()));
}
+}
- TEST( RecordStoreTestHarness, Cursor1 ) {
- const int N = 10;
+TEST(RecordStoreTestHarness, Cursor1) {
+ const int N = 10;
- unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- unique_ptr<RecordStore> rs( harnessHelper->newNonCappedRecordStore() );
+ unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 0, rs->numRecords( opCtx.get() ) );
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(0, rs->numRecords(opCtx.get()));
+ }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- for ( int i = 0; i < N; i++ ) {
- string s = str::stream() << "eliot" << i;
- ASSERT_OK( rs->insertRecord( opCtx.get(), s.c_str(), s.size() + 1, false ).getStatus() );
- }
- uow.commit();
+ WriteUnitOfWork uow(opCtx.get());
+ for (int i = 0; i < N; i++) {
+ string s = str::stream() << "eliot" << i;
+ ASSERT_OK(
+ rs->insertRecord(opCtx.get(), s.c_str(), s.size() + 1, false).getStatus());
}
+ uow.commit();
}
+ }
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( N, rs->numRecords( opCtx.get() ) );
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(N, rs->numRecords(opCtx.get()));
+ }
- {
- int x = 0;
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- auto cursor = rs->getCursor(opCtx.get());
- while (auto record = cursor->next()) {
- string s = str::stream() << "eliot" << x++;
- ASSERT_EQUALS(s, record->data.data());
- }
- ASSERT_EQUALS( N, x );
- ASSERT(!cursor->next());
+ {
+ int x = 0;
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ auto cursor = rs->getCursor(opCtx.get());
+ while (auto record = cursor->next()) {
+ string s = str::stream() << "eliot" << x++;
+ ASSERT_EQUALS(s, record->data.data());
}
+ ASSERT_EQUALS(N, x);
+ ASSERT(!cursor->next());
+ }
- {
- int x = N;
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- auto cursor = rs->getCursor(opCtx.get(), false);
- while (auto record = cursor->next()) {
- string s = str::stream() << "eliot" << --x;
- ASSERT_EQUALS(s, record->data.data());
- }
- ASSERT_EQUALS( 0, x );
- ASSERT(!cursor->next());
+ {
+ int x = N;
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ auto cursor = rs->getCursor(opCtx.get(), false);
+ while (auto record = cursor->next()) {
+ string s = str::stream() << "eliot" << --x;
+ ASSERT_EQUALS(s, record->data.data());
}
-
+ ASSERT_EQUALS(0, x);
+ ASSERT(!cursor->next());
}
-
+}
}
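
The UpdateInPlace1 test above drives updateWithDamages, where each DamageEvent copies size bytes from sourceOffset in the damage source into targetOffset of the stored record. A sketch of a two-event damage vector; the offsets and the oldRec binding are illustrative, not taken from the test:

    // Patches record bytes [3, 6) and [9, 12) from one damage source buffer.
    // oldRec is assumed to hold the record's current RecordData.
    const char* damageSource = "222333";
    mutablebson::DamageVector dv(2);
    dv[0].sourceOffset = 0;  // copy "222" from the source...
    dv[0].targetOffset = 3;  // ...into record bytes [3, 6)
    dv[0].size = 3;
    dv[1].sourceOffset = 3;  // copy "333" from the source...
    dv[1].targetOffset = 9;  // ...into record bytes [9, 12)
    dv[1].size = 3;
    Status res = rs->updateWithDamages(opCtx.get(), loc, oldRec, damageSource, dv);
    ASSERT_OK(res);
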
diff --git a/src/mongo/db/storage/record_store_test_harness.h b/src/mongo/db/storage/record_store_test_harness.h
index 003e7c398a1..e3cd758e545 100644
--- a/src/mongo/db/storage/record_store_test_harness.h
+++ b/src/mongo/db/storage/record_store_test_harness.h
@@ -35,25 +35,25 @@
namespace mongo {
- class RecordStore;
- class RecoveryUnit;
+class RecordStore;
+class RecoveryUnit;
- class HarnessHelper {
- public:
- HarnessHelper() : _serviceContext(), _client(_serviceContext.makeClient("hh")) {}
- virtual ~HarnessHelper(){}
+class HarnessHelper {
+public:
+ HarnessHelper() : _serviceContext(), _client(_serviceContext.makeClient("hh")) {}
+ virtual ~HarnessHelper() {}
- virtual RecordStore* newNonCappedRecordStore() = 0;
- virtual RecoveryUnit* newRecoveryUnit() = 0;
+ virtual RecordStore* newNonCappedRecordStore() = 0;
+ virtual RecoveryUnit* newRecoveryUnit() = 0;
- virtual OperationContext* newOperationContext() {
- return new OperationContextNoop(_client.get(), 1, newRecoveryUnit());
- }
+ virtual OperationContext* newOperationContext() {
+ return new OperationContextNoop(_client.get(), 1, newRecoveryUnit());
+ }
- private:
- ServiceContextNoop _serviceContext;
- ServiceContext::UniqueClient _client;
- };
+private:
+ ServiceContextNoop _serviceContext;
+ ServiceContext::UniqueClient _client;
+};
- HarnessHelper* newHarnessHelper();
+HarnessHelper* newHarnessHelper();
}
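
Each storage engine links these shared tests against its own HarnessHelper subclass and supplies the matching newHarnessHelper() definition. A minimal sketch for a hypothetical engine; MyEngineHarnessHelper, MyEngineRecordStore, and MyEngineRecoveryUnit are placeholder names, not real classes:

    // Hypothetical engine glue; only the two pure virtuals need overriding.
    class MyEngineHarnessHelper : public HarnessHelper {
    public:
        virtual RecordStore* newNonCappedRecordStore() {
            return new MyEngineRecordStore("test.records");  // placeholder ns
        }
        virtual RecoveryUnit* newRecoveryUnit() {
            return new MyEngineRecoveryUnit();
        }
    };

    // One definition per engine test binary; the shared tests call this factory.
    HarnessHelper* newHarnessHelper() {
        return new MyEngineHarnessHelper();
    }
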
diff --git a/src/mongo/db/storage/record_store_test_insertrecord.cpp b/src/mongo/db/storage/record_store_test_insertrecord.cpp
index 18f72f8bab7..4f62256c8e1 100644
--- a/src/mongo/db/storage/record_store_test_insertrecord.cpp
+++ b/src/mongo/db/storage/record_store_test_insertrecord.cpp
@@ -42,144 +42,136 @@ using std::stringstream;
namespace mongo {
- using std::unique_ptr;
+using std::unique_ptr;
- // Insert a record and verify the number of entries in the collection is 1.
- TEST( RecordStoreTestHarness, InsertRecord ) {
- unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- unique_ptr<RecordStore> rs( harnessHelper->newNonCappedRecordStore() );
+// Insert a record and verify the number of entries in the collection is 1.
+TEST(RecordStoreTestHarness, InsertRecord) {
+ unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 0, rs->numRecords( opCtx.get() ) );
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(0, rs->numRecords(opCtx.get()));
+ }
- string data = "my record";
- RecordId loc;
+ string data = "my record";
+ RecordId loc;
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- StatusWith<RecordId> res = rs->insertRecord( opCtx.get(),
- data.c_str(),
- data.size() + 1,
- false );
- ASSERT_OK( res.getStatus() );
- loc = res.getValue();
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ StatusWith<RecordId> res =
+ rs->insertRecord(opCtx.get(), data.c_str(), data.size() + 1, false);
+ ASSERT_OK(res.getStatus());
+ loc = res.getValue();
+ uow.commit();
}
+ }
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 1, rs->numRecords( opCtx.get() ) );
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(1, rs->numRecords(opCtx.get()));
}
+}
- // Insert multiple records and verify the number of entries in the collection
- // equals the number that were inserted.
- TEST( RecordStoreTestHarness, InsertMultipleRecords ) {
- unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- unique_ptr<RecordStore> rs( harnessHelper->newNonCappedRecordStore() );
+// Insert multiple records and verify the number of entries in the collection
+// equals the number that were inserted.
+TEST(RecordStoreTestHarness, InsertMultipleRecords) {
+ unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 0, rs->numRecords( opCtx.get() ) );
- }
-
- const int nToInsert = 10;
- RecordId locs[nToInsert];
- for ( int i = 0; i < nToInsert; i++ ) {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- stringstream ss;
- ss << "record " << i;
- string data = ss.str();
-
- WriteUnitOfWork uow( opCtx.get() );
- StatusWith<RecordId> res = rs->insertRecord( opCtx.get(),
- data.c_str(),
- data.size() + 1,
- false );
- ASSERT_OK( res.getStatus() );
- locs[i] = res.getValue();
- uow.commit();
- }
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(0, rs->numRecords(opCtx.get()));
+ }
+ const int nToInsert = 10;
+ RecordId locs[nToInsert];
+ for (int i = 0; i < nToInsert; i++) {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( nToInsert, rs->numRecords( opCtx.get() ) );
+ stringstream ss;
+ ss << "record " << i;
+ string data = ss.str();
+
+ WriteUnitOfWork uow(opCtx.get());
+ StatusWith<RecordId> res =
+ rs->insertRecord(opCtx.get(), data.c_str(), data.size() + 1, false);
+ ASSERT_OK(res.getStatus());
+ locs[i] = res.getValue();
+ uow.commit();
}
}
- // Insert a record using a DocWriter and verify the number of entries
- // in the collection is 1.
- TEST( RecordStoreTestHarness, InsertRecordUsingDocWriter ) {
- unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- unique_ptr<RecordStore> rs( harnessHelper->newNonCappedRecordStore() );
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(nToInsert, rs->numRecords(opCtx.get()));
+ }
+}
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 0, rs->numRecords( opCtx.get() ) );
- }
+// Insert a record using a DocWriter and verify the number of entries
+// in the collection is 1.
+TEST(RecordStoreTestHarness, InsertRecordUsingDocWriter) {
+ unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
- RecordId loc;
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- StringDocWriter docWriter( "my record", false );
-
- WriteUnitOfWork uow( opCtx.get() );
- StatusWith<RecordId> res = rs->insertRecord( opCtx.get(),
- &docWriter,
- false );
- ASSERT_OK( res.getStatus() );
- loc = res.getValue();
- uow.commit();
- }
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(0, rs->numRecords(opCtx.get()));
+ }
+ RecordId loc;
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 1, rs->numRecords( opCtx.get() ) );
+ StringDocWriter docWriter("my record", false);
+
+ WriteUnitOfWork uow(opCtx.get());
+ StatusWith<RecordId> res = rs->insertRecord(opCtx.get(), &docWriter, false);
+ ASSERT_OK(res.getStatus());
+ loc = res.getValue();
+ uow.commit();
}
}
- // Insert multiple records using a DocWriter and verify the number of entries
- // in the collection equals the number that were inserted.
- TEST( RecordStoreTestHarness, InsertMultipleRecordsUsingDocWriter ) {
- unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- unique_ptr<RecordStore> rs( harnessHelper->newNonCappedRecordStore() );
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(1, rs->numRecords(opCtx.get()));
+ }
+}
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 0, rs->numRecords( opCtx.get() ) );
- }
+// Insert multiple records using a DocWriter and verify the number of entries
+// in the collection equals the number that were inserted.
+TEST(RecordStoreTestHarness, InsertMultipleRecordsUsingDocWriter) {
+ unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
- const int nToInsert = 10;
- RecordId locs[nToInsert];
- for ( int i = 0; i < nToInsert; i++ ) {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- stringstream ss;
- ss << "record " << i;
- StringDocWriter docWriter( ss.str(), false );
-
- WriteUnitOfWork uow( opCtx.get() );
- StatusWith<RecordId> res = rs->insertRecord( opCtx.get(),
- &docWriter,
- false );
- ASSERT_OK( res.getStatus() );
- locs[i] = res.getValue();
- uow.commit();
- }
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(0, rs->numRecords(opCtx.get()));
+ }
+ const int nToInsert = 10;
+ RecordId locs[nToInsert];
+ for (int i = 0; i < nToInsert; i++) {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( nToInsert, rs->numRecords( opCtx.get() ) );
+ stringstream ss;
+ ss << "record " << i;
+ StringDocWriter docWriter(ss.str(), false);
+
+ WriteUnitOfWork uow(opCtx.get());
+ StatusWith<RecordId> res = rs->insertRecord(opCtx.get(), &docWriter, false);
+ ASSERT_OK(res.getStatus());
+ locs[i] = res.getValue();
+ uow.commit();
}
}
-} // namespace mongo
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(nToInsert, rs->numRecords(opCtx.get()));
+ }
+}
+
+} // namespace mongo
diff --git a/src/mongo/db/storage/record_store_test_manyiter.cpp b/src/mongo/db/storage/record_store_test_manyiter.cpp
index ec896be50a0..842e2681abc 100644
--- a/src/mongo/db/storage/record_store_test_manyiter.cpp
+++ b/src/mongo/db/storage/record_store_test_manyiter.cpp
@@ -43,74 +43,72 @@ using std::vector;
namespace mongo {
- using std::unique_ptr;
+using std::unique_ptr;
- // Create multiple iterators over an empty record store.
- TEST( RecordStoreTestHarness, GetManyIteratorsEmpty ) {
- unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- unique_ptr<RecordStore> rs( harnessHelper->newNonCappedRecordStore() );
+// Create multiple iterators over an empty record store.
+TEST(RecordStoreTestHarness, GetManyIteratorsEmpty) {
+ unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 0, rs->numRecords( opCtx.get() ) );
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(0, rs->numRecords(opCtx.get()));
+ }
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- for (auto&& cursor : rs->getManyCursors(opCtx.get())) {
- ASSERT(!cursor->next());
- ASSERT(!cursor->next());
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ for (auto&& cursor : rs->getManyCursors(opCtx.get())) {
+ ASSERT(!cursor->next());
+ ASSERT(!cursor->next());
}
}
+}
- // Create multiple iterators over a nonempty record store.
- TEST( RecordStoreTestHarness, GetManyIteratorsNonEmpty ) {
- unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- unique_ptr<RecordStore> rs( harnessHelper->newNonCappedRecordStore() );
-
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 0, rs->numRecords( opCtx.get() ) );
- }
+// Create multiple iterators over a nonempty record store.
+TEST(RecordStoreTestHarness, GetManyIteratorsNonEmpty) {
+ unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
- const int nToInsert = 10;
- RecordId locs[nToInsert];
- for ( int i = 0; i < nToInsert; i++ ) {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- stringstream ss;
- ss << "record " << i;
- string data = ss.str();
-
- WriteUnitOfWork uow( opCtx.get() );
- StatusWith<RecordId> res = rs->insertRecord( opCtx.get(),
- data.c_str(),
- data.size() + 1,
- false );
- ASSERT_OK( res.getStatus() );
- locs[i] = res.getValue();
- uow.commit();
- }
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(0, rs->numRecords(opCtx.get()));
+ }
+ const int nToInsert = 10;
+ RecordId locs[nToInsert];
+ for (int i = 0; i < nToInsert; i++) {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( nToInsert, rs->numRecords( opCtx.get() ) );
+ stringstream ss;
+ ss << "record " << i;
+ string data = ss.str();
+
+ WriteUnitOfWork uow(opCtx.get());
+ StatusWith<RecordId> res =
+ rs->insertRecord(opCtx.get(), data.c_str(), data.size() + 1, false);
+ ASSERT_OK(res.getStatus());
+ locs[i] = res.getValue();
+ uow.commit();
}
+ }
- set<RecordId> remain( locs, locs + nToInsert );
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- for (auto&& cursor : rs->getManyCursors(opCtx.get())) {
- while (auto record = cursor->next()) {
- ASSERT_EQ(remain.erase(record->id), size_t(1));
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(nToInsert, rs->numRecords(opCtx.get()));
+ }
- ASSERT(!cursor->next());
+ set<RecordId> remain(locs, locs + nToInsert);
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ for (auto&& cursor : rs->getManyCursors(opCtx.get())) {
+ while (auto record = cursor->next()) {
+ ASSERT_EQ(remain.erase(record->id), size_t(1));
}
- ASSERT( remain.empty() );
+
+ ASSERT(!cursor->next());
}
+ ASSERT(remain.empty());
}
+}
-} // namespace mongo
+} // namespace mongo
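
The contract GetManyIteratorsNonEmpty checks is that the cursors returned by getManyCursors() partition the record store: every record appears in exactly one cursor's stream (an engine without parallel-scan support may return a single cursor covering everything). A condensed sketch that checks the partition by count rather than by id, under the same harness setup:

    // Total records seen across all cursors should equal numRecords, since
    // each record belongs to exactly one cursor's stream.
    long long seen = 0;
    for (auto&& cursor : rs->getManyCursors(opCtx.get())) {
        while (cursor->next()) {
            ++seen;
        }
    }
    ASSERT_EQUALS(rs->numRecords(opCtx.get()), seen);
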
diff --git a/src/mongo/db/storage/record_store_test_recorditer.cpp b/src/mongo/db/storage/record_store_test_recorditer.cpp
index a4f33287ee3..cfd1a9b9c24 100644
--- a/src/mongo/db/storage/record_store_test_recorditer.cpp
+++ b/src/mongo/db/storage/record_store_test_recorditer.cpp
@@ -44,359 +44,345 @@ using std::stringstream;
namespace mongo {
- // Insert multiple records and iterate through them in the forward direction.
- // When curr() or getNext() is called on an iterator positioned at EOF,
- // the iterator returns RecordId() and stays at EOF.
- TEST( RecordStoreTestHarness, IterateOverMultipleRecords ) {
- unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- unique_ptr<RecordStore> rs( harnessHelper->newNonCappedRecordStore() );
+// Insert multiple records and iterate through them in the forward direction.
+// When next() is called on a cursor positioned at EOF,
+// it returns boost::none and stays at EOF.
+TEST(RecordStoreTestHarness, IterateOverMultipleRecords) {
+ unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
+
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(0, rs->numRecords(opCtx.get()));
+ }
+ const int nToInsert = 10;
+ RecordId locs[nToInsert];
+ std::string datas[nToInsert];
+ for (int i = 0; i < nToInsert; i++) {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 0, rs->numRecords( opCtx.get() ) );
+ stringstream ss;
+ ss << "record " << i;
+ string data = ss.str();
+
+ WriteUnitOfWork uow(opCtx.get());
+ StatusWith<RecordId> res =
+ rs->insertRecord(opCtx.get(), data.c_str(), data.size() + 1, false);
+ ASSERT_OK(res.getStatus());
+ locs[i] = res.getValue();
+ datas[i] = data;
+ uow.commit();
}
+ }
- const int nToInsert = 10;
- RecordId locs[nToInsert];
- std::string datas[nToInsert];
- for ( int i = 0; i < nToInsert; i++ ) {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- stringstream ss;
- ss << "record " << i;
- string data = ss.str();
-
- WriteUnitOfWork uow( opCtx.get() );
- StatusWith<RecordId> res = rs->insertRecord( opCtx.get(),
- data.c_str(),
- data.size() + 1,
- false );
- ASSERT_OK( res.getStatus() );
- locs[i] = res.getValue();
- datas[i] = data;
- uow.commit();
- }
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(nToInsert, rs->numRecords(opCtx.get()));
+ }
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( nToInsert, rs->numRecords( opCtx.get() ) );
+ std::sort(locs, locs + nToInsert); // inserted records may not be in RecordId order
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ auto cursor = rs->getCursor(opCtx.get());
+ for (int i = 0; i < nToInsert; i++) {
+ const auto record = cursor->next();
+ ASSERT(record);
+ ASSERT_EQUALS(locs[i], record->id);
+ ASSERT_EQUALS(datas[i], record->data.data());
}
+ ASSERT(!cursor->next());
+ }
+}
+
+// Insert multiple records and iterate through them in the reverse direction.
+// When next() is called on a cursor positioned at EOF,
+// it returns boost::none and stays at EOF.
+TEST(RecordStoreTestHarness, IterateOverMultipleRecordsReversed) {
+ unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
+
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(0, rs->numRecords(opCtx.get()));
+ }
- std::sort( locs, locs + nToInsert ); // inserted records may not be in RecordId order
+ const int nToInsert = 10;
+ RecordId locs[nToInsert];
+ std::string datas[nToInsert];
+ for (int i = 0; i < nToInsert; i++) {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- auto cursor = rs->getCursor(opCtx.get());
- for ( int i = 0; i < nToInsert; i++ ) {
- const auto record = cursor->next();
- ASSERT(record);
- ASSERT_EQUALS( locs[i], record->id );
- ASSERT_EQUALS( datas[i], record->data.data() );
- }
- ASSERT(!cursor->next());
+ stringstream ss;
+ ss << "record " << i;
+ string data = ss.str();
+
+ WriteUnitOfWork uow(opCtx.get());
+ StatusWith<RecordId> res =
+ rs->insertRecord(opCtx.get(), data.c_str(), data.size() + 1, false);
+ ASSERT_OK(res.getStatus());
+ locs[i] = res.getValue();
+ datas[i] = data;
+ uow.commit();
}
}
- // Insert multiple records and iterate through them in the reverse direction.
- // When curr() or getNext() is called on an iterator positioned at EOF,
- // the iterator returns RecordId() and stays at EOF.
- TEST( RecordStoreTestHarness, IterateOverMultipleRecordsReversed ) {
- unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- unique_ptr<RecordStore> rs( harnessHelper->newNonCappedRecordStore() );
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(nToInsert, rs->numRecords(opCtx.get()));
+ }
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 0, rs->numRecords( opCtx.get() ) );
- }
+ std::sort(locs, locs + nToInsert); // inserted records may not be in RecordId order
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
- const int nToInsert = 10;
- RecordId locs[nToInsert];
- std::string datas[nToInsert];
- for ( int i = 0; i < nToInsert; i++ ) {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- stringstream ss;
- ss << "record " << i;
- string data = ss.str();
-
- WriteUnitOfWork uow( opCtx.get() );
- StatusWith<RecordId> res = rs->insertRecord( opCtx.get(),
- data.c_str(),
- data.size() + 1,
- false );
- ASSERT_OK( res.getStatus() );
- locs[i] = res.getValue();
- datas[i] = data;
- uow.commit();
- }
+ auto cursor = rs->getCursor(opCtx.get(), false);
+ for (int i = nToInsert - 1; i >= 0; i--) {
+ const auto record = cursor->next();
+ ASSERT(record);
+ ASSERT_EQUALS(locs[i], record->id);
+ ASSERT_EQUALS(datas[i], record->data.data());
}
+ ASSERT(!cursor->next());
+ }
+}
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( nToInsert, rs->numRecords( opCtx.get() ) );
- }
+// Insert multiple records and try to create a forward iterator
+// starting at an interior position.
+TEST(RecordStoreTestHarness, IterateStartFromMiddle) {
+ unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
+
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(0, rs->numRecords(opCtx.get()));
+ }
- std::sort( locs, locs + nToInsert ); // inserted records may not be in RecordId order
+ const int nToInsert = 10;
+ RecordId locs[nToInsert];
+ std::string datas[nToInsert];
+ for (int i = 0; i < nToInsert; i++) {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
-
- auto cursor = rs->getCursor(opCtx.get(), false);
- for ( int i = nToInsert - 1; i >= 0; i-- ) {
- const auto record = cursor->next();
- ASSERT(record);
- ASSERT_EQUALS( locs[i], record->id );
- ASSERT_EQUALS( datas[i], record->data.data() );
- }
- ASSERT(!cursor->next());
+ stringstream ss;
+ ss << "record " << i;
+ string data = ss.str();
+
+ WriteUnitOfWork uow(opCtx.get());
+ StatusWith<RecordId> res =
+ rs->insertRecord(opCtx.get(), data.c_str(), data.size() + 1, false);
+ ASSERT_OK(res.getStatus());
+ locs[i] = res.getValue();
+ datas[i] = data;
+ uow.commit();
}
}
- // Insert multiple records and try to create a forward iterator
- // starting at an interior position.
- TEST( RecordStoreTestHarness, IterateStartFromMiddle ) {
- unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- unique_ptr<RecordStore> rs( harnessHelper->newNonCappedRecordStore() );
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(nToInsert, rs->numRecords(opCtx.get()));
+ }
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 0, rs->numRecords( opCtx.get() ) );
+ std::sort(locs, locs + nToInsert); // inserted records may not be in RecordId order
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+
+ int start = nToInsert / 2;
+ auto cursor = rs->getCursor(opCtx.get());
+ for (int i = start; i < nToInsert; i++) {
+ const auto record = (i == start) ? cursor->seekExact(locs[i]) : cursor->next();
+ ASSERT(record);
+ ASSERT_EQUALS(locs[i], record->id);
+ ASSERT_EQUALS(datas[i], record->data.data());
}
+ ASSERT(!cursor->next());
+ }
+}
- const int nToInsert = 10;
- RecordId locs[nToInsert];
- std::string datas[nToInsert];
- for ( int i = 0; i < nToInsert; i++ ) {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- stringstream ss;
- ss << "record " << i;
- string data = ss.str();
-
- WriteUnitOfWork uow( opCtx.get() );
- StatusWith<RecordId> res = rs->insertRecord( opCtx.get(),
- data.c_str(),
- data.size() + 1,
- false );
- ASSERT_OK( res.getStatus() );
- locs[i] = res.getValue();
- datas[i] = data;
- uow.commit();
- }
- }
+// Insert multiple records and try to create a reverse iterator
+// starting at an interior position.
+TEST(RecordStoreTestHarness, IterateStartFromMiddleReversed) {
+ unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( nToInsert, rs->numRecords( opCtx.get() ) );
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(0, rs->numRecords(opCtx.get()));
+ }
- std::sort( locs, locs + nToInsert ); // inserted records may not be in RecordId order
+ const int nToInsert = 10;
+ RecordId locs[nToInsert];
+ std::string datas[nToInsert];
+ for (int i = 0; i < nToInsert; i++) {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
-
- int start = nToInsert / 2;
- auto cursor = rs->getCursor(opCtx.get());
- for ( int i = start; i < nToInsert; i++ ) {
- const auto record = (i == start) ? cursor->seekExact(locs[i]) : cursor->next();
- ASSERT(record);
- ASSERT_EQUALS( locs[i], record->id );
- ASSERT_EQUALS( datas[i], record->data.data() );
- }
- ASSERT(!cursor->next());
+ stringstream ss;
+ ss << "record " << i;
+ string data = ss.str();
+
+ WriteUnitOfWork uow(opCtx.get());
+ StatusWith<RecordId> res =
+ rs->insertRecord(opCtx.get(), data.c_str(), data.size() + 1, false);
+ ASSERT_OK(res.getStatus());
+ locs[i] = res.getValue();
+ datas[i] = data;
+ uow.commit();
}
}
- // Insert multiple records and try to create a reverse iterator
- // starting at an interior position.
- TEST( RecordStoreTestHarness, IterateStartFromMiddleReversed ) {
- unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- unique_ptr<RecordStore> rs( harnessHelper->newNonCappedRecordStore() );
-
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 0, rs->numRecords( opCtx.get() ) );
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(nToInsert, rs->numRecords(opCtx.get()));
+ }
- const int nToInsert = 10;
- RecordId locs[nToInsert];
- std::string datas[nToInsert];
- for ( int i = 0; i < nToInsert; i++ ) {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- stringstream ss;
- ss << "record " << i;
- string data = ss.str();
-
- WriteUnitOfWork uow( opCtx.get() );
- StatusWith<RecordId> res = rs->insertRecord( opCtx.get(),
- data.c_str(),
- data.size() + 1,
- false );
- ASSERT_OK( res.getStatus() );
- locs[i] = res.getValue();
- datas[i] = data;
- uow.commit();
- }
+ std::sort(locs, locs + nToInsert); // inserted records may not be in RecordId order
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+
+ int start = nToInsert / 2;
+ auto cursor = rs->getCursor(opCtx.get(), false);
+ for (int i = start; i >= 0; i--) {
+ const auto record = (i == start) ? cursor->seekExact(locs[i]) : cursor->next();
+ ASSERT(record);
+ ASSERT_EQUALS(locs[i], record->id);
+ ASSERT_EQUALS(datas[i], record->data.data());
}
+ ASSERT(!cursor->next());
+ }
+}
+
+// Insert several records, and iterate to the end. Ensure that the record iterator
+// is EOF. Add an additional record, saving and restoring the iterator state, and check
+// that the iterator remains EOF.
+TEST(RecordStoreTestHarness, RecordIteratorEOF) {
+ unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
+
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(0, rs->numRecords(opCtx.get()));
+ }
+ const int nToInsert = 10;
+ RecordId locs[nToInsert];
+ std::string datas[nToInsert];
+ for (int i = 0; i < nToInsert; i++) {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( nToInsert, rs->numRecords( opCtx.get() ) );
- }
+ StringBuilder sb;
+ sb << "record " << i;
+ string data = sb.str();
- std::sort( locs, locs + nToInsert ); // inserted records may not be in RecordId order
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
-
- int start = nToInsert / 2;
- auto cursor = rs->getCursor(opCtx.get(), false);
- for ( int i = start; i >= 0; i-- ) {
- const auto record = (i == start) ? cursor->seekExact(locs[i]) : cursor->next();
- ASSERT(record);
- ASSERT_EQUALS( locs[i], record->id );
- ASSERT_EQUALS( datas[i], record->data.data() );
- }
- ASSERT(!cursor->next());
+ WriteUnitOfWork uow(opCtx.get());
+ StatusWith<RecordId> res =
+ rs->insertRecord(opCtx.get(), data.c_str(), data.size() + 1, false);
+ ASSERT_OK(res.getStatus());
+ locs[i] = res.getValue();
+ datas[i] = data;
+ uow.commit();
}
}
- // Insert several records, and iterate to the end. Ensure that the record iterator
- // is EOF. Add an additional record, saving and restoring the iterator state, and check
- // that the iterator remains EOF.
- TEST( RecordStoreTestHarness, RecordIteratorEOF ) {
- unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- unique_ptr<RecordStore> rs( harnessHelper->newNonCappedRecordStore() );
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(nToInsert, rs->numRecords(opCtx.get()));
+ }
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 0, rs->numRecords( opCtx.get() ) );
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
- const int nToInsert = 10;
- RecordId locs[nToInsert];
- std::string datas[nToInsert];
- for ( int i = 0; i < nToInsert; i++ ) {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- StringBuilder sb;
- sb << "record " << i;
- string data = sb.str();
-
- WriteUnitOfWork uow( opCtx.get() );
- StatusWith<RecordId> res = rs->insertRecord( opCtx.get(),
- data.c_str(),
- data.size() + 1,
- false );
- ASSERT_OK( res.getStatus() );
- locs[i] = res.getValue();
- datas[i] = data;
- uow.commit();
- }
- }
+ // Get a forward iterator starting at the beginning of the record store.
+ auto cursor = rs->getCursor(opCtx.get());
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( nToInsert, rs->numRecords( opCtx.get() ) );
+ // Iterate, checking EOF along the way.
+ for (int i = 0; i < nToInsert; i++) {
+ const auto record = cursor->next();
+ ASSERT(record);
+ ASSERT_EQUALS(locs[i], record->id);
+ ASSERT_EQUALS(datas[i], record->data.data());
}
+ ASSERT(!cursor->next());
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
-
- // Get a forward iterator starting at the beginning of the record store.
- auto cursor = rs->getCursor(opCtx.get());
+ // Add a record and ensure we're still EOF.
+ cursor->savePositioned();
- // Iterate, checking EOF along the way.
- for ( int i = 0; i < nToInsert; i++ ) {
- const auto record = cursor->next();
- ASSERT(record);
- ASSERT_EQUALS( locs[i], record->id );
- ASSERT_EQUALS( datas[i], record->data.data() );
- }
- ASSERT(!cursor->next());
+ StringBuilder sb;
+ sb << "record " << nToInsert + 1;
+ string data = sb.str();
- // Add a record and ensure we're still EOF.
- cursor->savePositioned();
+ WriteUnitOfWork uow(opCtx.get());
+ StatusWith<RecordId> res =
+ rs->insertRecord(opCtx.get(), data.c_str(), data.size() + 1, false);
+ ASSERT_OK(res.getStatus());
+ uow.commit();
- StringBuilder sb;
- sb << "record " << nToInsert + 1;
- string data = sb.str();
+ ASSERT(cursor->restore(opCtx.get()));
- WriteUnitOfWork uow( opCtx.get() );
- StatusWith<RecordId> res = rs->insertRecord( opCtx.get(),
- data.c_str(),
- data.size() + 1,
- false );
- ASSERT_OK( res.getStatus() );
- uow.commit();
+ // Iterator should still be EOF.
+ ASSERT(!cursor->next());
+ ASSERT(!cursor->next());
+ }
+}
- ASSERT( cursor->restore( opCtx.get() ) );
+// Test calling savePositioned and restore after each call to next
+TEST(RecordStoreTestHarness, RecordIteratorSavePositionedRestore) {
+ unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
- // Iterator should still be EOF.
- ASSERT(!cursor->next());
- ASSERT(!cursor->next());
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(0, rs->numRecords(opCtx.get()));
}
- // Test calling savePositioned and restore after each call to next
- TEST( RecordStoreTestHarness, RecordIteratorSavePositionedRestore ) {
- unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- unique_ptr<RecordStore> rs( harnessHelper->newNonCappedRecordStore() );
-
+ const int nToInsert = 10;
+ RecordId locs[nToInsert];
+ std::string datas[nToInsert];
+ for (int i = 0; i < nToInsert; i++) {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 0, rs->numRecords( opCtx.get() ) );
- }
-
- const int nToInsert = 10;
- RecordId locs[nToInsert];
- std::string datas[nToInsert];
- for ( int i = 0; i < nToInsert; i++ ) {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- StringBuilder sb;
- sb << "record " << i;
- string data = sb.str();
-
- WriteUnitOfWork uow( opCtx.get() );
- StatusWith<RecordId> res = rs->insertRecord( opCtx.get(),
- data.c_str(),
- data.size() + 1,
- false );
- ASSERT_OK( res.getStatus() );
- locs[i] = res.getValue();
- datas[i] = data;
- uow.commit();
- }
- }
+ StringBuilder sb;
+ sb << "record " << i;
+ string data = sb.str();
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( nToInsert, rs->numRecords( opCtx.get() ) );
+ WriteUnitOfWork uow(opCtx.get());
+ StatusWith<RecordId> res =
+ rs->insertRecord(opCtx.get(), data.c_str(), data.size() + 1, false);
+ ASSERT_OK(res.getStatus());
+ locs[i] = res.getValue();
+ datas[i] = data;
+ uow.commit();
}
+ }
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
-
- // Get a forward iterator starting at the beginning of the record store.
- auto cursor = rs->getCursor(opCtx.get());
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(nToInsert, rs->numRecords(opCtx.get()));
+ }
- // Iterate, checking EOF along the way.
- for ( int i = 0; i < nToInsert; i++ ) {
- cursor->savePositioned();
- cursor->savePositioned(); // It is legal to save twice in a row.
- cursor->restore(opCtx.get());
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
- const auto record = cursor->next();
- ASSERT(record);
- ASSERT_EQUALS( locs[i], record->id );
- ASSERT_EQUALS( datas[i], record->data.data() );
- }
+ // Get a forward iterator starting at the beginning of the record store.
+ auto cursor = rs->getCursor(opCtx.get());
+ // Iterate, checking EOF along the way.
+ for (int i = 0; i < nToInsert; i++) {
cursor->savePositioned();
- cursor->savePositioned(); // It is legal to save twice in a row.
+ cursor->savePositioned(); // It is legal to save twice in a row.
cursor->restore(opCtx.get());
- ASSERT(!cursor->next());
+ const auto record = cursor->next();
+ ASSERT(record);
+ ASSERT_EQUALS(locs[i], record->id);
+ ASSERT_EQUALS(datas[i], record->data.data());
}
+
+ cursor->savePositioned();
+ cursor->savePositioned(); // It is legal to save twice in a row.
+ cursor->restore(opCtx.get());
+
+ ASSERT(!cursor->next());
}
+}
-} // namespace mongo
+} // namespace mongo
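
Both tests above exercise the cursor save/restore protocol: savePositioned() must be called before any write that could move or invalidate the cursor, restore() reattaches it afterwards, and a cursor that had reached EOF must stay at EOF. A minimal sketch of that protocol, using only the harness calls that appear in this file (the surrounding opCtx/rs setup is assumed):

    auto cursor = rs->getCursor(opCtx.get());
    while (cursor->next()) {
        // drain the cursor to the end of the record store
    }
    cursor->savePositioned();              // detach before a concurrent write
    // ... insert a record inside a committed WriteUnitOfWork ...
    ASSERT(cursor->restore(opCtx.get()));  // reattach after the write
    ASSERT(!cursor->next());               // EOF is sticky across save/restore
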
diff --git a/src/mongo/db/storage/record_store_test_recordstore.cpp b/src/mongo/db/storage/record_store_test_recordstore.cpp
index 14b21c15aac..2e2fb9c3bfd 100644
--- a/src/mongo/db/storage/record_store_test_recordstore.cpp
+++ b/src/mongo/db/storage/record_store_test_recordstore.cpp
@@ -39,33 +39,33 @@ using std::string;
namespace mongo {
- // Verify that the name of the record store is not NULL and nonempty.
- TEST ( RecordStoreTestHarness, RecordStoreName ) {
- unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- unique_ptr<RecordStore> rs( harnessHelper->newNonCappedRecordStore() );
+// Verify that the name of the record store is not NULL and nonempty.
+TEST(RecordStoreTestHarness, RecordStoreName) {
+ unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
- {
- const char *name = rs->name();
- ASSERT( name != NULL && name[0] != '\0' );
- }
+ {
+ const char* name = rs->name();
+ ASSERT(name != NULL && name[0] != '\0');
}
+}
- // Verify that the namespace of the record store is nonempty.
- TEST( RecordStoreTestHarness, Namespace ) {
- unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- unique_ptr<RecordStore> rs( harnessHelper->newNonCappedRecordStore() );
+// Verify that the namespace of the record store is nonempty.
+TEST(RecordStoreTestHarness, Namespace) {
+ unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
- {
- string ns = rs->ns();
- ASSERT( ns[0] != '\0' );
- }
+ {
+ string ns = rs->ns();
+ ASSERT(ns[0] != '\0');
}
+}
- // Call isCapped() on a non-capped collection and verify the result is false.
- TEST( RecordStoreTestHarness, IsNotCapped ) {
- unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- unique_ptr<RecordStore> rs( harnessHelper->newNonCappedRecordStore() );
- ASSERT( !rs->isCapped() );
- }
+// Call isCapped() on a non-capped collection and verify the result is false.
+TEST(RecordStoreTestHarness, IsNotCapped) {
+ unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
+ ASSERT(!rs->isCapped());
+}
-} // namespace mongo
+} // namespace mongo
diff --git a/src/mongo/db/storage/record_store_test_repairiter.cpp b/src/mongo/db/storage/record_store_test_repairiter.cpp
index b3e11e137d4..56abdcb6b14 100644
--- a/src/mongo/db/storage/record_store_test_repairiter.cpp
+++ b/src/mongo/db/storage/record_store_test_repairiter.cpp
@@ -43,128 +43,126 @@ using std::stringstream;
namespace mongo {
- // Create an iterator for repairing an empty record store.
- TEST( RecordStoreTestHarness, GetIteratorForRepairEmpty ) {
- unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- unique_ptr<RecordStore> rs( harnessHelper->newNonCappedRecordStore() );
+// Create an iterator for repairing an empty record store.
+TEST(RecordStoreTestHarness, GetIteratorForRepairEmpty) {
+ unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
+
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(0, rs->numRecords(opCtx.get()));
+ }
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 0, rs->numRecords( opCtx.get() ) );
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ auto cursor = rs->getCursorForRepair(opCtx.get());
+ // returns NULL if getCursorForRepair is not supported
+ if (!cursor) {
+ return;
}
+ ASSERT(!cursor->next());
+ }
+}
+
+// Insert multiple records and create an iterator for repairing the record store,
+// even though it has not been corrupted.
+TEST(RecordStoreTestHarness, GetIteratorForRepairNonEmpty) {
+ unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(0, rs->numRecords(opCtx.get()));
+ }
+
+ const int nToInsert = 10;
+ RecordId locs[nToInsert];
+ for (int i = 0; i < nToInsert; i++) {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- auto cursor = rs->getCursorForRepair( opCtx.get() );
- // returns NULL if getCursorForRepair is not supported
- if (!cursor) {
- return;
- }
- ASSERT(!cursor->next());
+ stringstream ss;
+ ss << "record " << i;
+ string data = ss.str();
+
+ WriteUnitOfWork uow(opCtx.get());
+ StatusWith<RecordId> res =
+ rs->insertRecord(opCtx.get(), data.c_str(), data.size() + 1, false);
+ ASSERT_OK(res.getStatus());
+ locs[i] = res.getValue();
+ uow.commit();
}
}
- // Insert multiple records and create an iterator for repairing the record store,
-    // even though it has not been corrupted.
- TEST( RecordStoreTestHarness, GetIteratorForRepairNonEmpty ) {
- unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- unique_ptr<RecordStore> rs( harnessHelper->newNonCappedRecordStore() );
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(nToInsert, rs->numRecords(opCtx.get()));
+ }
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 0, rs->numRecords( opCtx.get() ) );
+ set<RecordId> remain(locs, locs + nToInsert);
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ auto cursor = rs->getCursorForRepair(opCtx.get());
+ // returns NULL if getCursorForRepair is not supported
+ if (!cursor) {
+ return;
}
- const int nToInsert = 10;
- RecordId locs[nToInsert];
- for ( int i = 0; i < nToInsert; i++ ) {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- stringstream ss;
- ss << "record " << i;
- string data = ss.str();
-
- WriteUnitOfWork uow( opCtx.get() );
- StatusWith<RecordId> res = rs->insertRecord( opCtx.get(),
- data.c_str(),
- data.size() + 1,
- false );
- ASSERT_OK( res.getStatus() );
- locs[i] = res.getValue();
- uow.commit();
- }
+ while (auto record = cursor->next()) {
+ remain.erase(record->id); // can happen more than once per doc
}
+ ASSERT(remain.empty());
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( nToInsert, rs->numRecords( opCtx.get() ) );
- }
+ ASSERT(!cursor->next());
+ }
+}
+
+// Insert a single record. Create a repair iterator pointing to that single record.
+// Then invalidate the record and ensure that the repair iterator responds correctly.
+// See SERVER-16300.
+TEST(RecordStoreTestHarness, GetIteratorForRepairInvalidateSingleton) {
+ unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
+
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQ(0, rs->numRecords(opCtx.get()));
+ }
- set<RecordId> remain( locs, locs + nToInsert );
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- auto cursor = rs->getCursorForRepair( opCtx.get() );
- // returns NULL if getCursorForRepair is not supported
- if (!cursor) {
- return;
- }
-
- while (auto record = cursor->next()) {
- remain.erase(record->id); // can happen more than once per doc
- }
- ASSERT( remain.empty() );
-
- ASSERT(!cursor->next());
- }
+ // Insert one record.
+ RecordId idToInvalidate;
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ WriteUnitOfWork uow(opCtx.get());
+ StatusWith<RecordId> res = rs->insertRecord(opCtx.get(), "some data", 10, false);
+ ASSERT_OK(res.getStatus());
+ idToInvalidate = res.getValue();
+ uow.commit();
}
- // Insert a single record. Create a repair iterator pointing to that single record.
- // Then invalidate the record and ensure that the repair iterator responds correctly.
- // See SERVER-16300.
- TEST(RecordStoreTestHarness, GetIteratorForRepairInvalidateSingleton) {
- unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
- unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
+ // Double-check that the record store has one record in it now.
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQ(1, rs->numRecords(opCtx.get()));
+ }
- {
- unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
- ASSERT_EQ(0, rs->numRecords(opCtx.get()));
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ auto cursor = rs->getCursorForRepair(opCtx.get());
+ // returns NULL if getCursorForRepair is not supported
+ if (!cursor) {
+ return;
}
- // Insert one record.
- RecordId idToInvalidate;
- {
- unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
- WriteUnitOfWork uow(opCtx.get());
- StatusWith<RecordId> res = rs->insertRecord(opCtx.get(), "some data", 10, false);
- ASSERT_OK(res.getStatus());
- idToInvalidate = res.getValue();
- uow.commit();
- }
+ // We should be pointing at the only record in the store.
- // Double-check that the record store has one record in it now.
- {
- unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
- ASSERT_EQ(1, rs->numRecords(opCtx.get()));
- }
+ // Invalidate the record we're pointing at.
+ cursor->savePositioned();
+ cursor->invalidate(idToInvalidate);
+ cursor->restore(opCtx.get());
- {
- unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
- auto cursor = rs->getCursorForRepair( opCtx.get() );
- // returns NULL if getCursorForRepair is not supported
- if (!cursor) {
- return;
- }
-
- // We should be pointing at the only record in the store.
-
- // Invalidate the record we're pointing at.
- cursor->savePositioned();
- cursor->invalidate(idToInvalidate);
- cursor->restore(opCtx.get());
-
- // Iterator should be EOF now because the only thing in the collection got deleted.
- ASSERT(!cursor->next());
- }
+ // Iterator should be EOF now because the only thing in the collection got deleted.
+ ASSERT(!cursor->next());
}
+}
-} // namespace mongo
+} // namespace mongo
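
The singleton test pins down the invalidation contract from SERVER-16300: if the record a saved repair cursor points at is invalidated, restore() must not resurrect it. Reduced to its three calls, the handshake looks like this (a sketch reusing the names from the test above):

    cursor->savePositioned();            // detach before the record goes away
    cursor->invalidate(idToInvalidate);  // tell the cursor its record is gone
    cursor->restore(opCtx.get());
    ASSERT(!cursor->next());             // the dropped record must not reappear
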
diff --git a/src/mongo/db/storage/record_store_test_storagesize.cpp b/src/mongo/db/storage/record_store_test_storagesize.cpp
index a0d5cfab024..61303b7967d 100644
--- a/src/mongo/db/storage/record_store_test_storagesize.cpp
+++ b/src/mongo/db/storage/record_store_test_storagesize.cpp
@@ -40,43 +40,41 @@ using std::stringstream;
namespace mongo {
-    // Verify that a nonempty collection may take up some space on disk.
- TEST( RecordStoreTestHarness, StorageSizeNonEmpty ) {
- unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- unique_ptr<RecordStore> rs( harnessHelper->newNonCappedRecordStore() );
+// Verify that a nonempty collection may take up some space on disk.
+TEST(RecordStoreTestHarness, StorageSizeNonEmpty) {
+ unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 0, rs->numRecords( opCtx.get() ) );
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(0, rs->numRecords(opCtx.get()));
+ }
- int nToInsert = 10;
- for ( int i = 0; i < nToInsert; i++ ) {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- stringstream ss;
- ss << "record " << i;
- string data = ss.str();
+ int nToInsert = 10;
+ for (int i = 0; i < nToInsert; i++) {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ {
+ stringstream ss;
+ ss << "record " << i;
+ string data = ss.str();
- WriteUnitOfWork uow( opCtx.get() );
- StatusWith<RecordId> res = rs->insertRecord( opCtx.get(),
- data.c_str(),
- data.size() + 1,
- false );
- ASSERT_OK( res.getStatus() );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ StatusWith<RecordId> res =
+ rs->insertRecord(opCtx.get(), data.c_str(), data.size() + 1, false);
+ ASSERT_OK(res.getStatus());
+ uow.commit();
}
+ }
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( nToInsert, rs->numRecords( opCtx.get() ) );
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(nToInsert, rs->numRecords(opCtx.get()));
+ }
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( rs->storageSize( opCtx.get(), NULL ) >= 0 );
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(rs->storageSize(opCtx.get(), NULL) >= 0);
}
+}
-} // namespace mongo
+} // namespace mongo
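
The `>= 0` assertion is the strongest portable check available: storageSize() reports engine-level usage, and nothing in the harness contract stops an engine (an in-memory one, say) from reporting zero for a nonempty store, hence the hedged "may take up some space" comment. The only portable assertion, as a sketch:

    unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
    // No engine-independent lower bound exists beyond non-negativity.
    ASSERT(rs->storageSize(opCtx.get(), NULL) >= 0);
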
diff --git a/src/mongo/db/storage/record_store_test_touch.cpp b/src/mongo/db/storage/record_store_test_touch.cpp
index 80b8909611c..c765bc2638b 100644
--- a/src/mongo/db/storage/record_store_test_touch.cpp
+++ b/src/mongo/db/storage/record_store_test_touch.cpp
@@ -40,131 +40,127 @@ using std::stringstream;
namespace mongo {
- // Verify that calling touch() on an empty collection returns an OK status.
- TEST( RecordStoreTestHarness, TouchEmpty ) {
- unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- unique_ptr<RecordStore> rs( harnessHelper->newNonCappedRecordStore() );
-
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 0, rs->numRecords( opCtx.get() ) );
- }
-
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- BSONObjBuilder stats;
- Status status = rs->touch( opCtx.get(), &stats );
- ASSERT( status.isOK() || status.code() == ErrorCodes::CommandNotSupported );
- }
- }
+// Verify that calling touch() on an empty collection returns an OK status.
+TEST(RecordStoreTestHarness, TouchEmpty) {
+ unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
+
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(0, rs->numRecords(opCtx.get()));
}
- // Insert multiple records, and verify that calling touch() on a nonempty collection
- // returns an OK status.
- TEST( RecordStoreTestHarness, TouchNonEmpty ) {
- unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- unique_ptr<RecordStore> rs( harnessHelper->newNonCappedRecordStore() );
-
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 0, rs->numRecords( opCtx.get() ) );
+ BSONObjBuilder stats;
+ Status status = rs->touch(opCtx.get(), &stats);
+ ASSERT(status.isOK() || status.code() == ErrorCodes::CommandNotSupported);
}
+ }
+}
- int nToInsert = 10;
- for ( int i = 0; i < nToInsert; i++ ) {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- stringstream ss;
- ss << "record " << i;
- string data = ss.str();
-
- WriteUnitOfWork uow( opCtx.get() );
- StatusWith<RecordId> res = rs->insertRecord( opCtx.get(),
- data.c_str(),
- data.size() + 1,
- false );
- ASSERT_OK( res.getStatus() );
- uow.commit();
- }
- }
+// Insert multiple records, and verify that calling touch() on a nonempty collection
+// returns an OK status.
+TEST(RecordStoreTestHarness, TouchNonEmpty) {
+ unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( nToInsert, rs->numRecords( opCtx.get() ) );
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(0, rs->numRecords(opCtx.get()));
+ }
+ int nToInsert = 10;
+ for (int i = 0; i < nToInsert; i++) {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- BSONObjBuilder stats;
- // XXX does not verify the collection was loaded into cache
- // (even if supported by storage engine)
- Status status = rs->touch( opCtx.get(), &stats );
- ASSERT( status.isOK() || status.code() == ErrorCodes::CommandNotSupported );
- }
+ stringstream ss;
+ ss << "record " << i;
+ string data = ss.str();
+
+ WriteUnitOfWork uow(opCtx.get());
+ StatusWith<RecordId> res =
+ rs->insertRecord(opCtx.get(), data.c_str(), data.size() + 1, false);
+ ASSERT_OK(res.getStatus());
+ uow.commit();
}
}
- // Verify that calling touch() on an empty collection returns an OK status,
- // even when NULL is passed in for the stats output.
- TEST( RecordStoreTestHarness, TouchEmptyWithNullStats ) {
- unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- unique_ptr<RecordStore> rs( harnessHelper->newNonCappedRecordStore() );
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(nToInsert, rs->numRecords(opCtx.get()));
+ }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 0, rs->numRecords( opCtx.get() ) );
+ BSONObjBuilder stats;
+ // XXX does not verify the collection was loaded into cache
+ // (even if supported by storage engine)
+ Status status = rs->touch(opCtx.get(), &stats);
+ ASSERT(status.isOK() || status.code() == ErrorCodes::CommandNotSupported);
}
+ }
+}
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- Status status = rs->touch( opCtx.get(), NULL /* stats output */ );
- ASSERT( status.isOK() || status.code() == ErrorCodes::CommandNotSupported );
- }
+// Verify that calling touch() on an empty collection returns an OK status,
+// even when NULL is passed in for the stats output.
+TEST(RecordStoreTestHarness, TouchEmptyWithNullStats) {
+ unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
+
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(0, rs->numRecords(opCtx.get()));
}
- // Insert multiple records, and verify that calling touch() on a nonempty collection
- // returns an OK status, even when NULL is passed in for the stats output.
- TEST( RecordStoreTestHarness, TouchNonEmptyWithNullStats ) {
- unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- unique_ptr<RecordStore> rs( harnessHelper->newNonCappedRecordStore() );
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ Status status = rs->touch(opCtx.get(), NULL /* stats output */);
+ ASSERT(status.isOK() || status.code() == ErrorCodes::CommandNotSupported);
+ }
+}
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 0, rs->numRecords( opCtx.get() ) );
- }
+// Insert multiple records, and verify that calling touch() on a nonempty collection
+// returns an OK status, even when NULL is passed in for the stats output.
+TEST(RecordStoreTestHarness, TouchNonEmptyWithNullStats) {
+ unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
- int nToInsert = 10;
- for ( int i = 0; i < nToInsert; i++ ) {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- stringstream ss;
- ss << "record " << i;
- string data = ss.str();
-
- WriteUnitOfWork uow( opCtx.get() );
- StatusWith<RecordId> res = rs->insertRecord( opCtx.get(),
- data.c_str(),
- data.size() + 1,
- false );
- ASSERT_OK( res.getStatus() );
- uow.commit();
- }
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(0, rs->numRecords(opCtx.get()));
+ }
+ int nToInsert = 10;
+ for (int i = 0; i < nToInsert; i++) {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( nToInsert, rs->numRecords( opCtx.get() ) );
+ stringstream ss;
+ ss << "record " << i;
+ string data = ss.str();
+
+ WriteUnitOfWork uow(opCtx.get());
+ StatusWith<RecordId> res =
+ rs->insertRecord(opCtx.get(), data.c_str(), data.size() + 1, false);
+ ASSERT_OK(res.getStatus());
+ uow.commit();
}
+ }
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- // XXX does not verify the collection was loaded into cache
- // (even if supported by storage engine)
- Status status = rs->touch( opCtx.get(), NULL /* stats output */ );
- ASSERT( status.isOK() || status.code() == ErrorCodes::CommandNotSupported );
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(nToInsert, rs->numRecords(opCtx.get()));
+ }
+
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ // XXX does not verify the collection was loaded into cache
+ // (even if supported by storage engine)
+ Status status = rs->touch(opCtx.get(), NULL /* stats output */);
+ ASSERT(status.isOK() || status.code() == ErrorCodes::CommandNotSupported);
}
+}
-} // namespace mongo
+} // namespace mongo
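
All four touch() tests accept either an OK status or CommandNotSupported, because touch() is only an advisory request to warm the cache. That recurring hedge could be captured in a small helper; assertTouchResult below is a hypothetical name, not part of the harness:

    void assertTouchResult(const Status& status) {
        // touch() is optional: engines that cannot prefault data report
        // CommandNotSupported instead of failing outright.
        ASSERT(status.isOK() || status.code() == ErrorCodes::CommandNotSupported);
    }
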
diff --git a/src/mongo/db/storage/record_store_test_truncate.cpp b/src/mongo/db/storage/record_store_test_truncate.cpp
index 08870cc141f..b38ee3cb806 100644
--- a/src/mongo/db/storage/record_store_test_truncate.cpp
+++ b/src/mongo/db/storage/record_store_test_truncate.cpp
@@ -40,78 +40,76 @@ using std::stringstream;
namespace mongo {
- // Verify that calling truncate() on an already empty collection returns an OK status.
- TEST( RecordStoreTestHarness, TruncateEmpty ) {
- unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- unique_ptr<RecordStore> rs( harnessHelper->newNonCappedRecordStore() );
-
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 0, rs->numRecords( opCtx.get() ) );
- }
-
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( rs->truncate( opCtx.get() ) );
- uow.commit();
- }
- }
+// Verify that calling truncate() on an already empty collection returns an OK status.
+TEST(RecordStoreTestHarness, TruncateEmpty) {
+ unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
+
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(0, rs->numRecords(opCtx.get()));
+ }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 0, rs->numRecords( opCtx.get() ) );
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(rs->truncate(opCtx.get()));
+ uow.commit();
}
}
- // Insert multiple records, and verify that calling truncate() on a nonempty collection
- // removes all of them and returns an OK status.
- TEST( RecordStoreTestHarness, TruncateNonEmpty ) {
- unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- unique_ptr<RecordStore> rs( harnessHelper->newNonCappedRecordStore() );
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(0, rs->numRecords(opCtx.get()));
+ }
+}
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 0, rs->numRecords( opCtx.get() ) );
- }
+// Insert multiple records, and verify that calling truncate() on a nonempty collection
+// removes all of them and returns an OK status.
+TEST(RecordStoreTestHarness, TruncateNonEmpty) {
+ unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
- int nToInsert = 10;
- for ( int i = 0; i < nToInsert; i++ ) {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- stringstream ss;
- ss << "record " << i;
- string data = ss.str();
-
- WriteUnitOfWork uow( opCtx.get() );
- StatusWith<RecordId> res = rs->insertRecord( opCtx.get(),
- data.c_str(),
- data.size() + 1,
- false );
- ASSERT_OK( res.getStatus() );
- uow.commit();
- }
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(0, rs->numRecords(opCtx.get()));
+ }
+ int nToInsert = 10;
+ for (int i = 0; i < nToInsert; i++) {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( nToInsert, rs->numRecords( opCtx.get() ) );
+ stringstream ss;
+ ss << "record " << i;
+ string data = ss.str();
+
+ WriteUnitOfWork uow(opCtx.get());
+ StatusWith<RecordId> res =
+ rs->insertRecord(opCtx.get(), data.c_str(), data.size() + 1, false);
+ ASSERT_OK(res.getStatus());
+ uow.commit();
}
+ }
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( rs->truncate( opCtx.get() ) );
- uow.commit();
- }
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(nToInsert, rs->numRecords(opCtx.get()));
+ }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 0, rs->numRecords( opCtx.get() ) );
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(rs->truncate(opCtx.get()));
+ uow.commit();
}
}
-} // namespace mongo
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(0, rs->numRecords(opCtx.get()));
+ }
+}
+
+} // namespace mongo
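
As throughout these tests, truncate() is wrapped in a WriteUnitOfWork: a unit of work destroyed without commit() rolls its changes back, so the commit() call is what makes the truncation stick. The recurring write pattern, as a sketch:

    {
        WriteUnitOfWork uow(opCtx.get());
        ASSERT_OK(rs->truncate(opCtx.get()));
        uow.commit();  // without this, scope exit would roll the truncate back
    }
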
diff --git a/src/mongo/db/storage/record_store_test_updaterecord.cpp b/src/mongo/db/storage/record_store_test_updaterecord.cpp
index 260ce0e7e9e..0d7c9433503 100644
--- a/src/mongo/db/storage/record_store_test_updaterecord.cpp
+++ b/src/mongo/db/storage/record_store_test_updaterecord.cpp
@@ -43,202 +43,184 @@ using std::stringstream;
namespace mongo {
- // Insert a record and try to update it.
- TEST( RecordStoreTestHarness, UpdateRecord ) {
- unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- unique_ptr<RecordStore> rs( harnessHelper->newNonCappedRecordStore() );
+// Insert a record and try to update it.
+TEST(RecordStoreTestHarness, UpdateRecord) {
+ unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
+
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(0, rs->numRecords(opCtx.get()));
+ }
+ string data = "my record";
+ RecordId loc;
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 0, rs->numRecords( opCtx.get() ) );
+ WriteUnitOfWork uow(opCtx.get());
+ StatusWith<RecordId> res =
+ rs->insertRecord(opCtx.get(), data.c_str(), data.size() + 1, false);
+ ASSERT_OK(res.getStatus());
+ loc = res.getValue();
+ uow.commit();
}
+ }
- string data = "my record";
- RecordId loc;
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- StatusWith<RecordId> res = rs->insertRecord( opCtx.get(),
- data.c_str(),
- data.size() + 1,
- false );
- ASSERT_OK( res.getStatus() );
- loc = res.getValue();
- uow.commit();
- }
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(1, rs->numRecords(opCtx.get()));
+ }
+ data = "updated record-";
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 1, rs->numRecords( opCtx.get() ) );
- }
-
- data = "updated record-";
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- StatusWith<RecordId> res = rs->updateRecord( opCtx.get(),
- loc,
- data.c_str(),
- data.size() + 1,
- false,
- NULL );
- ASSERT_OK( res.getStatus() );
- loc = res.getValue();
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ StatusWith<RecordId> res =
+ rs->updateRecord(opCtx.get(), loc, data.c_str(), data.size() + 1, false, NULL);
+ ASSERT_OK(res.getStatus());
+ loc = res.getValue();
+ uow.commit();
}
+ }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- RecordData record = rs->dataFor( opCtx.get(), loc );
- ASSERT_EQUALS( data.size() + 1, static_cast<size_t>( record.size() ) );
- ASSERT_EQUALS( data, record.data() );
- }
+ RecordData record = rs->dataFor(opCtx.get(), loc);
+ ASSERT_EQUALS(data.size() + 1, static_cast<size_t>(record.size()));
+ ASSERT_EQUALS(data, record.data());
}
}
+}
+
+// Insert multiple records and try to update them.
+TEST(RecordStoreTestHarness, UpdateMultipleRecords) {
+ unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
- // Insert multiple records and try to update them.
- TEST( RecordStoreTestHarness, UpdateMultipleRecords ) {
- unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- unique_ptr<RecordStore> rs( harnessHelper->newNonCappedRecordStore() );
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(0, rs->numRecords(opCtx.get()));
+ }
+ const int nToInsert = 10;
+ RecordId locs[nToInsert];
+ for (int i = 0; i < nToInsert; i++) {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 0, rs->numRecords( opCtx.get() ) );
+ stringstream ss;
+ ss << "record " << i;
+ string data = ss.str();
+
+ WriteUnitOfWork uow(opCtx.get());
+ StatusWith<RecordId> res =
+ rs->insertRecord(opCtx.get(), data.c_str(), data.size() + 1, false);
+ ASSERT_OK(res.getStatus());
+ locs[i] = res.getValue();
+ uow.commit();
}
+ }
- const int nToInsert = 10;
- RecordId locs[nToInsert];
- for ( int i = 0; i < nToInsert; i++ ) {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- stringstream ss;
- ss << "record " << i;
- string data = ss.str();
-
- WriteUnitOfWork uow( opCtx.get() );
- StatusWith<RecordId> res = rs->insertRecord( opCtx.get(),
- data.c_str(),
- data.size() + 1,
- false );
- ASSERT_OK( res.getStatus() );
- locs[i] = res.getValue();
- uow.commit();
- }
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(nToInsert, rs->numRecords(opCtx.get()));
+ }
+ for (int i = 0; i < nToInsert; i++) {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( nToInsert, rs->numRecords( opCtx.get() ) );
- }
-
- for ( int i = 0; i < nToInsert; i++ ) {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- stringstream ss;
- ss << "update record-" << i;
- string data = ss.str();
-
- WriteUnitOfWork uow( opCtx.get() );
- StatusWith<RecordId> res = rs->updateRecord( opCtx.get(),
- locs[i],
- data.c_str(),
- data.size() + 1,
- false,
- NULL );
- ASSERT_OK( res.getStatus() );
- locs[i] = res.getValue();
- uow.commit();
- }
+ stringstream ss;
+ ss << "update record-" << i;
+ string data = ss.str();
+
+ WriteUnitOfWork uow(opCtx.get());
+ StatusWith<RecordId> res =
+ rs->updateRecord(opCtx.get(), locs[i], data.c_str(), data.size() + 1, false, NULL);
+ ASSERT_OK(res.getStatus());
+ locs[i] = res.getValue();
+ uow.commit();
}
+ }
- for ( int i = 0; i < nToInsert; i++ ) {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- stringstream ss;
- ss << "update record-" << i;
- string data = ss.str();
+ for (int i = 0; i < nToInsert; i++) {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ {
+ stringstream ss;
+ ss << "update record-" << i;
+ string data = ss.str();
- RecordData record = rs->dataFor( opCtx.get(), locs[i] );
- ASSERT_EQUALS( data.size() + 1, static_cast<size_t>( record.size() ) );
- ASSERT_EQUALS( data, record.data() );
- }
+ RecordData record = rs->dataFor(opCtx.get(), locs[i]);
+ ASSERT_EQUALS(data.size() + 1, static_cast<size_t>(record.size()));
+ ASSERT_EQUALS(data, record.data());
}
}
+}
- // Insert a record, try to update it, and examine how the UpdateNotifier is called.
- TEST( RecordStoreTestHarness, UpdateRecordWithMoveNotifier ) {
- unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- unique_ptr<RecordStore> rs( harnessHelper->newNonCappedRecordStore() );
+// Insert a record, try to update it, and examine how the UpdateNotifier is called.
+TEST(RecordStoreTestHarness, UpdateRecordWithMoveNotifier) {
+ unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 0, rs->numRecords( opCtx.get() ) );
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(0, rs->numRecords(opCtx.get()));
+ }
- string oldData = "my record";
- RecordId loc;
+ string oldData = "my record";
+ RecordId loc;
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- StatusWith<RecordId> res = rs->insertRecord( opCtx.get(),
- oldData.c_str(),
- oldData.size() + 1,
- false );
- ASSERT_OK( res.getStatus() );
- loc = res.getValue();
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ StatusWith<RecordId> res =
+ rs->insertRecord(opCtx.get(), oldData.c_str(), oldData.size() + 1, false);
+ ASSERT_OK(res.getStatus());
+ loc = res.getValue();
+ uow.commit();
}
+ }
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 1, rs->numRecords( opCtx.get() ) );
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(1, rs->numRecords(opCtx.get()));
+ }
- string newData = "my updated record--";
+ string newData = "my updated record--";
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- UpdateNotifierSpy umn( opCtx.get(), loc, oldData.c_str(), oldData.size() );
-
- WriteUnitOfWork uow( opCtx.get() );
- StatusWith<RecordId> res = rs->updateRecord( opCtx.get(),
- loc,
- newData.c_str(),
- newData.size() + 1,
- false,
- &umn );
- ASSERT_OK( res.getStatus() );
- // UpdateNotifier::recordStoreGoingToMove() called only if
- // the RecordId for the record changes
- if ( loc == res.getValue() ) {
- ASSERT_EQUALS( 0, umn.numMoveCallbacks() );
- // Only MMAP v1 is required to use the UpdateNotifier for in-place updates,
- // so the number of callbacks is expected to be 0 for non-MMAP storage engines.
- ASSERT_GTE( 1, umn.numInPlaceCallbacks() );
- } else {
- ASSERT_EQUALS( 1, umn.numMoveCallbacks() );
- ASSERT_EQUALS( 0, umn.numInPlaceCallbacks() );
- }
- loc = res.getValue();
- uow.commit();
+ UpdateNotifierSpy umn(opCtx.get(), loc, oldData.c_str(), oldData.size());
+
+ WriteUnitOfWork uow(opCtx.get());
+ StatusWith<RecordId> res = rs->updateRecord(
+ opCtx.get(), loc, newData.c_str(), newData.size() + 1, false, &umn);
+ ASSERT_OK(res.getStatus());
+ // UpdateNotifier::recordStoreGoingToMove() called only if
+ // the RecordId for the record changes
+ if (loc == res.getValue()) {
+ ASSERT_EQUALS(0, umn.numMoveCallbacks());
+ // Only MMAP v1 is required to use the UpdateNotifier for in-place updates,
+ // so the number of callbacks is expected to be 0 for non-MMAP storage engines.
+ ASSERT_GTE(1, umn.numInPlaceCallbacks());
+ } else {
+ ASSERT_EQUALS(1, umn.numMoveCallbacks());
+ ASSERT_EQUALS(0, umn.numInPlaceCallbacks());
}
+ loc = res.getValue();
+ uow.commit();
}
+ }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- RecordData record = rs->dataFor( opCtx.get(), loc );
- ASSERT_EQUALS( newData.size() + 1, static_cast<size_t>( record.size() ) );
- ASSERT_EQUALS( newData, record.data() );
- }
+ RecordData record = rs->dataFor(opCtx.get(), loc);
+ ASSERT_EQUALS(newData.size() + 1, static_cast<size_t>(record.size()));
+ ASSERT_EQUALS(newData, record.data());
}
}
+}
-} // namespace mongo
+} // namespace mongo
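
The branch in UpdateRecordWithMoveNotifier encodes the UpdateNotifier contract: exactly one recordStoreGoingToMove() callback when updateRecord() relocates the record (the returned RecordId differs), and otherwise no move callback plus at most one in-place callback, since only MMAPv1 must announce in-place updates. Note the argument order: ASSERT_GTE(1, n) asserts 1 >= n, that is, zero or one calls. A sketch of that decision, using the names from the test above:

    if (loc == res.getValue()) {
        ASSERT_EQUALS(0, umn.numMoveCallbacks());
        ASSERT_GTE(1, umn.numInPlaceCallbacks());  // 0 or 1 in-place callbacks
    } else {
        ASSERT_EQUALS(1, umn.numMoveCallbacks());  // exactly one move callback
        ASSERT_EQUALS(0, umn.numInPlaceCallbacks());
    }
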
diff --git a/src/mongo/db/storage/record_store_test_updaterecord.h b/src/mongo/db/storage/record_store_test_updaterecord.h
index 36000c6fc21..f82feb6b592 100644
--- a/src/mongo/db/storage/record_store_test_updaterecord.h
+++ b/src/mongo/db/storage/record_store_test_updaterecord.h
@@ -38,51 +38,48 @@
namespace mongo {
namespace {
- class UpdateNotifierSpy : public UpdateNotifier {
- public:
- UpdateNotifierSpy( OperationContext* txn, const RecordId &loc,
- const char *buf, size_t size )
- : _txn( txn ),
- _loc( loc ),
- _data( buf, size ),
- nMoveCalls( 0 ),
- nInPlaceCalls( 0 ) {
- }
+class UpdateNotifierSpy : public UpdateNotifier {
+public:
+ UpdateNotifierSpy(OperationContext* txn, const RecordId& loc, const char* buf, size_t size)
+ : _txn(txn), _loc(loc), _data(buf, size), nMoveCalls(0), nInPlaceCalls(0) {}
- ~UpdateNotifierSpy() { }
+ ~UpdateNotifierSpy() {}
- Status recordStoreGoingToMove( OperationContext *txn,
- const RecordId &oldLocation,
- const char *oldBuffer,
- size_t oldSize ) {
- nMoveCalls++;
- ASSERT_EQUALS( _txn, txn );
- ASSERT_EQUALS( _loc, oldLocation );
- ASSERT_EQUALS( _data, oldBuffer );
- return Status::OK();
- }
+ Status recordStoreGoingToMove(OperationContext* txn,
+ const RecordId& oldLocation,
+ const char* oldBuffer,
+ size_t oldSize) {
+ nMoveCalls++;
+ ASSERT_EQUALS(_txn, txn);
+ ASSERT_EQUALS(_loc, oldLocation);
+ ASSERT_EQUALS(_data, oldBuffer);
+ return Status::OK();
+ }
- Status recordStoreGoingToUpdateInPlace( OperationContext* txn,
- const RecordId& loc ) {
- nInPlaceCalls++;
- ASSERT_EQUALS( _txn, txn );
- ASSERT_EQUALS( _loc, loc );
- return Status::OK();
- }
+ Status recordStoreGoingToUpdateInPlace(OperationContext* txn, const RecordId& loc) {
+ nInPlaceCalls++;
+ ASSERT_EQUALS(_txn, txn);
+ ASSERT_EQUALS(_loc, loc);
+ return Status::OK();
+ }
- int numMoveCallbacks() const { return nMoveCalls; }
+ int numMoveCallbacks() const {
+ return nMoveCalls;
+ }
- int numInPlaceCallbacks() const { return nInPlaceCalls; }
+ int numInPlaceCallbacks() const {
+ return nInPlaceCalls;
+ }
- private:
- OperationContext *_txn;
- RecordId _loc;
- std::string _data;
+private:
+ OperationContext* _txn;
+ RecordId _loc;
+ std::string _data;
- // To verify the number of callbacks to the notifier.
- int nMoveCalls;
- int nInPlaceCalls;
- };
+ // To verify the number of callbacks to the notifier.
+ int nMoveCalls;
+ int nInPlaceCalls;
+};
-} // namespace
-} // namespace mongo
+} // namespace
+} // namespace mongo
diff --git a/src/mongo/db/storage/record_store_test_updatewithdamages.cpp b/src/mongo/db/storage/record_store_test_updatewithdamages.cpp
index f5dc1353f15..701d5fd58f0 100644
--- a/src/mongo/db/storage/record_store_test_updatewithdamages.cpp
+++ b/src/mongo/db/storage/record_store_test_updatewithdamages.cpp
@@ -41,251 +41,239 @@ using std::string;
namespace mongo {
- // Insert a record and try to perform an in-place update on it.
- TEST( RecordStoreTestHarness, UpdateWithDamages ) {
- unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- unique_ptr<RecordStore> rs( harnessHelper->newNonCappedRecordStore() );
+// Insert a record and try to perform an in-place update on it.
+TEST(RecordStoreTestHarness, UpdateWithDamages) {
+ unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
- if (!rs->updateWithDamagesSupported())
- return;
+ if (!rs->updateWithDamagesSupported())
+ return;
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 0, rs->numRecords( opCtx.get() ) );
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(0, rs->numRecords(opCtx.get()));
+ }
- string data = "00010111";
- RecordId loc;
- const RecordData rec(data.c_str(), data.size() + 1);
+ string data = "00010111";
+ RecordId loc;
+ const RecordData rec(data.c_str(), data.size() + 1);
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- StatusWith<RecordId> res = rs->insertRecord( opCtx.get(),
- rec.data(),
- rec.size(),
- false );
- ASSERT_OK( res.getStatus() );
- loc = res.getValue();
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ StatusWith<RecordId> res = rs->insertRecord(opCtx.get(), rec.data(), rec.size(), false);
+ ASSERT_OK(res.getStatus());
+ loc = res.getValue();
+ uow.commit();
}
+ }
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 1, rs->numRecords( opCtx.get() ) );
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(1, rs->numRecords(opCtx.get()));
+ }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- mutablebson::DamageVector dv( 3 );
- dv[0].sourceOffset = 5;
- dv[0].targetOffset = 0;
- dv[0].size = 2;
- dv[1].sourceOffset = 3;
- dv[1].targetOffset = 2;
- dv[1].size = 3;
- dv[2].sourceOffset = 0;
- dv[2].targetOffset = 5;
- dv[2].size = 3;
-
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( rs->updateWithDamages( opCtx.get(), loc, rec, data.c_str(), dv ) );
- uow.commit();
- }
+ mutablebson::DamageVector dv(3);
+ dv[0].sourceOffset = 5;
+ dv[0].targetOffset = 0;
+ dv[0].size = 2;
+ dv[1].sourceOffset = 3;
+ dv[1].targetOffset = 2;
+ dv[1].size = 3;
+ dv[2].sourceOffset = 0;
+ dv[2].targetOffset = 5;
+ dv[2].size = 3;
+
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(rs->updateWithDamages(opCtx.get(), loc, rec, data.c_str(), dv));
+ uow.commit();
}
+ }
- data = "11101000";
+ data = "11101000";
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- RecordData record = rs->dataFor( opCtx.get(), loc );
- ASSERT_EQUALS( data, record.data() );
- }
+ RecordData record = rs->dataFor(opCtx.get(), loc);
+ ASSERT_EQUALS(data, record.data());
}
}
+}
- // Insert a record and try to perform an in-place update on it with a DamageVector
- // containing overlapping DamageEvents.
- TEST( RecordStoreTestHarness, UpdateWithOverlappingDamageEvents ) {
- unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- unique_ptr<RecordStore> rs( harnessHelper->newNonCappedRecordStore() );
+// Insert a record and try to perform an in-place update on it with a DamageVector
+// containing overlapping DamageEvents.
+TEST(RecordStoreTestHarness, UpdateWithOverlappingDamageEvents) {
+ unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
- if (!rs->updateWithDamagesSupported())
- return;
+ if (!rs->updateWithDamagesSupported())
+ return;
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 0, rs->numRecords( opCtx.get() ) );
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(0, rs->numRecords(opCtx.get()));
+ }
- string data = "00010111";
- RecordId loc;
- const RecordData rec(data.c_str(), data.size() + 1);
+ string data = "00010111";
+ RecordId loc;
+ const RecordData rec(data.c_str(), data.size() + 1);
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- StatusWith<RecordId> res = rs->insertRecord( opCtx.get(),
- rec.data(),
- rec.size(),
- false );
- ASSERT_OK( res.getStatus() );
- loc = res.getValue();
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ StatusWith<RecordId> res = rs->insertRecord(opCtx.get(), rec.data(), rec.size(), false);
+ ASSERT_OK(res.getStatus());
+ loc = res.getValue();
+ uow.commit();
}
+ }
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 1, rs->numRecords( opCtx.get() ) );
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(1, rs->numRecords(opCtx.get()));
+ }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- mutablebson::DamageVector dv( 2 );
- dv[0].sourceOffset = 3;
- dv[0].targetOffset = 0;
- dv[0].size = 5;
- dv[1].sourceOffset = 0;
- dv[1].targetOffset = 3;
- dv[1].size = 5;
-
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( rs->updateWithDamages( opCtx.get(), loc, rec, data.c_str(), dv ) );
- uow.commit();
- }
+ mutablebson::DamageVector dv(2);
+ dv[0].sourceOffset = 3;
+ dv[0].targetOffset = 0;
+ dv[0].size = 5;
+ dv[1].sourceOffset = 0;
+ dv[1].targetOffset = 3;
+ dv[1].size = 5;
+
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(rs->updateWithDamages(opCtx.get(), loc, rec, data.c_str(), dv));
+ uow.commit();
}
+ }
- data = "10100010";
+ data = "10100010";
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- RecordData record = rs->dataFor( opCtx.get(), loc );
- ASSERT_EQUALS( data, record.data() );
- }
+ RecordData record = rs->dataFor(opCtx.get(), loc);
+ ASSERT_EQUALS(data, record.data());
}
}
+}
- // Insert a record and try to perform an in-place update on it with a DamageVector
- // containing overlapping DamageEvents. The changes should be applied in the order
- // specified by the DamageVector, and not -- for instance -- by the targetOffset.
- TEST( RecordStoreTestHarness, UpdateWithOverlappingDamageEventsReversed ) {
- unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- unique_ptr<RecordStore> rs( harnessHelper->newNonCappedRecordStore() );
+// Insert a record and try to perform an in-place update on it with a DamageVector
+// containing overlapping DamageEvents. The changes should be applied in the order
+// specified by the DamageVector, and not -- for instance -- by the targetOffset.
+TEST(RecordStoreTestHarness, UpdateWithOverlappingDamageEventsReversed) {
+ unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
- if (!rs->updateWithDamagesSupported())
- return;
+ if (!rs->updateWithDamagesSupported())
+ return;
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 0, rs->numRecords( opCtx.get() ) );
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(0, rs->numRecords(opCtx.get()));
+ }
- string data = "00010111";
- RecordId loc;
- const RecordData rec(data.c_str(), data.size() + 1);
+ string data = "00010111";
+ RecordId loc;
+ const RecordData rec(data.c_str(), data.size() + 1);
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- StatusWith<RecordId> res = rs->insertRecord( opCtx.get(),
- rec.data(),
- rec.size(),
- false );
- ASSERT_OK( res.getStatus() );
- loc = res.getValue();
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ StatusWith<RecordId> res = rs->insertRecord(opCtx.get(), rec.data(), rec.size(), false);
+ ASSERT_OK(res.getStatus());
+ loc = res.getValue();
+ uow.commit();
}
+ }
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 1, rs->numRecords( opCtx.get() ) );
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(1, rs->numRecords(opCtx.get()));
+ }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- mutablebson::DamageVector dv( 2 );
- dv[0].sourceOffset = 0;
- dv[0].targetOffset = 3;
- dv[0].size = 5;
- dv[1].sourceOffset = 3;
- dv[1].targetOffset = 0;
- dv[1].size = 5;
-
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( rs->updateWithDamages( opCtx.get(), loc, rec, data.c_str(), dv ) );
- uow.commit();
- }
+ mutablebson::DamageVector dv(2);
+ dv[0].sourceOffset = 0;
+ dv[0].targetOffset = 3;
+ dv[0].size = 5;
+ dv[1].sourceOffset = 3;
+ dv[1].targetOffset = 0;
+ dv[1].size = 5;
+
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(rs->updateWithDamages(opCtx.get(), loc, rec, data.c_str(), dv));
+ uow.commit();
}
+ }
- data = "10111010";
+ data = "10111010";
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- RecordData record = rs->dataFor( opCtx.get(), loc );
- ASSERT_EQUALS( data, record.data() );
- }
+ RecordData record = rs->dataFor(opCtx.get(), loc);
+ ASSERT_EQUALS(data, record.data());
}
}
+}
- // Insert a record and try to call updateWithDamages() with an empty DamageVector.
- TEST( RecordStoreTestHarness, UpdateWithNoDamages ) {
- unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- unique_ptr<RecordStore> rs( harnessHelper->newNonCappedRecordStore() );
+// Insert a record and try to call updateWithDamages() with an empty DamageVector.
+TEST(RecordStoreTestHarness, UpdateWithNoDamages) {
+ unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
- if (!rs->updateWithDamagesSupported())
- return;
+ if (!rs->updateWithDamagesSupported())
+ return;
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 0, rs->numRecords( opCtx.get() ) );
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(0, rs->numRecords(opCtx.get()));
+ }
- string data = "my record";
- RecordId loc;
- const RecordData rec(data.c_str(), data.size() + 1);
+ string data = "my record";
+ RecordId loc;
+ const RecordData rec(data.c_str(), data.size() + 1);
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- StatusWith<RecordId> res = rs->insertRecord( opCtx.get(),
- rec.data(),
- rec.size(),
- false );
- ASSERT_OK( res.getStatus() );
- loc = res.getValue();
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ StatusWith<RecordId> res = rs->insertRecord(opCtx.get(), rec.data(), rec.size(), false);
+ ASSERT_OK(res.getStatus());
+ loc = res.getValue();
+ uow.commit();
}
+ }
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 1, rs->numRecords( opCtx.get() ) );
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(1, rs->numRecords(opCtx.get()));
+ }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- mutablebson::DamageVector dv;
-
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( rs->updateWithDamages( opCtx.get(), loc, rec, "", dv ) );
- uow.commit();
- }
+ mutablebson::DamageVector dv;
+
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(rs->updateWithDamages(opCtx.get(), loc, rec, "", dv));
+ uow.commit();
}
+ }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- RecordData record = rs->dataFor( opCtx.get(), loc );
- ASSERT_EQUALS( data, record.data() );
- }
+ RecordData record = rs->dataFor(opCtx.get(), loc);
+ ASSERT_EQUALS(data, record.data());
}
}
+}
-} // namespace mongo
+} // namespace mongo
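
updateWithDamages() applies each DamageEvent in DamageVector order, copying `size` bytes from `sourceOffset` in the caller's scratch buffer into `targetOffset` of the stored record; that ordering is exactly what the "Reversed" test pins down. A minimal self-contained model of this semantics (DamageEvent here is a simplified stand-in for the mutablebson struct, not its real definition):

    #include <cstring>
    #include <string>
    #include <vector>

    struct DamageEvent {
        size_t sourceOffset, targetOffset, size;
    };

    // Apply damages in vector order, as updateWithDamages() does.
    std::string applyDamages(std::string record,
                             const char* source,
                             const std::vector<DamageEvent>& dv) {
        for (const auto& d : dv)
            std::memcpy(&record[d.targetOffset], source + d.sourceOffset, d.size);
        return record;
    }

    // Tracing the first test (source and record both start as "00010111"):
    //   {5,0,2} -> "11010111", {3,2,3} -> "11101111", {0,5,3} -> "11101000",
    // which matches the post-update assertion above.
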
diff --git a/src/mongo/db/storage/record_store_test_validate.cpp b/src/mongo/db/storage/record_store_test_validate.cpp
index 89453520073..71790a376a3 100644
--- a/src/mongo/db/storage/record_store_test_validate.cpp
+++ b/src/mongo/db/storage/record_store_test_validate.cpp
@@ -41,211 +41,211 @@ using std::string;
namespace mongo {
namespace {
- // Verify that calling validate() on an empty collection returns an OK status.
- // When either of `full` or `scanData` are false, the ValidateAdaptor
- // should not be used.
- TEST( RecordStoreTestHarness, ValidateEmpty ) {
- unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- unique_ptr<RecordStore> rs( harnessHelper->newNonCappedRecordStore() );
-
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 0, rs->numRecords( opCtx.get() ) );
- }
-
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- ValidateAdaptorSpy adaptor;
- ValidateResults results;
- BSONObjBuilder stats;
- ASSERT_OK( rs->validate( opCtx.get(),
- false, // full validate
- false, // scan data
- &adaptor,
- &results,
- &stats ) );
- ASSERT( results.valid );
- ASSERT( results.errors.empty() );
- }
- }
+// Verify that calling validate() on an empty collection returns an OK status.
+// When either of `full` or `scanData` is false, the ValidateAdaptor
+// should not be used.
+TEST(RecordStoreTestHarness, ValidateEmpty) {
+ unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
+
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(0, rs->numRecords(opCtx.get()));
}
- // Verify that calling validate() on an empty collection returns an OK status.
- // When either of `full` or `scanData` are false, the ValidateAdaptor
- // should not be used.
- TEST( RecordStoreTestHarness, ValidateEmptyAndScanData ) {
- unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- unique_ptr<RecordStore> rs( harnessHelper->newNonCappedRecordStore() );
-
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 0, rs->numRecords( opCtx.get() ) );
- }
-
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- ValidateAdaptorSpy adaptor;
- ValidateResults results;
- BSONObjBuilder stats;
- ASSERT_OK( rs->validate( opCtx.get(),
- false, // full validate
- true, // scan data
- &adaptor,
- &results,
- &stats ) );
- ASSERT( results.valid );
- ASSERT( results.errors.empty() );
- }
+ ValidateAdaptorSpy adaptor;
+ ValidateResults results;
+ BSONObjBuilder stats;
+ ASSERT_OK(rs->validate(opCtx.get(),
+ false, // full validate
+ false, // scan data
+ &adaptor,
+ &results,
+ &stats));
+ ASSERT(results.valid);
+ ASSERT(results.errors.empty());
}
}
+}
+
+// Verify that calling validate() on an empty collection returns an OK status.
+// When either of `full` or `scanData` is false, the ValidateAdaptor
+// should not be used.
+TEST(RecordStoreTestHarness, ValidateEmptyAndScanData) {
+ unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
+
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(0, rs->numRecords(opCtx.get()));
+ }
- // Verify that calling validate() on an empty collection returns an OK status.
- // When either of `full` or `scanData` are false, the ValidateAdaptor
- // should not be used.
- TEST( RecordStoreTestHarness, FullValidateEmpty ) {
- unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- unique_ptr<RecordStore> rs( harnessHelper->newNonCappedRecordStore() );
-
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 0, rs->numRecords( opCtx.get() ) );
+ ValidateAdaptorSpy adaptor;
+ ValidateResults results;
+ BSONObjBuilder stats;
+ ASSERT_OK(rs->validate(opCtx.get(),
+ false, // full validate
+ true, // scan data
+ &adaptor,
+ &results,
+ &stats));
+ ASSERT(results.valid);
+ ASSERT(results.errors.empty());
}
+ }
+}
+
+// Verify that calling validate() on an empty collection returns an OK status.
+// When either of `full` or `scanData` is false, the ValidateAdaptor
+// should not be used.
+TEST(RecordStoreTestHarness, FullValidateEmpty) {
+ unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
+
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(0, rs->numRecords(opCtx.get()));
+ }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- ValidateAdaptorSpy adaptor;
- ValidateResults results;
- BSONObjBuilder stats;
- ASSERT_OK( rs->validate( opCtx.get(),
- true, // full validate
- false, // scan data
- &adaptor,
- &results,
- &stats ) );
- ASSERT( results.valid );
- ASSERT( results.errors.empty() );
- }
+ ValidateAdaptorSpy adaptor;
+ ValidateResults results;
+ BSONObjBuilder stats;
+ ASSERT_OK(rs->validate(opCtx.get(),
+ true, // full validate
+ false, // scan data
+ &adaptor,
+ &results,
+ &stats));
+ ASSERT(results.valid);
+ ASSERT(results.errors.empty());
}
}
+}
- // Verify that calling validate() on an empty collection returns an OK status.
- TEST( RecordStoreTestHarness, FullValidateEmptyAndScanData ) {
- unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- unique_ptr<RecordStore> rs( harnessHelper->newNonCappedRecordStore() );
+// Verify that calling validate() on an empty collection returns an OK status.
+TEST(RecordStoreTestHarness, FullValidateEmptyAndScanData) {
+ unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 0, rs->numRecords( opCtx.get() ) );
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(0, rs->numRecords(opCtx.get()));
+ }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- ValidateAdaptorSpy adaptor;
- ValidateResults results;
- BSONObjBuilder stats;
- ASSERT_OK( rs->validate( opCtx.get(),
- true, // full validate
- true, // scan data
- &adaptor,
- &results,
- &stats ) );
- ASSERT( results.valid );
- ASSERT( results.errors.empty() );
- }
+ ValidateAdaptorSpy adaptor;
+ ValidateResults results;
+ BSONObjBuilder stats;
+ ASSERT_OK(rs->validate(opCtx.get(),
+ true, // full validate
+ true, // scan data
+ &adaptor,
+ &results,
+ &stats));
+ ASSERT(results.valid);
+ ASSERT(results.errors.empty());
}
}
-
- // Insert multiple records, and verify that calling validate() on a nonempty collection
- // returns an OK status. When either of `full` or `scanData` are false, the ValidateAdaptor
- // should not be used.
- TEST_F( ValidateTest, ValidateNonEmpty ) {
+}
+
+// Insert multiple records, and verify that calling validate() on a nonempty collection
+// returns an OK status. When either of `full` or `scanData` is false, the ValidateAdaptor
+// should not be used.
+TEST_F(ValidateTest, ValidateNonEmpty) {
+ {
+ unique_ptr<OperationContext> opCtx(newOperationContext());
{
- unique_ptr<OperationContext> opCtx( newOperationContext() );
- {
- ValidateAdaptorSpy adaptor;
- ValidateResults results;
- BSONObjBuilder stats;
- ASSERT_OK( getRecordStore().validate( opCtx.get(),
- false, // full validate
- false, // scan data
- &adaptor,
- &results,
- &stats ) );
- ASSERT( results.valid );
- ASSERT( results.errors.empty() );
- }
+ ValidateAdaptorSpy adaptor;
+ ValidateResults results;
+ BSONObjBuilder stats;
+ ASSERT_OK(getRecordStore().validate(opCtx.get(),
+ false, // full validate
+ false, // scan data
+ &adaptor,
+ &results,
+ &stats));
+ ASSERT(results.valid);
+ ASSERT(results.errors.empty());
}
}
-
- // Insert multiple records, and verify that calling validate() on a nonempty collection
- // returns an OK status. When either of `full` or `scanData` are false, the ValidateAdaptor
- // should not be used.
- TEST_F( ValidateTest, ValidateAndScanDataNonEmpty ) {
+}
+
+// Insert multiple records, and verify that calling validate() on a nonempty collection
+// returns an OK status. When either of `full` or `scanData` is false, the ValidateAdaptor
+// should not be used.
+TEST_F(ValidateTest, ValidateAndScanDataNonEmpty) {
+ {
+ unique_ptr<OperationContext> opCtx(newOperationContext());
{
- unique_ptr<OperationContext> opCtx( newOperationContext() );
- {
- ValidateAdaptorSpy adaptor;
- ValidateResults results;
- BSONObjBuilder stats;
- ASSERT_OK( getRecordStore().validate( opCtx.get(),
- false, // full validate
- true, // scan data
- &adaptor,
- &results,
- &stats ) );
- ASSERT( results.valid );
- ASSERT( results.errors.empty() );
- }
+ ValidateAdaptorSpy adaptor;
+ ValidateResults results;
+ BSONObjBuilder stats;
+ ASSERT_OK(getRecordStore().validate(opCtx.get(),
+ false, // full validate
+ true, // scan data
+ &adaptor,
+ &results,
+ &stats));
+ ASSERT(results.valid);
+ ASSERT(results.errors.empty());
}
}
-
- // Insert multiple records, and verify that calling validate() on a nonempty collection
- // returns an OK status. When either of `full` or `scanData` are false, the ValidateAdaptor
- // should not be used.
- TEST_F( ValidateTest, FullValidateNonEmpty ) {
+}
+
+// Insert multiple records, and verify that calling validate() on a nonempty collection
+// returns an OK status. When either of `full` or `scanData` is false, the ValidateAdaptor
+// should not be used.
+TEST_F(ValidateTest, FullValidateNonEmpty) {
+ {
+ unique_ptr<OperationContext> opCtx(newOperationContext());
{
- unique_ptr<OperationContext> opCtx( newOperationContext() );
- {
- ValidateAdaptorSpy adaptor;
- ValidateResults results;
- BSONObjBuilder stats;
- ASSERT_OK( getRecordStore().validate( opCtx.get(),
- true, // full validate
- false, // scan data
- &adaptor,
- &results,
- &stats ) );
- ASSERT( results.valid );
- ASSERT( results.errors.empty() );
- }
+ ValidateAdaptorSpy adaptor;
+ ValidateResults results;
+ BSONObjBuilder stats;
+ ASSERT_OK(getRecordStore().validate(opCtx.get(),
+ true, // full validate
+ false, // scan data
+ &adaptor,
+ &results,
+ &stats));
+ ASSERT(results.valid);
+ ASSERT(results.errors.empty());
}
}
+}
- // Insert multiple records, and verify that calling validate() on a nonempty collection
- // returns an OK status.
- TEST_F( ValidateTest, FullValidateNonEmptyAndScanData ) {
+// Insert multiple records, and verify that calling validate() on a nonempty collection
+// returns an OK status.
+TEST_F(ValidateTest, FullValidateNonEmptyAndScanData) {
+ {
+ unique_ptr<OperationContext> opCtx(newOperationContext());
{
- unique_ptr<OperationContext> opCtx( newOperationContext() );
- {
- ValidateAdaptorSpy adaptor( getInsertedRecords() );
- ValidateResults results;
- BSONObjBuilder stats;
- ASSERT_OK( getRecordStore().validate( opCtx.get(),
- true, // full validate
- true, // scan data
- &adaptor,
- &results,
- &stats ) );
- ASSERT( adaptor.allValidated() );
- ASSERT( results.valid );
- ASSERT( results.errors.empty() );
- }
+ ValidateAdaptorSpy adaptor(getInsertedRecords());
+ ValidateResults results;
+ BSONObjBuilder stats;
+ ASSERT_OK(getRecordStore().validate(opCtx.get(),
+ true, // full validate
+ true, // scan data
+ &adaptor,
+ &results,
+ &stats));
+ ASSERT(adaptor.allValidated());
+ ASSERT(results.valid);
+ ASSERT(results.errors.empty());
}
}
+}
-} // namespace
-} // namespace mongo
+} // namespace
+} // namespace mongo
diff --git a/src/mongo/db/storage/record_store_test_validate.h b/src/mongo/db/storage/record_store_test_validate.h
index e8ff30acfa3..e7b435e4738 100644
--- a/src/mongo/db/storage/record_store_test_validate.h
+++ b/src/mongo/db/storage/record_store_test_validate.h
@@ -39,81 +39,81 @@
namespace mongo {
namespace {
- class ValidateAdaptorSpy : public ValidateAdaptor {
- public:
- ValidateAdaptorSpy() { }
+class ValidateAdaptorSpy : public ValidateAdaptor {
+public:
+ ValidateAdaptorSpy() {}
- ValidateAdaptorSpy( const std::set<std::string> &remain )
- : _remain( remain ) {
- }
+ ValidateAdaptorSpy(const std::set<std::string>& remain) : _remain(remain) {}
- ~ValidateAdaptorSpy() { }
+ ~ValidateAdaptorSpy() {}
- Status validate( const RecordData &recordData, size_t *dataSize ) {
- std::string s( recordData.data() );
- ASSERT( 1 == _remain.erase( s ) );
+ Status validate(const RecordData& recordData, size_t* dataSize) {
+ std::string s(recordData.data());
+ ASSERT(1 == _remain.erase(s));
- *dataSize = recordData.size();
- return Status::OK();
- }
+ *dataSize = recordData.size();
+ return Status::OK();
+ }
- bool allValidated() { return _remain.empty(); }
+ bool allValidated() {
+ return _remain.empty();
+ }
- private:
- std::set<std::string> _remain; // initially contains all inserted records
- };
+private:
+ std::set<std::string> _remain; // initially contains all inserted records
+};
- class ValidateTest : public mongo::unittest::Test {
- public:
- ValidateTest()
- : _harnessHelper( newHarnessHelper() ),
- _rs( _harnessHelper->newNonCappedRecordStore() ) {
- }
+class ValidateTest : public mongo::unittest::Test {
+public:
+ ValidateTest()
+ : _harnessHelper(newHarnessHelper()), _rs(_harnessHelper->newNonCappedRecordStore()) {}
- OperationContext* newOperationContext() {
- return _harnessHelper->newOperationContext();
- }
+ OperationContext* newOperationContext() {
+ return _harnessHelper->newOperationContext();
+ }
- RecordStore& getRecordStore() { return *_rs; }
+ RecordStore& getRecordStore() {
+ return *_rs;
+ }
- const std::set<std::string>& getInsertedRecords() { return _remain; }
+ const std::set<std::string>& getInsertedRecords() {
+ return _remain;
+ }
- void setUp() {
- {
- std::unique_ptr<OperationContext> opCtx( newOperationContext() );
- ASSERT_EQUALS( 0, _rs->numRecords( opCtx.get() ) );
- }
-
- int nToInsert = 10;
- for ( int i = 0; i < nToInsert; i++ ) {
- std::unique_ptr<OperationContext> opCtx( newOperationContext() );
- {
- std::stringstream ss;
- ss << "record " << i;
- std::string data = ss.str();
- ASSERT( _remain.insert( data ).second );
-
- WriteUnitOfWork uow( opCtx.get() );
- StatusWith<RecordId> res = _rs->insertRecord( opCtx.get(),
- data.c_str(),
- data.size() + 1,
- false );
- ASSERT_OK( res.getStatus() );
- uow.commit();
- }
- }
+ void setUp() {
+ {
+ std::unique_ptr<OperationContext> opCtx(newOperationContext());
+ ASSERT_EQUALS(0, _rs->numRecords(opCtx.get()));
+ }
+ int nToInsert = 10;
+ for (int i = 0; i < nToInsert; i++) {
+ std::unique_ptr<OperationContext> opCtx(newOperationContext());
{
- std::unique_ptr<OperationContext> opCtx( newOperationContext() );
- ASSERT_EQUALS( nToInsert, _rs->numRecords( opCtx.get() ) );
+ std::stringstream ss;
+ ss << "record " << i;
+ std::string data = ss.str();
+ ASSERT(_remain.insert(data).second);
+
+ WriteUnitOfWork uow(opCtx.get());
+ StatusWith<RecordId> res =
+ _rs->insertRecord(opCtx.get(), data.c_str(), data.size() + 1, false);
+ ASSERT_OK(res.getStatus());
+ uow.commit();
}
}
- private:
- std::unique_ptr<HarnessHelper> _harnessHelper;
- std::unique_ptr<RecordStore> _rs;
- std::set<std::string> _remain;
- };
+ {
+ std::unique_ptr<OperationContext> opCtx(newOperationContext());
+ ASSERT_EQUALS(nToInsert, _rs->numRecords(opCtx.get()));
+ }
+ }
+
+private:
+ std::unique_ptr<HarnessHelper> _harnessHelper;
+ std::unique_ptr<RecordStore> _rs;
+ std::set<std::string> _remain;
+};
-} // namespace
-} // namespace mongo
+} // namespace
+} // namespace mongo
diff --git a/src/mongo/db/storage/recovery_unit.h b/src/mongo/db/storage/recovery_unit.h
index 77bb7a33dd1..d671516fcbd 100644
--- a/src/mongo/db/storage/recovery_unit.h
+++ b/src/mongo/db/storage/recovery_unit.h
@@ -37,134 +37,135 @@
namespace mongo {
- class BSONObjBuilder;
- class OperationContext;
+class BSONObjBuilder;
+class OperationContext;
+
+/**
+ * A RecoveryUnit is responsible for ensuring that data is persisted.
+ * All on-disk information must be mutated through this interface.
+ */
+class RecoveryUnit {
+ MONGO_DISALLOW_COPYING(RecoveryUnit);
+
+public:
+ virtual ~RecoveryUnit() {}
+
+ virtual void reportState(BSONObjBuilder* b) const {}
+
+ virtual void beingReleasedFromOperationContext() {}
+ virtual void beingSetOnOperationContext() {}
+
+ /**
+ * These should be called through WriteUnitOfWork rather than directly.
+ *
+ * A call to 'beginUnitOfWork' marks the beginning of a unit of work. Each call to
+ * 'beginUnitOfWork' must be matched with exactly one call to either 'commitUnitOfWork' or
+ * 'abortUnitOfWork'. When 'abortUnitOfWork' is called, all changes made since the
+ * beginning of the unit of work will be rolled back.
+ */
+ virtual void beginUnitOfWork(OperationContext* opCtx) = 0;
+ virtual void commitUnitOfWork() = 0;
+ virtual void abortUnitOfWork() = 0;
+
+ /**
+ * Waits until all writes prior to this call are durable. Returns true, unless the storage
+ * engine cannot guarantee durability, which should never happen when isDurable() returned
+ * true.
+ */
+ virtual bool waitUntilDurable() = 0;
+
+ /**
+ * This is a hint to the engine that this transaction is going to call waitUntilDurable at
+ * the end. This should be called before any work is done so that transactions can be
+ * configured correctly.
+ */
+ virtual void goingToWaitUntilDurable() {}
+
+ /**
+ * When this is called, if there is an open transaction, it is closed. On return no
+ * transaction is active. This cannot be called inside of a WriteUnitOfWork, and should
+ * fail if it is.
+ */
+ virtual void abandonSnapshot() = 0;
+
+ virtual SnapshotId getSnapshotId() const = 0;
/**
- * A RecoveryUnit is responsible for ensuring that data is persisted.
- * All on-disk information must be mutated through this interface.
+ * A Change is an action that is registerChange()'d while a WriteUnitOfWork exists. The
+ * change is either rollback()'d or commit()'d when the WriteUnitOfWork goes out of scope.
+ *
+ * Neither rollback() nor commit() may fail or throw exceptions.
+ *
+ * Change implementors are responsible for handling their own locking, and must be aware
+ * that rollback() and commit() may be called after resources with a shorter lifetime than
+ * the WriteUnitOfWork have been freed. Each registered change will be committed or rolled
+ * back once.
*/
- class RecoveryUnit {
- MONGO_DISALLOW_COPYING(RecoveryUnit);
+ class Change {
public:
- virtual ~RecoveryUnit() { }
-
- virtual void reportState( BSONObjBuilder* b ) const { }
-
- virtual void beingReleasedFromOperationContext() {}
- virtual void beingSetOnOperationContext() {}
-
- /**
- * These should be called through WriteUnitOfWork rather than directly.
- *
- * A call to 'beginUnitOfWork' marks the beginning of a unit of work. Each call to
- * 'beginUnitOfWork' must be matched with exactly one call to either 'commitUnitOfWork' or
- * 'abortUnitOfWork'. When 'abortUnitOfWork' is called, all changes made since the begin
- * of the unit of work will be rolled back.
- */
- virtual void beginUnitOfWork(OperationContext* opCtx) = 0;
- virtual void commitUnitOfWork() = 0;
- virtual void abortUnitOfWork() = 0;
-
- /**
- * Waits until all writes prior to this call are durable. Returns true, unless the storage
- * engine cannot guarantee durability, which should never happen when isDurable() returned
- * true.
- */
- virtual bool waitUntilDurable() = 0;
-
- /**
- * This is a hint to the engine that this transaction is going to call waitUntilDurable at
- * the end. This should be called before any work is done so that transactions can be
- * configured correctly.
- */
- virtual void goingToWaitUntilDurable() { }
-
- /**
- * When this is called, if there is an open transaction, it is closed. On return no
- * transaction is active. This cannot be called inside of a WriteUnitOfWork, and should
- * fail if it is.
- */
- virtual void abandonSnapshot() = 0;
-
- virtual SnapshotId getSnapshotId() const = 0;
-
- /**
- * A Change is an action that is registerChange()'d while a WriteUnitOfWork exists. The
- * change is either rollback()'d or commit()'d when the WriteUnitOfWork goes out of scope.
- *
- * Neither rollback() nor commit() may fail or throw exceptions.
- *
- * Change implementors are responsible for handling their own locking, and must be aware
- * that rollback() and commit() may be called after resources with a shorter lifetime than
- * the WriteUnitOfWork have been freed. Each registered change will be committed or rolled
- * back once.
- */
- class Change {
- public:
- virtual ~Change() { }
-
- virtual void rollback() = 0;
- virtual void commit() = 0;
- };
-
- /**
- * The RecoveryUnit takes ownership of the change. The commitUnitOfWork() method calls the
- * commit() method of each registered change in order of registration. The endUnitOfWork()
- * method calls the rollback() method of each registered Change in reverse order of
- * registration. Either will unregister and delete the changes.
- *
- * The registerChange() method may only be called when a WriteUnitOfWork is active, and
- * may not be called during commit or rollback.
- */
- virtual void registerChange(Change* change) = 0;
-
- //
- // The remaining methods probably belong on DurRecoveryUnit rather than on the interface.
- //
-
- /**
- * Declare that the data at [x, x + len) is being written.
- */
- virtual void* writingPtr(void* data, size_t len) = 0;
-
- //
- // Syntactic sugar
- //
-
- /**
- * Declare write intent for an int
- */
- inline int& writingInt(int& d) {
- return *writing(&d);
- }
-
- /**
- * A templated helper for writingPtr.
- */
- template <typename T>
- inline T* writing(T* x) {
- writingPtr(x, sizeof(T));
- return x;
- }
-
- /**
- * Sets a flag that declares this RecoveryUnit will skip rolling back writes, for the
- * duration of the current outermost WriteUnitOfWork. This function can only be called
- * between a pair of unnested beginUnitOfWork() / endUnitOfWork() calls.
- * The flag is cleared when endUnitOfWork() is called.
- * While the flag is set, rollback will skip rolling back writes, but custom rollback
- * change functions are still called. Clearly, this functionality should only be used when
- * writing to temporary collections that can be cleaned up externally. For example,
- * foreground index builds write to a temporary collection; if something goes wrong that
- * normally requires a rollback, we can instead clean up the index by dropping the entire
- * index.
- * Setting the flag may permit increased performance.
- */
- virtual void setRollbackWritesDisabled() = 0;
-
- protected:
- RecoveryUnit() { }
+ virtual ~Change() {}
+
+ virtual void rollback() = 0;
+ virtual void commit() = 0;
};
+ /**
+ * The RecoveryUnit takes ownership of the change. The commitUnitOfWork() method calls the
+     * commit() method of each registered change in order of registration. The
+     * abortUnitOfWork() method calls the rollback() method of each registered Change in
+     * reverse order of registration. Either will unregister and delete the changes.
+ *
+ * The registerChange() method may only be called when a WriteUnitOfWork is active, and
+ * may not be called during commit or rollback.
+ */
+ virtual void registerChange(Change* change) = 0;
+
+ //
+ // The remaining methods probably belong on DurRecoveryUnit rather than on the interface.
+ //
+
+ /**
+ * Declare that the data at [x, x + len) is being written.
+ */
+ virtual void* writingPtr(void* data, size_t len) = 0;
+
+ //
+ // Syntactic sugar
+ //
+
+ /**
+ * Declare write intent for an int
+ */
+ inline int& writingInt(int& d) {
+ return *writing(&d);
+ }
+
+ /**
+ * A templated helper for writingPtr.
+ */
+ template <typename T>
+ inline T* writing(T* x) {
+ writingPtr(x, sizeof(T));
+ return x;
+ }
+
+ /**
+ * Sets a flag that declares this RecoveryUnit will skip rolling back writes, for the
+ * duration of the current outermost WriteUnitOfWork. This function can only be called
+     * between an unnested beginUnitOfWork() call and the matching commitUnitOfWork() or
+     * abortUnitOfWork() call. The flag is cleared when the unit of work ends.
+ * While the flag is set, rollback will skip rolling back writes, but custom rollback
+ * change functions are still called. Clearly, this functionality should only be used when
+ * writing to temporary collections that can be cleaned up externally. For example,
+ * foreground index builds write to a temporary collection; if something goes wrong that
+ * normally requires a rollback, we can instead clean up the index by dropping the entire
+ * index.
+ * Setting the flag may permit increased performance.
+ */
+ virtual void setRollbackWritesDisabled() = 0;
+
+protected:
+ RecoveryUnit() {}
+};
+
} // namespace mongo
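
To make the Change contract above concrete, here is a minimal sketch of an implementation (not code from this diff): commit() and rollback() must not throw, and may run after shorter-lived resources are gone, so the change only touches state it holds a stable pointer to.

    #include "mongo/db/storage/recovery_unit.h"

    namespace mongo {
    namespace {

    // Sketch: defer an effect until the unit of work commits. Because
    // rollback() and commit() may run after shorter-lived resources are
    // freed, the change holds only a pointer that outlives the
    // WriteUnitOfWork.
    class IncrementOnCommit final : public RecoveryUnit::Change {
    public:
        explicit IncrementOnCommit(long long* counter) : _counter(counter) {}

        void commit() override {
            ++*_counter;  // the effect becomes visible only on commit
        }
        void rollback() override {
            // Nothing to undo: the effect was deferred until commit().
        }

    private:
        long long* _counter;
    };

    }  // namespace
    }  // namespace mongo

    // Usage inside an active WriteUnitOfWork (the RecoveryUnit takes
    // ownership of the raw pointer passed to registerChange()):
    //     txn->recoveryUnit()->registerChange(new IncrementOnCommit(&counter));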
diff --git a/src/mongo/db/storage/recovery_unit_noop.h b/src/mongo/db/storage/recovery_unit_noop.h
index c324bc330a8..d8f7e69adb1 100644
--- a/src/mongo/db/storage/recovery_unit_noop.h
+++ b/src/mongo/db/storage/recovery_unit_noop.h
@@ -35,53 +35,53 @@
namespace mongo {
- class OperationContext;
+class OperationContext;
- class RecoveryUnitNoop : public RecoveryUnit {
- public:
- void beginUnitOfWork(OperationContext* opCtx) final {}
- void commitUnitOfWork() final {
- for (auto& change : _changes) {
- try {
- change->commit();
- }
- catch (...) {
- std::terminate();
- }
+class RecoveryUnitNoop : public RecoveryUnit {
+public:
+ void beginUnitOfWork(OperationContext* opCtx) final {}
+ void commitUnitOfWork() final {
+ for (auto& change : _changes) {
+ try {
+ change->commit();
+ } catch (...) {
+ std::terminate();
}
- _changes.clear();
}
- void abortUnitOfWork() final {
- for (auto it = _changes.rbegin(); it != _changes.rend(); ++it) {
- try {
- (*it)->rollback();
- }
- catch (...) {
- std::terminate();
- }
+ _changes.clear();
+ }
+ void abortUnitOfWork() final {
+ for (auto it = _changes.rbegin(); it != _changes.rend(); ++it) {
+ try {
+ (*it)->rollback();
+ } catch (...) {
+ std::terminate();
}
- _changes.clear();
}
+ _changes.clear();
+ }
- virtual void abandonSnapshot() {}
+ virtual void abandonSnapshot() {}
- virtual bool waitUntilDurable() {
- return true;
- }
+ virtual bool waitUntilDurable() {
+ return true;
+ }
- virtual void registerChange(Change* change) {
- _changes.push_back(std::unique_ptr<Change>(change));
- }
+ virtual void registerChange(Change* change) {
+ _changes.push_back(std::unique_ptr<Change>(change));
+ }
- virtual void* writingPtr(void* data, size_t len) {
- return data;
- }
- virtual void setRollbackWritesDisabled() {}
+ virtual void* writingPtr(void* data, size_t len) {
+ return data;
+ }
+ virtual void setRollbackWritesDisabled() {}
- virtual SnapshotId getSnapshotId() const { return SnapshotId(); }
+ virtual SnapshotId getSnapshotId() const {
+ return SnapshotId();
+ }
- private:
- std::vector<std::unique_ptr<Change>> _changes;
- };
+private:
+ std::vector<std::unique_ptr<Change>> _changes;
+};
} // namespace mongo
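
A small usage sketch of RecoveryUnitNoop as defined above, showing the ordering guarantees: commit() hooks run in registration order, rollback() hooks in reverse. The TagChange type is hypothetical.

    #include <string>

    #include "mongo/db/storage/recovery_unit_noop.h"

    namespace {

    // Hypothetical change that records when its hooks run, so the ordering
    // is observable.
    class TagChange final : public mongo::RecoveryUnit::Change {
    public:
        TagChange(std::string* log, char tag) : _log(log), _tag(tag) {}
        void commit() override {
            _log->push_back(_tag);
        }
        void rollback() override {
            _log->push_back(_tag);
        }

    private:
        std::string* _log;
        char _tag;
    };

    void orderingDemo() {
        std::string log;
        mongo::RecoveryUnitNoop ru;
        ru.registerChange(new TagChange(&log, 'A'));  // ru takes ownership
        ru.registerChange(new TagChange(&log, 'B'));
        ru.commitUnitOfWork();  // commit() runs in registration order: log == "AB"
        // abortUnitOfWork() instead would run rollback() in reverse: "BA".
    }

    }  // namespace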
diff --git a/src/mongo/db/storage/snapshot.h b/src/mongo/db/storage/snapshot.h
index 5d432ba3cbc..6ce5b57e51d 100644
--- a/src/mongo/db/storage/snapshot.h
+++ b/src/mongo/db/storage/snapshot.h
@@ -34,56 +34,60 @@
namespace mongo {
- class SnapshotId {
- static const uint64_t kNullId = 0;
- public:
- SnapshotId()
- : _id(kNullId) {
- }
-
- // 0 is NULL
- explicit SnapshotId(uint64_t id)
- : _id(id) {
- invariant(id != kNullId);
- }
-
- bool isNull() const { return _id == kNullId; }
-
- bool operator==(const SnapshotId& other) const {
- return _id == other._id;
- }
-
- bool operator!=(const SnapshotId& other) const {
- return _id != other._id;
- }
-
- private:
- uint64_t _id;
- };
-
- template<typename T>
- class Snapshotted {
- public:
- Snapshotted()
- : _id(), _value() {
- }
-
- Snapshotted(SnapshotId id, const T& value ) :
- _id(id), _value(value) {
- }
-
- void reset() {
- *this = Snapshotted();
- }
-
- void setValue(const T& t) { _value = t; }
-
- SnapshotId snapshotId() const { return _id; }
- const T& value() const { return _value; }
- T& value() { return _value; }
-
- private:
- SnapshotId _id;
- T _value;
- };
+class SnapshotId {
+ static const uint64_t kNullId = 0;
+
+public:
+ SnapshotId() : _id(kNullId) {}
+
+ // 0 is NULL
+ explicit SnapshotId(uint64_t id) : _id(id) {
+ invariant(id != kNullId);
+ }
+
+ bool isNull() const {
+ return _id == kNullId;
+ }
+
+ bool operator==(const SnapshotId& other) const {
+ return _id == other._id;
+ }
+
+ bool operator!=(const SnapshotId& other) const {
+ return _id != other._id;
+ }
+
+private:
+ uint64_t _id;
+};
+
+template <typename T>
+class Snapshotted {
+public:
+ Snapshotted() : _id(), _value() {}
+
+ Snapshotted(SnapshotId id, const T& value) : _id(id), _value(value) {}
+
+ void reset() {
+ *this = Snapshotted();
+ }
+
+ void setValue(const T& t) {
+ _value = t;
+ }
+
+ SnapshotId snapshotId() const {
+ return _id;
+ }
+ const T& value() const {
+ return _value;
+ }
+ T& value() {
+ return _value;
+ }
+
+private:
+ SnapshotId _id;
+ T _value;
+};
}
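
The Snapshotted<T> wrapper above is intended to tag a cached value with the snapshot it was read under, so callers can detect staleness against the RecoveryUnit's current snapshot. A minimal sketch (the helper is hypothetical):

    #include "mongo/db/storage/recovery_unit.h"
    #include "mongo/db/storage/snapshot.h"

    namespace mongo {

    // Hypothetical helper: a cached Snapshotted value may only be reused while
    // the recovery unit is still on the snapshot it was read from. A null id
    // (the default-constructed / reset state) is always treated as stale.
    template <typename T>
    bool cachedValueStillValid(const Snapshotted<T>& cached, RecoveryUnit* ru) {
        return !cached.snapshotId().isNull() && cached.snapshotId() == ru->getSnapshotId();
    }

    }  // namespace mongo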
diff --git a/src/mongo/db/storage/sorted_data_interface.h b/src/mongo/db/storage/sorted_data_interface.h
index 006fa7ff4dd..2836c7c4814 100644
--- a/src/mongo/db/storage/sorted_data_interface.h
+++ b/src/mongo/db/storage/sorted_data_interface.h
@@ -39,336 +39,337 @@
namespace mongo {
- class BSONObjBuilder;
- class BucketDeletionNotification;
- class SortedDataBuilderInterface;
+class BSONObjBuilder;
+class BucketDeletionNotification;
+class SortedDataBuilderInterface;
+
+/**
+ * This interface is a work in progress. Notes below:
+ *
+ * This interface began as the SortedDataInterface, a way to hide the fact that there were two
+ * on-disk formats for the btree. With the introduction of other storage engines, this
+ * interface was generalized to provide access to sorted data. Specifically:
+ *
+ * 1. Many other storage engines provide different Btree(-ish) implementations. This interface
+ * could allow those engines to avoid storing btree buckets in an already sorted structure.
+ *
+ * TODO: See if there is actually a performance gain.
+ *
+ * 2. The existing btree implementation is written to assume that if it modifies a record it is
+ * modifying the underlying record. This interface is an attempt to work around that.
+ *
+ * TODO: See if this actually works.
+ */
+class SortedDataInterface {
+public:
+ virtual ~SortedDataInterface() {}
+
+ //
+ // Data changes
+ //
/**
- * This interface is a work in progress. Notes below:
+ * Return a bulk builder for 'this' index.
*
- * This interface began as the SortedDataInterface, a way to hide the fact that there were two
- * on-disk formats for the btree. With the introduction of other storage engines, this
- * interface was generalized to provide access to sorted data. Specifically:
+ * Implementations can assume that 'this' index outlives its bulk
+ * builder.
*
- * 1. Many other storage engines provide different Btree(-ish) implementations. This interface
- * could allow those interfaces to avoid storing btree buckets in an already sorted structure.
+ * @param txn the transaction under which keys are added to 'this' index
+ * @param dupsAllowed true if duplicate keys are allowed, and false
+ * otherwise
*
- * TODO: See if there is actually a performance gain.
+ * @return caller takes ownership
+ */
+ virtual SortedDataBuilderInterface* getBulkBuilder(OperationContext* txn, bool dupsAllowed) = 0;
+
+ /**
+ * Insert an entry into the index with the specified key and RecordId.
+ *
+ * @param txn the transaction under which the insert takes place
+ * @param dupsAllowed true if duplicate keys are allowed, and false
+ * otherwise
*
- * 2. The existing btree implementation is written to assume that if it modifies a record it is
- * modifying the underlying record. This interface is an attempt to work around that.
+ * @return Status::OK() if the insert succeeded,
*
- * TODO: See if this actually works.
+ * ErrorCodes::DuplicateKey if 'key' already exists in 'this' index
+ * at a RecordId other than 'loc' and duplicates were not allowed
*/
- class SortedDataInterface {
- public:
- virtual ~SortedDataInterface() { }
+ virtual Status insert(OperationContext* txn,
+ const BSONObj& key,
+ const RecordId& loc,
+ bool dupsAllowed) = 0;
- //
- // Data changes
- //
+ /**
+ * Remove the entry from the index with the specified key and RecordId.
+ *
+ * @param txn the transaction under which the remove takes place
+ * @param dupsAllowed true if duplicate keys are allowed, and false
+ * otherwise
+ */
+ virtual void unindex(OperationContext* txn,
+ const BSONObj& key,
+ const RecordId& loc,
+ bool dupsAllowed) = 0;
- /**
- * Return a bulk builder for 'this' index.
- *
- * Implementations can assume that 'this' index outlives its bulk
- * builder.
- *
- * @param txn the transaction under which keys are added to 'this' index
- * @param dupsAllowed true if duplicate keys are allowed, and false
- * otherwise
- *
- * @return caller takes ownership
- */
- virtual SortedDataBuilderInterface* getBulkBuilder(OperationContext* txn,
- bool dupsAllowed) = 0;
+ /**
+ * Return ErrorCodes::DuplicateKey if 'key' already exists in 'this'
+ * index at a RecordId other than 'loc', and Status::OK() otherwise.
+ *
+ * @param txn the transaction under which this operation takes place
+ *
+ * TODO: Hide this by exposing an update method?
+ */
+ virtual Status dupKeyCheck(OperationContext* txn, const BSONObj& key, const RecordId& loc) = 0;
+ //
+ // Information about the tree
+ //
+
+ /**
+ * 'output' is used to store results of validate when 'full' is true.
+ * If 'full' is false, 'output' may be NULL.
+ *
+ * TODO: expose full set of args for testing?
+ */
+ virtual void fullValidate(OperationContext* txn,
+ bool full,
+ long long* numKeysOut,
+ BSONObjBuilder* output) const = 0;
+
+ virtual bool appendCustomStats(OperationContext* txn,
+ BSONObjBuilder* output,
+ double scale) const = 0;
+
+
+ /**
+ * Return the number of bytes consumed by 'this' index.
+ *
+ * @param txn the transaction under which this operation takes place
+ *
+ * @see IndexAccessMethod::getSpaceUsedBytes
+ */
+ virtual long long getSpaceUsedBytes(OperationContext* txn) const = 0;
+
+ /**
+ * Return true if 'this' index is empty, and false otherwise.
+ */
+ virtual bool isEmpty(OperationContext* txn) = 0;
+
+ /**
+ * Attempt to bring the entirety of 'this' index into memory.
+ *
+ * If the underlying storage engine does not support the operation,
+ * returns ErrorCodes::CommandNotSupported
+ *
+ * @return Status::OK()
+ */
+ virtual Status touch(OperationContext* txn) const {
+ return Status(ErrorCodes::CommandNotSupported,
+ "this storage engine does not support touch");
+ }
+
+ /**
+ * Return the number of entries in 'this' index.
+ *
+ * The default implementation should be overridden with a more
+ * efficient one if at all possible.
+ */
+ virtual long long numEntries(OperationContext* txn) const {
+ long long x = -1;
+ fullValidate(txn, false, &x, NULL);
+ return x;
+ }
+
+ /**
+ * Navigates over the sorted data.
+ *
+ * A cursor is constructed with a direction flag with the following effects:
+ * - The direction that next() moves.
+ * - If a seek method hits an exact match on key, forward cursors will be positioned on
+ * the first value for that key, reverse cursors on the last.
+ * - If a seek method or restore does not hit an exact match, cursors will be
+ * positioned on the closest position *after* the query in the direction of the
+ * search.
+ * - The end position is on the "far" side of the query. In a forward cursor that means
+ * that it is the lowest value for the key if the end is exclusive or the first entry
+ * past the key if the end is inclusive or there are no exact matches.
+ *
+ * A cursor is tied to a transaction, such as the OperationContext or a WriteUnitOfWork
+ * inside that context. Any cursor acquired inside a transaction is invalid outside
+     * of that transaction; instead, use the save and restore methods to reestablish the cursor.
+ *
+ * Any method other than the save methods may throw WriteConflict exception. If that
+ * happens, the cursor may not be used again until it has been saved and successfully
+ * restored. If next() or restore() throw a WCE the cursor's position will be the same as
+ * before the call (strong exception guarantee). All other methods leave the cursor in a
+ * valid state but with an unspecified position (basic exception guarantee). All methods
+ * only provide the basic guarantee for exceptions other than WCE.
+ *
+ * Any returned unowned BSON is only valid until the next call to any method on this
+ * interface. The implementations must assume that passed-in unowned BSON is only valid for
+ * the duration of the call.
+ *
+ * Implementations may override any default implementation if they can provide a more
+ * efficient implementation.
+ */
+ class Cursor {
+ public:
/**
- * Insert an entry into the index with the specified key and RecordId.
- *
- * @param txn the transaction under which the insert takes place
- * @param dupsAllowed true if duplicate keys are allowed, and false
- * otherwise
+ * Tells methods that return an IndexKeyEntry what part of the data the caller is
+ * interested in.
*
- * @return Status::OK() if the insert succeeded,
+ * Methods returning an engaged optional<T> will only return null RecordIds or empty
+ * BSONObjs if they have been explicitly left out of the request.
*
- * ErrorCodes::DuplicateKey if 'key' already exists in 'this' index
- * at a RecordId other than 'loc' and duplicates were not allowed
+ * Implementations are allowed to return more data than requested, but not less.
*/
- virtual Status insert(OperationContext* txn,
- const BSONObj& key,
- const RecordId& loc,
- bool dupsAllowed) = 0;
+ enum RequestedInfo {
+ // Only usable part of the return is whether it is engaged or not.
+ kJustExistance = 0,
+ // Key must be filled in.
+ kWantKey = 1,
+            // Loc must be filled in.
+ kWantLoc = 2,
+ // Both must be returned.
+ kKeyAndLoc = kWantKey | kWantLoc,
+ };
+
+ virtual ~Cursor() = default;
+
/**
- * Remove the entry from the index with the specified key and RecordId.
+ * Sets the position to stop scanning. An empty key unsets the end position.
*
- * @param txn the transaction under which the remove takes place
- * @param dupsAllowed true if duplicate keys are allowed, and false
- * otherwise
+         * If next() hits this position, or a seek method attempts to seek past it, they
+ * unposition the cursor and return boost::none.
+ *
+ * Setting the end position should be done before seeking since the current position, if
+ * any, isn't checked.
*/
- virtual void unindex(OperationContext* txn,
- const BSONObj& key,
- const RecordId& loc,
- bool dupsAllowed) = 0;
+ virtual void setEndPosition(const BSONObj& key, bool inclusive) = 0;
/**
- * Return ErrorCodes::DuplicateKey if 'key' already exists in 'this'
- * index at a RecordId other than 'loc', and Status::OK() otherwise.
- *
- * @param txn the transaction under which this operation takes place
- *
- * TODO: Hide this by exposing an update method?
+ * Moves forward and returns the new data or boost::none if there is no more data.
+ * If not positioned, returns boost::none.
*/
- virtual Status dupKeyCheck(OperationContext* txn,
- const BSONObj& key,
- const RecordId& loc) = 0;
+ virtual boost::optional<IndexKeyEntry> next(RequestedInfo parts = kKeyAndLoc) = 0;
//
- // Information about the tree
+ // Seeking
//
/**
- * 'output' is used to store results of validate when 'full' is true.
- * If 'full' is false, 'output' may be NULL.
+ * Seeks to the provided key and returns current position.
*
- * TODO: expose full set of args for testing?
+ * TODO consider removing once IndexSeekPoint has been cleaned up a bit. In particular,
+     * need a way to specify using the whole keyPrefix and nothing else, and to support the
+ * combination of empty and exclusive. Should also make it easier to construct for the
+ * common cases.
*/
- virtual void fullValidate(OperationContext* txn, bool full, long long* numKeysOut,
- BSONObjBuilder* output) const = 0;
-
- virtual bool appendCustomStats(OperationContext* txn, BSONObjBuilder* output, double scale)
- const = 0;
-
+ virtual boost::optional<IndexKeyEntry> seek(const BSONObj& key,
+ bool inclusive,
+ RequestedInfo parts = kKeyAndLoc) = 0;
/**
- * Return the number of bytes consumed by 'this' index.
+ * Seeks to the position described by seekPoint and returns the current position.
*
- * @param txn the transaction under which this operation takes place
- *
- * @see IndexAccessMethod::getSpaceUsedBytes
+ * NOTE: most implementations should just pass seekPoint to
+ * IndexEntryComparison::makeQueryObject().
*/
- virtual long long getSpaceUsedBytes( OperationContext* txn ) const = 0;
+ virtual boost::optional<IndexKeyEntry> seek(const IndexSeekPoint& seekPoint,
+ RequestedInfo parts = kKeyAndLoc) = 0;
/**
- * Return true if 'this' index is empty, and false otherwise.
+ * Seeks to a key with a hint to the implementation that you only want exact matches. If
+ * an exact match can't be found, boost::none will be returned and the resulting
+ * position of the cursor is unspecified.
*/
- virtual bool isEmpty(OperationContext* txn) = 0;
+ virtual boost::optional<IndexKeyEntry> seekExact(const BSONObj& key,
+ RequestedInfo parts = kKeyAndLoc) {
+ auto kv = seek(key, true, kKeyAndLoc);
+ if (kv && kv->key.woCompare(key, BSONObj(), /*considerFieldNames*/ false) == 0)
+ return kv;
+ return {};
+ }
+
+ //
+ // Saving and restoring state
+ //
/**
- * Attempt to bring the entirety of 'this' index into memory.
- *
- * If the underlying storage engine does not support the operation,
- * returns ErrorCodes::CommandNotSupported
+ * Prepares for state changes in underlying data in a way that allows the cursor's
+ * current position to be restored.
*
- * @return Status::OK()
+ * It is safe to call savePositioned multiple times in a row.
+ * No other method (excluding destructor) may be called until successfully restored.
*/
- virtual Status touch(OperationContext* txn) const {
- return Status(ErrorCodes::CommandNotSupported,
- "this storage engine does not support touch");
- }
+ virtual void savePositioned() = 0;
/**
- * Return the number of entries in 'this' index.
+ * Prepares for state changes in underlying data without necessarily saving the current
+ * state.
*
- * The default implementation should be overridden with a more
- * efficient one if at all possible.
+ * The cursor's position when restored is unspecified. Caller is expected to seek
+ * following the restore.
+ *
+ * It is safe to call saveUnpositioned multiple times in a row.
+ * No other method (excluding destructor) may be called until successfully restored.
*/
- virtual long long numEntries( OperationContext* txn ) const {
- long long x = -1;
- fullValidate(txn, false, &x, NULL);
- return x;
+ virtual void saveUnpositioned() {
+ savePositioned();
}
/**
- * Navigates over the sorted data.
- *
- * A cursor is constructed with a direction flag with the following effects:
- * - The direction that next() moves.
- * - If a seek method hits an exact match on key, forward cursors will be positioned on
- * the first value for that key, reverse cursors on the last.
- * - If a seek method or restore does not hit an exact match, cursors will be
- * positioned on the closest position *after* the query in the direction of the
- * search.
- * - The end position is on the "far" side of the query. In a forward cursor that means
- * that it is the lowest value for the key if the end is exclusive or the first entry
- * past the key if the end is inclusive or there are no exact matches.
- *
- * A cursor is tied to a transaction, such as the OperationContext or a WriteUnitOfWork
- * inside that context. Any cursor acquired inside a transaction is invalid outside
- * of that transaction, instead use the save and restore methods to reestablish the cursor.
+ * Recovers from potential state changes in underlying data.
*
- * Any method other than the save methods may throw WriteConflict exception. If that
- * happens, the cursor may not be used again until it has been saved and successfully
- * restored. If next() or restore() throw a WCE the cursor's position will be the same as
- * before the call (strong exception guarantee). All other methods leave the cursor in a
- * valid state but with an unspecified position (basic exception guarantee). All methods
- * only provide the basic guarantee for exceptions other than WCE.
+ * If the former position no longer exists, a following call to next() will return the
+ * next closest position in the direction of the scan, if any.
*
- * Any returned unowned BSON is only valid until the next call to any method on this
- * interface. The implementations must assume that passed-in unowned BSON is only valid for
- * the duration of the call.
- *
- * Implementations may override any default implementation if they can provide a more
- * efficient implementation.
+ * This handles restoring after either savePositioned() or saveUnpositioned().
*/
- class Cursor {
- public:
-
- /**
- * Tells methods that return an IndexKeyEntry what part of the data the caller is
- * interested in.
- *
- * Methods returning an engaged optional<T> will only return null RecordIds or empty
- * BSONObjs if they have been explicitly left out of the request.
- *
- * Implementations are allowed to return more data than requested, but not less.
- */
- enum RequestedInfo {
- // Only usable part of the return is whether it is engaged or not.
- kJustExistance = 0,
- // Key must be filled in.
- kWantKey = 1,
- // Loc must be fulled in.
- kWantLoc = 2,
- // Both must be returned.
- kKeyAndLoc = kWantKey | kWantLoc,
- };
-
- virtual ~Cursor() = default;
-
-
- /**
- * Sets the position to stop scanning. An empty key unsets the end position.
- *
- * If next() hits this position, or a seek method attempts to seek past it they
- * unposition the cursor and return boost::none.
- *
- * Setting the end position should be done before seeking since the current position, if
- * any, isn't checked.
- */
- virtual void setEndPosition(const BSONObj& key, bool inclusive) = 0;
-
- /**
- * Moves forward and returns the new data or boost::none if there is no more data.
- * If not positioned, returns boost::none.
- */
- virtual boost::optional<IndexKeyEntry> next(RequestedInfo parts = kKeyAndLoc) = 0;
-
- //
- // Seeking
- //
-
- /**
- * Seeks to the provided key and returns current position.
- *
- * TODO consider removing once IndexSeekPoint has been cleaned up a bit. In particular,
- * need a way to specify use whole keyPrefix and nothing else and to support the
- * combination of empty and exclusive. Should also make it easier to construct for the
- * common cases.
- */
- virtual boost::optional<IndexKeyEntry> seek(const BSONObj& key,
- bool inclusive,
- RequestedInfo parts = kKeyAndLoc) = 0;
-
- /**
- * Seeks to the position described by seekPoint and returns the current position.
- *
- * NOTE: most implementations should just pass seekPoint to
- * IndexEntryComparison::makeQueryObject().
- */
- virtual boost::optional<IndexKeyEntry> seek(const IndexSeekPoint& seekPoint,
- RequestedInfo parts = kKeyAndLoc) = 0;
-
- /**
- * Seeks to a key with a hint to the implementation that you only want exact matches. If
- * an exact match can't be found, boost::none will be returned and the resulting
- * position of the cursor is unspecified.
- */
- virtual boost::optional<IndexKeyEntry> seekExact(const BSONObj& key,
- RequestedInfo parts = kKeyAndLoc) {
- auto kv = seek(key, true, kKeyAndLoc);
- if (kv && kv->key.woCompare(key, BSONObj(), /*considerFieldNames*/false) == 0)
- return kv;
- return {};
- }
-
- //
- // Saving and restoring state
- //
-
- /**
- * Prepares for state changes in underlying data in a way that allows the cursor's
- * current position to be restored.
- *
- * It is safe to call savePositioned multiple times in a row.
- * No other method (excluding destructor) may be called until successfully restored.
- */
- virtual void savePositioned() = 0;
-
- /**
- * Prepares for state changes in underlying data without necessarily saving the current
- * state.
- *
- * The cursor's position when restored is unspecified. Caller is expected to seek
- * following the restore.
- *
- * It is safe to call saveUnpositioned multiple times in a row.
- * No other method (excluding destructor) may be called until successfully restored.
- */
- virtual void saveUnpositioned() { savePositioned(); }
-
- /**
- * Recovers from potential state changes in underlying data.
- *
- * If the former position no longer exists, a following call to next() will return the
- * next closest position in the direction of the scan, if any.
- *
- * This handles restoring after either savePositioned() or saveUnpositioned().
- */
- virtual void restore(OperationContext* txn) = 0;
- };
+ virtual void restore(OperationContext* txn) = 0;
+ };
- /**
- * Returns an unpositioned cursor over 'this' index.
- *
- * Implementations can assume that 'this' index outlives all cursors it produces.
- */
- virtual std::unique_ptr<Cursor> newCursor(OperationContext* txn,
- bool isForward = true) const = 0;
+ /**
+ * Returns an unpositioned cursor over 'this' index.
+ *
+ * Implementations can assume that 'this' index outlives all cursors it produces.
+ */
+ virtual std::unique_ptr<Cursor> newCursor(OperationContext* txn,
+ bool isForward = true) const = 0;
- //
- // Index creation
- //
+ //
+ // Index creation
+ //
- virtual Status initAsEmpty(OperationContext* txn) = 0;
- };
+ virtual Status initAsEmpty(OperationContext* txn) = 0;
+};
+
+/**
+ * A version-hiding wrapper around the bulk builder for the Btree.
+ */
+class SortedDataBuilderInterface {
+public:
+ virtual ~SortedDataBuilderInterface() {}
/**
- * A version-hiding wrapper around the bulk builder for the Btree.
+ * Adds 'key' to intermediate storage.
+ *
+ * 'key' must be > or >= the last key passed to this function (depends on _dupsAllowed). If
+     * this is violated, an error Status (ErrorCodes::InternalError) will be returned.
*/
- class SortedDataBuilderInterface {
- public:
- virtual ~SortedDataBuilderInterface() { }
+ virtual Status addKey(const BSONObj& key, const RecordId& loc) = 0;
- /**
- * Adds 'key' to intermediate storage.
- *
- * 'key' must be > or >= the last key passed to this function (depends on _dupsAllowed). If
- * this is violated an error Status (ErrorCodes::InternalError) will be returned.
- */
- virtual Status addKey(const BSONObj& key, const RecordId& loc) = 0;
-
- /**
- * Do any necessary work to finish building the tree.
- *
- * The default implementation may be used if no commit phase is necessary because addKey
- * always leaves the tree in a valid state.
- *
- * This is called outside of any WriteUnitOfWork to allow implementations to split this up
- * into multiple units.
- */
- virtual void commit(bool mayInterrupt) {}
- };
+ /**
+ * Do any necessary work to finish building the tree.
+ *
+ * The default implementation may be used if no commit phase is necessary because addKey
+ * always leaves the tree in a valid state.
+ *
+ * This is called outside of any WriteUnitOfWork to allow implementations to split this up
+ * into multiple units.
+ */
+ virtual void commit(bool mayInterrupt) {}
+};
} // namespace mongo
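
As a usage sketch of the Cursor contract above (not code from this diff): a bounded forward scan sets its end position before seeking, then iterates with next() until the cursor unpositions itself and returns boost::none.

    #include <memory>

    #include "mongo/db/storage/sorted_data_interface.h"

    namespace mongo {

    // Sketch of a bounded forward scan over a SortedDataInterface. The end
    // bound must be set before seeking, since the current position is not
    // re-checked against it.
    void scanRange(OperationContext* txn,
                   const SortedDataInterface& sorted,
                   const BSONObj& startKey,
                   const BSONObj& endKey) {
        std::unique_ptr<SortedDataInterface::Cursor> cursor = sorted.newCursor(txn);
        cursor->setEndPosition(endKey, /*inclusive*/ true);

        for (boost::optional<IndexKeyEntry> entry = cursor->seek(startKey, /*inclusive*/ true);
             entry;
             entry = cursor->next()) {
            // entry->key and entry->loc are valid only until the next call on
            // the cursor; copy them if they must outlive this iteration.
        }
    }

    }  // namespace mongo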
diff --git a/src/mongo/db/storage/sorted_data_interface_test_bulkbuilder.cpp b/src/mongo/db/storage/sorted_data_interface_test_bulkbuilder.cpp
index cb1f8fa0953..754bed1d757 100644
--- a/src/mongo/db/storage/sorted_data_interface_test_bulkbuilder.cpp
+++ b/src/mongo/db/storage/sorted_data_interface_test_bulkbuilder.cpp
@@ -37,165 +37,165 @@
namespace mongo {
- // Add a key using a bulk builder.
- TEST( SortedDataInterface, BuilderAddKey ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- }
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataBuilderInterface> builder(
- sorted->getBulkBuilder( opCtx.get(), true ) );
-
- ASSERT_OK( builder->addKey( key1, loc1 ) );
- builder->commit( false );
- }
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 1, sorted->numEntries( opCtx.get() ) );
- }
- }
-
- // Add a compound key using a bulk builder.
- TEST( SortedDataInterface, BuilderAddCompoundKey ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- }
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataBuilderInterface> builder(
- sorted->getBulkBuilder( opCtx.get(), true ) );
-
- ASSERT_OK( builder->addKey( compoundKey1a, loc1 ) );
- builder->commit( false );
- }
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 1, sorted->numEntries( opCtx.get() ) );
- }
- }
-
- // Add the same key multiple times using a bulk builder and verify that
- // the returned status is ErrorCodes::DuplicateKey when duplicates are
- // not allowed.
- TEST( SortedDataInterface, BuilderAddSameKey ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( true ) );
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- }
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataBuilderInterface> builder(
- sorted->getBulkBuilder( opCtx.get(), false ) );
-
- ASSERT_OK( builder->addKey( key1, loc1 ) );
- ASSERT_EQUALS( ErrorCodes::DuplicateKey, builder->addKey( key1, loc2 ) );
- builder->commit( false );
- }
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 1, sorted->numEntries( opCtx.get() ) );
- }
- }
-
- // Add the same key multiple times using a bulk builder and verify that
- // the returned status is OK when duplicates are allowed.
- TEST( SortedDataInterface, BuilderAddSameKeyWithDupsAllowed ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- }
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataBuilderInterface> builder(
- sorted->getBulkBuilder( opCtx.get(), true /* allow duplicates */ ) );
-
- ASSERT_OK( builder->addKey( key1, loc1 ) );
- ASSERT_OK( builder->addKey( key1, loc2 ) );
- builder->commit( false );
- }
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 2, sorted->numEntries( opCtx.get() ) );
- }
- }
-
- // Add multiple keys using a bulk builder.
- TEST( SortedDataInterface, BuilderAddMultipleKeys ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- }
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataBuilderInterface> builder(
- sorted->getBulkBuilder( opCtx.get(), true ) );
-
- ASSERT_OK( builder->addKey( key1, loc1 ) );
- ASSERT_OK( builder->addKey( key2, loc2 ) );
- ASSERT_OK( builder->addKey( key3, loc3 ) );
- builder->commit( false );
- }
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 3, sorted->numEntries( opCtx.get() ) );
- }
- }
-
- // Add multiple compound keys using a bulk builder.
- TEST( SortedDataInterface, BuilderAddMultipleCompoundKeys ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- }
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataBuilderInterface> builder(
- sorted->getBulkBuilder( opCtx.get(), true ) );
-
- ASSERT_OK( builder->addKey( compoundKey1a, loc1 ) );
- ASSERT_OK( builder->addKey( compoundKey1b, loc2 ) );
- ASSERT_OK( builder->addKey( compoundKey1c, loc4 ) );
- ASSERT_OK( builder->addKey( compoundKey2b, loc3 ) );
- ASSERT_OK( builder->addKey( compoundKey3a, loc5 ) );
- builder->commit( false );
- }
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 5, sorted->numEntries( opCtx.get() ) );
- }
- }
-
-} // namespace mongo
+// Add a key using a bulk builder.
+TEST(SortedDataInterface, BuilderAddKey) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
+ }
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataBuilderInterface> builder(
+ sorted->getBulkBuilder(opCtx.get(), true));
+
+ ASSERT_OK(builder->addKey(key1, loc1));
+ builder->commit(false);
+ }
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(1, sorted->numEntries(opCtx.get()));
+ }
+}
+
+// Add a compound key using a bulk builder.
+TEST(SortedDataInterface, BuilderAddCompoundKey) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
+ }
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataBuilderInterface> builder(
+ sorted->getBulkBuilder(opCtx.get(), true));
+
+ ASSERT_OK(builder->addKey(compoundKey1a, loc1));
+ builder->commit(false);
+ }
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(1, sorted->numEntries(opCtx.get()));
+ }
+}
+
+// Add the same key multiple times using a bulk builder and verify that
+// the returned status is ErrorCodes::DuplicateKey when duplicates are
+// not allowed.
+TEST(SortedDataInterface, BuilderAddSameKey) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(true));
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
+ }
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataBuilderInterface> builder(
+ sorted->getBulkBuilder(opCtx.get(), false));
+
+ ASSERT_OK(builder->addKey(key1, loc1));
+ ASSERT_EQUALS(ErrorCodes::DuplicateKey, builder->addKey(key1, loc2));
+ builder->commit(false);
+ }
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(1, sorted->numEntries(opCtx.get()));
+ }
+}
+
+// Add the same key multiple times using a bulk builder and verify that
+// the returned status is OK when duplicates are allowed.
+TEST(SortedDataInterface, BuilderAddSameKeyWithDupsAllowed) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
+ }
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataBuilderInterface> builder(
+ sorted->getBulkBuilder(opCtx.get(), true /* allow duplicates */));
+
+ ASSERT_OK(builder->addKey(key1, loc1));
+ ASSERT_OK(builder->addKey(key1, loc2));
+ builder->commit(false);
+ }
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(2, sorted->numEntries(opCtx.get()));
+ }
+}
+
+// Add multiple keys using a bulk builder.
+TEST(SortedDataInterface, BuilderAddMultipleKeys) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
+ }
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataBuilderInterface> builder(
+ sorted->getBulkBuilder(opCtx.get(), true));
+
+ ASSERT_OK(builder->addKey(key1, loc1));
+ ASSERT_OK(builder->addKey(key2, loc2));
+ ASSERT_OK(builder->addKey(key3, loc3));
+ builder->commit(false);
+ }
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(3, sorted->numEntries(opCtx.get()));
+ }
+}
+
+// Add multiple compound keys using a bulk builder.
+TEST(SortedDataInterface, BuilderAddMultipleCompoundKeys) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
+ }
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataBuilderInterface> builder(
+ sorted->getBulkBuilder(opCtx.get(), true));
+
+ ASSERT_OK(builder->addKey(compoundKey1a, loc1));
+ ASSERT_OK(builder->addKey(compoundKey1b, loc2));
+ ASSERT_OK(builder->addKey(compoundKey1c, loc4));
+ ASSERT_OK(builder->addKey(compoundKey2b, loc3));
+ ASSERT_OK(builder->addKey(compoundKey3a, loc5));
+ builder->commit(false);
+ }
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(5, sorted->numEntries(opCtx.get()));
+ }
+}
+
+} // namespace mongo
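
Editorial note, not part of the diff: a minimal sketch of the bulk-builder contract the tests above exercise, assuming the declarations these files already pull in from the shared test harness (HarnessHelper, the key1/loc1-style fixtures). bulkBuilderContractSketch() is a hypothetical helper written for illustration, not code from the commit.

    void bulkBuilderContractSketch() {
        const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
        const std::unique_ptr<SortedDataInterface> sorted(
            harnessHelper->newSortedDataInterface(true));  // true: unique index

        const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
        const std::unique_ptr<SortedDataBuilderInterface> builder(
            sorted->getBulkBuilder(opCtx.get(), false));  // false: reject duplicates

        invariant(builder->addKey(key1, loc1).isOK());
        // Same key, different RecordId: the builder reports the conflict
        // instead of inserting a second entry.
        invariant(builder->addKey(key1, loc2).code() == ErrorCodes::DuplicateKey);
        builder->commit(false);  // the tests above always pass false here
    }
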
diff --git a/src/mongo/db/storage/sorted_data_interface_test_cursor.cpp b/src/mongo/db/storage/sorted_data_interface_test_cursor.cpp
index d4a99333fdc..3b49ed4bded 100644
--- a/src/mongo/db/storage/sorted_data_interface_test_cursor.cpp
+++ b/src/mongo/db/storage/sorted_data_interface_test_cursor.cpp
@@ -37,130 +37,132 @@
namespace mongo {
- // Verify that a forward cursor is positioned at EOF when the index is empty.
- TEST( SortedDataInterface, CursorIsEOFWhenEmpty ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
+// Verify that a forward cursor is positioned at EOF when the index is empty.
+TEST(SortedDataInterface, CursorIsEOFWhenEmpty) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(sorted->newCursor(opCtx.get()));
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get()) );
+ ASSERT(!cursor->seek(minKey, true));
- ASSERT( !cursor->seek(minKey, true) );
-
- // Cursor at EOF should remain at EOF when advanced
- ASSERT( !cursor->next() );
- }
+ // Cursor at EOF should remain at EOF when advanced
+ ASSERT(!cursor->next());
}
+}
- // Verify that a reverse cursor is positioned at EOF when the index is empty.
- TEST( SortedDataInterface, CursorIsEOFWhenEmptyReversed ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
+// Verify that a reverse cursor is positioned at EOF when the index is empty.
+TEST(SortedDataInterface, CursorIsEOFWhenEmptyReversed) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get(), false) );
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(
+ sorted->newCursor(opCtx.get(), false));
- ASSERT( !cursor->seek( maxKey, true ) );
+ ASSERT(!cursor->seek(maxKey, true));
- // Cursor at EOF should remain at EOF when advanced
- ASSERT( !cursor->next() );
- }
+ // Cursor at EOF should remain at EOF when advanced
+ ASSERT(!cursor->next());
}
+}
- // Call advance() on a forward cursor until it is exhausted.
- // When a cursor positioned at EOF is advanced, it stays at EOF.
- TEST( SortedDataInterface, ExhaustCursor ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
+// Call advance() on a forward cursor until it is exhausted.
+// When a cursor positioned at EOF is advanced, it stays at EOF.
+TEST(SortedDataInterface, ExhaustCursor) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
+ }
+
+ int nToInsert = 10;
+ for (int i = 0; i < nToInsert; i++) {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
+ WriteUnitOfWork uow(opCtx.get());
+ BSONObj key = BSON("" << i);
+ RecordId loc(42, i * 2);
+ ASSERT_OK(sorted->insert(opCtx.get(), key, loc, true));
+ uow.commit();
}
+ }
- int nToInsert = 10;
- for ( int i = 0; i < nToInsert; i++ ) {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- BSONObj key = BSON( "" << i );
- RecordId loc( 42, i * 2 );
- ASSERT_OK( sorted->insert( opCtx.get(), key, loc, true ) );
- uow.commit();
- }
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(nToInsert, sorted->numEntries(opCtx.get()));
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( nToInsert, sorted->numEntries( opCtx.get() ) );
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(sorted->newCursor(opCtx.get()));
+ for (int i = 0; i < nToInsert; i++) {
+ auto entry = i == 0 ? cursor->seek(minKey, true) : cursor->next();
+ ASSERT_EQ(entry, IndexKeyEntry(BSON("" << i), RecordId(42, i * 2)));
}
+ ASSERT(!cursor->next());
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get()) );
- for ( int i = 0; i < nToInsert; i++ ) {
- auto entry = i == 0 ? cursor->seek(minKey, true) : cursor->next();
- ASSERT_EQ(entry, IndexKeyEntry(BSON("" << i), RecordId(42, i * 2)));
- }
- ASSERT( !cursor->next() );
-
- // Cursor at EOF should remain at EOF when advanced
- ASSERT( !cursor->next() );
- }
+ // Cursor at EOF should remain at EOF when advanced
+ ASSERT(!cursor->next());
}
+}
+
+// Call advance() on a reverse cursor until it is exhausted.
+// When a cursor positioned at EOF is advanced, it stays at EOF.
+TEST(SortedDataInterface, ExhaustCursorReversed) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
- // Call advance() on a reverse cursor until it is exhausted.
- // When a cursor positioned at EOF is advanced, it stays at EOF.
- TEST( SortedDataInterface, ExhaustCursorReversed ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
+ }
+ int nToInsert = 10;
+ for (int i = 0; i < nToInsert; i++) {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
+ WriteUnitOfWork uow(opCtx.get());
+ BSONObj key = BSON("" << i);
+ RecordId loc(42, i * 2);
+ ASSERT_OK(sorted->insert(opCtx.get(), key, loc, true));
+ uow.commit();
}
+ }
- int nToInsert = 10;
- for ( int i = 0; i < nToInsert; i++ ) {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- BSONObj key = BSON( "" << i );
- RecordId loc( 42, i * 2 );
- ASSERT_OK( sorted->insert( opCtx.get(), key, loc, true ) );
- uow.commit();
- }
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(nToInsert, sorted->numEntries(opCtx.get()));
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( nToInsert, sorted->numEntries( opCtx.get() ) );
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(
+ sorted->newCursor(opCtx.get(), false));
+ for (int i = nToInsert - 1; i >= 0; i--) {
+ auto entry = (i == nToInsert - 1) ? cursor->seek(maxKey, true) : cursor->next();
+ ASSERT_EQ(entry, IndexKeyEntry(BSON("" << i), RecordId(42, i * 2)));
}
+ ASSERT(!cursor->next());
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get(), false) );
- for ( int i = nToInsert - 1; i >= 0; i-- ) {
- auto entry = (i == nToInsert - 1) ? cursor->seek(maxKey, true) : cursor->next();
- ASSERT_EQ(entry, IndexKeyEntry(BSON("" << i), RecordId(42, i * 2)));
- }
- ASSERT( !cursor->next() );
-
- // Cursor at EOF should remain at EOF when advanced
- ASSERT( !cursor->next() );
- }
+ // Cursor at EOF should remain at EOF when advanced
+ ASSERT(!cursor->next());
}
+}
-} // namespace mongo
+} // namespace mongo
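
Editorial note, not part of the diff: the scan idiom these cursor tests are built on, as a minimal sketch. countEntriesSketch() is a hypothetical helper that assumes the same test-harness declarations (HarnessHelper, the minKey constant) the file above already includes.

    // A full forward scan: seek() places the cursor, next() advances it, and
    // both return boost::none at EOF; a cursor already at EOF stays there.
    int countEntriesSketch(HarnessHelper* harnessHelper, SortedDataInterface* sorted) {
        const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
        const std::unique_ptr<SortedDataInterface::Cursor> cursor(sorted->newCursor(opCtx.get()));

        int n = 0;
        for (auto entry = cursor->seek(minKey, true); entry; entry = cursor->next()) {
            n++;  // each entry is an IndexKeyEntry holding the key and its RecordId
        }
        invariant(!cursor->next());  // advancing at EOF keeps the cursor at EOF
        return n;
    }
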
diff --git a/src/mongo/db/storage/sorted_data_interface_test_cursor_advanceto.cpp b/src/mongo/db/storage/sorted_data_interface_test_cursor_advanceto.cpp
index 2b094626ac6..d36e84a8a66 100644
--- a/src/mongo/db/storage/sorted_data_interface_test_cursor_advanceto.cpp
+++ b/src/mongo/db/storage/sorted_data_interface_test_cursor_advanceto.cpp
@@ -37,614 +37,620 @@
namespace mongo {
- // Insert multiple single-field keys and advance to each of them
- // using a forward cursor by specifying their exact key. When
- // advanceTo() is called on a duplicate key, the cursor is
- // positioned at the first occurrence of that key in ascending
- // order by RecordId.
- TEST( SortedDataInterface, AdvanceTo ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- }
+// Insert multiple single-field keys and advance to each of them
+// using a forward cursor by specifying their exact key. When
+// advanceTo() is called on a duplicate key, the cursor is
+// positioned at the first occurrence of that key in ascending
+// order by RecordId.
+TEST(SortedDataInterface, AdvanceTo) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), key1, loc1, true ) );
- ASSERT_OK( sorted->insert( opCtx.get(), key1, loc2, true /* allow duplicates */ ) );
- ASSERT_OK( sorted->insert( opCtx.get(), key1, loc3, true /* allow duplicates */ ) );
- ASSERT_OK( sorted->insert( opCtx.get(), key2, loc4, true ) );
- ASSERT_OK( sorted->insert( opCtx.get(), key3, loc5, true ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), key1, loc1, true));
+ ASSERT_OK(sorted->insert(opCtx.get(), key1, loc2, true /* allow duplicates */));
+ ASSERT_OK(sorted->insert(opCtx.get(), key1, loc3, true /* allow duplicates */));
+ ASSERT_OK(sorted->insert(opCtx.get(), key2, loc4, true));
+ ASSERT_OK(sorted->insert(opCtx.get(), key3, loc5, true));
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 5, sorted->numEntries( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(5, sorted->numEntries(opCtx.get()));
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get()) );
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(sorted->newCursor(opCtx.get()));
- ASSERT_EQ(cursor->seek(key1, true), IndexKeyEntry(key1, loc1));
+ ASSERT_EQ(cursor->seek(key1, true), IndexKeyEntry(key1, loc1));
- IndexSeekPoint seekPoint;
- seekPoint.keyPrefix = key1;
- seekPoint.prefixLen = 1;
- seekPoint.prefixExclusive = false;
- ASSERT_EQ(cursor->seek(seekPoint), IndexKeyEntry(key1, loc1));
+ IndexSeekPoint seekPoint;
+ seekPoint.keyPrefix = key1;
+ seekPoint.prefixLen = 1;
+ seekPoint.prefixExclusive = false;
+ ASSERT_EQ(cursor->seek(seekPoint), IndexKeyEntry(key1, loc1));
- seekPoint.keyPrefix = key2;
- ASSERT_EQ(cursor->seek(seekPoint), IndexKeyEntry(key2, loc4));
+ seekPoint.keyPrefix = key2;
+ ASSERT_EQ(cursor->seek(seekPoint), IndexKeyEntry(key2, loc4));
- seekPoint.keyPrefix = key3;
- ASSERT_EQ(cursor->seek(seekPoint), IndexKeyEntry(key3, loc5));
+ seekPoint.keyPrefix = key3;
+ ASSERT_EQ(cursor->seek(seekPoint), IndexKeyEntry(key3, loc5));
- seekPoint.keyPrefix = key4;
- ASSERT_EQ(cursor->seek(seekPoint), boost::none);
- }
+ seekPoint.keyPrefix = key4;
+ ASSERT_EQ(cursor->seek(seekPoint), boost::none);
+ }
+}
+
+// Insert multiple single-field keys and advance to each of them
+// using a reverse cursor by specifying their exact key. When
+// advanceTo() is called on a duplicate key, the cursor is
+// positioned at the first occurrence of that key in descending
+// order by RecordId (last occurrence in index order).
+TEST(SortedDataInterface, AdvanceToReversed) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
}
- // Insert multiple single-field keys and advance to each of them
- // using a reverse cursor by specifying their exact key. When
- // advanceTo() is called on a duplicate key, the cursor is
- // positioned at the first occurrence of that key in descending
- // order by RecordId (last occurrence in index order).
- TEST( SortedDataInterface, AdvanceToReversed ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- }
-
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), key1, loc1, true ) );
- ASSERT_OK( sorted->insert( opCtx.get(), key2, loc2, true ) );
- ASSERT_OK( sorted->insert( opCtx.get(), key3, loc3, true ) );
- ASSERT_OK( sorted->insert( opCtx.get(), key3, loc4, true /* allow duplicates */ ) );
- ASSERT_OK( sorted->insert( opCtx.get(), key3, loc5, true /* allow duplicates */ ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), key1, loc1, true));
+ ASSERT_OK(sorted->insert(opCtx.get(), key2, loc2, true));
+ ASSERT_OK(sorted->insert(opCtx.get(), key3, loc3, true));
+ ASSERT_OK(sorted->insert(opCtx.get(), key3, loc4, true /* allow duplicates */));
+ ASSERT_OK(sorted->insert(opCtx.get(), key3, loc5, true /* allow duplicates */));
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 5, sorted->numEntries( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(5, sorted->numEntries(opCtx.get()));
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get(), false) );
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(
+ sorted->newCursor(opCtx.get(), false));
- ASSERT_EQ(cursor->seek(key3, true), IndexKeyEntry(key3, loc5));
+ ASSERT_EQ(cursor->seek(key3, true), IndexKeyEntry(key3, loc5));
- IndexSeekPoint seekPoint;
- seekPoint.keyPrefix = key3;
- seekPoint.prefixLen = 1;
- seekPoint.prefixExclusive = false;
- ASSERT_EQ(cursor->seek(seekPoint), IndexKeyEntry(key3, loc5));
+ IndexSeekPoint seekPoint;
+ seekPoint.keyPrefix = key3;
+ seekPoint.prefixLen = 1;
+ seekPoint.prefixExclusive = false;
+ ASSERT_EQ(cursor->seek(seekPoint), IndexKeyEntry(key3, loc5));
- seekPoint.keyPrefix = key2;
- ASSERT_EQ(cursor->seek(seekPoint), IndexKeyEntry(key2, loc2));
+ seekPoint.keyPrefix = key2;
+ ASSERT_EQ(cursor->seek(seekPoint), IndexKeyEntry(key2, loc2));
- seekPoint.keyPrefix = key1;
- ASSERT_EQ(cursor->seek(seekPoint), IndexKeyEntry(key1, loc1));
+ seekPoint.keyPrefix = key1;
+ ASSERT_EQ(cursor->seek(seekPoint), IndexKeyEntry(key1, loc1));
- seekPoint.keyPrefix = key0;
- ASSERT_EQ(cursor->seek(seekPoint), boost::none);
- }
+ seekPoint.keyPrefix = key0;
+ ASSERT_EQ(cursor->seek(seekPoint), boost::none);
+ }
+}
+
+// Insert two single-field keys, seek a forward cursor to the smaller one, and then seek to a
+// key behind it. The ending position is on the smaller one, since a seek describes where to
+// go and should not be affected by the current position.
+TEST(SortedDataInterface, AdvanceToKeyBeforeCursorPosition) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
}
- // Insert two single-field keys, seek a forward cursor to the smaller one, and then seek to a
- // key behind it. The ending position is on the smaller one, since a seek describes where to
- // go and should not be affected by the current position.
- TEST( SortedDataInterface, AdvanceToKeyBeforeCursorPosition ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- }
-
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), key1, loc1, true ) );
- ASSERT_OK( sorted->insert( opCtx.get(), key2, loc2, true ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), key1, loc1, true));
+ ASSERT_OK(sorted->insert(opCtx.get(), key2, loc2, true));
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 2, sorted->numEntries( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(2, sorted->numEntries(opCtx.get()));
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get()) );
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(sorted->newCursor(opCtx.get()));
- ASSERT_EQ(cursor->seek(key1, true), IndexKeyEntry(key1, loc1));
+ ASSERT_EQ(cursor->seek(key1, true), IndexKeyEntry(key1, loc1));
- IndexSeekPoint seekPoint;
- seekPoint.keyPrefix = key0;
- seekPoint.prefixLen = 1;
- seekPoint.prefixExclusive = false;
- ASSERT_EQ(cursor->seek(seekPoint), IndexKeyEntry(key1, loc1));
+ IndexSeekPoint seekPoint;
+ seekPoint.keyPrefix = key0;
+ seekPoint.prefixLen = 1;
+ seekPoint.prefixExclusive = false;
+ ASSERT_EQ(cursor->seek(seekPoint), IndexKeyEntry(key1, loc1));
- seekPoint.prefixExclusive = true;
- ASSERT_EQ(cursor->seek(seekPoint), IndexKeyEntry(key1, loc1));
- }
+ seekPoint.prefixExclusive = true;
+ ASSERT_EQ(cursor->seek(seekPoint), IndexKeyEntry(key1, loc1));
+ }
+}
+
+// Insert two single-field keys, seek a reverse cursor to the larger one, and then seek to a
+// key behind it. The ending position is on the larger one, since a seek describes where to
+// go and should not be affected by the current position.
+TEST(SortedDataInterface, AdvanceToKeyAfterCursorPositionReversed) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
}
- // Insert two single-field keys, seek a reverse cursor to the larger one, and then seek to a
- // key behind it. The ending position is on the larger one, since a seek describes where to
- // go and should not be affected by the current position.
- TEST( SortedDataInterface, AdvanceToKeyAfterCursorPositionReversed ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- }
-
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), key1, loc1, true ) );
- ASSERT_OK( sorted->insert( opCtx.get(), key2, loc2, true ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), key1, loc1, true));
+ ASSERT_OK(sorted->insert(opCtx.get(), key2, loc2, true));
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 2, sorted->numEntries( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(2, sorted->numEntries(opCtx.get()));
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get(), false) );
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(
+ sorted->newCursor(opCtx.get(), false));
- ASSERT_EQ(cursor->seek(key2, true), IndexKeyEntry(key2, loc2));
+ ASSERT_EQ(cursor->seek(key2, true), IndexKeyEntry(key2, loc2));
- IndexSeekPoint seekPoint;
- seekPoint.keyPrefix = key3;
- seekPoint.prefixLen = 1;
- seekPoint.prefixExclusive = false;
- ASSERT_EQ(cursor->seek(seekPoint), IndexKeyEntry(key2, loc2));
+ IndexSeekPoint seekPoint;
+ seekPoint.keyPrefix = key3;
+ seekPoint.prefixLen = 1;
+ seekPoint.prefixExclusive = false;
+ ASSERT_EQ(cursor->seek(seekPoint), IndexKeyEntry(key2, loc2));
- seekPoint.prefixExclusive = true;
- ASSERT_EQ(cursor->seek(seekPoint), IndexKeyEntry(key2, loc2));
- }
+ seekPoint.prefixExclusive = true;
+ ASSERT_EQ(cursor->seek(seekPoint), IndexKeyEntry(key2, loc2));
+ }
+}
+
+// Insert a single-field key and advance to EOF using a forward cursor
+// by specifying that exact key. When seek() is called with the key
+// where the cursor is positioned (and it is the first entry for that key),
+// the cursor should remain at its current position. An exclusive seek will
+// position the cursor on the next position, which may be EOF.
+TEST(SortedDataInterface, AdvanceToKeyAtCursorPosition) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
}
- // Insert a single-field key and advance to EOF using a forward cursor
- // by specifying that exact key. When seek() is called with the key
- // where the cursor is positioned (and it is the first entry for that key),
- // the cursor should remain at its current position. An exclusive seek will
- // position the cursor on the next position, which may be EOF.
- TEST( SortedDataInterface, AdvanceToKeyAtCursorPosition ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- }
-
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), key1, loc1, true ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), key1, loc1, true));
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 1, sorted->numEntries( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(1, sorted->numEntries(opCtx.get()));
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get()) );
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(sorted->newCursor(opCtx.get()));
- ASSERT_EQ(cursor->seek(key1, true), IndexKeyEntry(key1, loc1));
+ ASSERT_EQ(cursor->seek(key1, true), IndexKeyEntry(key1, loc1));
- IndexSeekPoint seekPoint;
- seekPoint.keyPrefix = key1;
- seekPoint.prefixLen = 1;
- seekPoint.prefixExclusive = false;
- ASSERT_EQ(cursor->seek(seekPoint), IndexKeyEntry(key1, loc1));
+ IndexSeekPoint seekPoint;
+ seekPoint.keyPrefix = key1;
+ seekPoint.prefixLen = 1;
+ seekPoint.prefixExclusive = false;
+ ASSERT_EQ(cursor->seek(seekPoint), IndexKeyEntry(key1, loc1));
- seekPoint.prefixExclusive = true;
- ASSERT_EQ(cursor->seek(seekPoint), boost::none);
- }
+ seekPoint.prefixExclusive = true;
+ ASSERT_EQ(cursor->seek(seekPoint), boost::none);
+ }
+}
+
+// Insert a single-field key and advance to EOF using a reverse cursor
+// by specifying that exact key. When seek() is called with the key
+// where the cursor is positioned (and it is the first entry for that key),
+// the cursor should remain at its current position. An exclusive seek will
+// position the cursor on the next position, which may be EOF.
+TEST(SortedDataInterface, AdvanceToKeyAtCursorPositionReversed) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
}
- // Insert a single-field key and advance to EOF using a reverse cursor
- // by specifying that exact key. When seek() is called with the key
- // where the cursor is positioned (and it is the first entry for that key),
- // the cursor should remain at its current position. An exclusive seek will
- // position the cursor on the next position, which may be EOF.
- TEST( SortedDataInterface, AdvanceToKeyAtCursorPositionReversed ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- }
-
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), key1, loc1, true ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), key1, loc1, true));
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 1, sorted->numEntries( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(1, sorted->numEntries(opCtx.get()));
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get(), false) );
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(
+ sorted->newCursor(opCtx.get(), false));
- ASSERT_EQ(cursor->seek(key1, true), IndexKeyEntry(key1, loc1));
+ ASSERT_EQ(cursor->seek(key1, true), IndexKeyEntry(key1, loc1));
- IndexSeekPoint seekPoint;
- seekPoint.keyPrefix = key1;
- seekPoint.prefixLen = 1;
- seekPoint.prefixExclusive = false;
- ASSERT_EQ(cursor->seek(seekPoint), IndexKeyEntry(key1, loc1));
+ IndexSeekPoint seekPoint;
+ seekPoint.keyPrefix = key1;
+ seekPoint.prefixLen = 1;
+ seekPoint.prefixExclusive = false;
+ ASSERT_EQ(cursor->seek(seekPoint), IndexKeyEntry(key1, loc1));
- seekPoint.prefixExclusive = true;
- ASSERT_EQ(cursor->seek(seekPoint), boost::none);
- }
+ seekPoint.prefixExclusive = true;
+ ASSERT_EQ(cursor->seek(seekPoint), boost::none);
+ }
+}
+
+// Insert multiple single-field keys and advance to each of them using
+// a forward cursor by specifying a key that comes immediately before.
+// When advanceTo() is called in non-inclusive mode, the cursor is
+// positioned at the key that comes after the one specified.
+TEST(SortedDataInterface, AdvanceToExclusive) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
}
- // Insert multiple single-field keys and advance to each of them using
- // a forward cursor by specifying a key that comes immediately before.
- // When advanceTo() is called in non-inclusive mode, the cursor is
- // positioned at the key that comes after the one specified.
- TEST( SortedDataInterface, AdvanceToExclusive ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- }
-
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), key1, loc1, true ) );
- ASSERT_OK( sorted->insert( opCtx.get(), key1, loc2, true /* allow duplicates */ ) );
- ASSERT_OK( sorted->insert( opCtx.get(), key1, loc3, true /* allow duplicates */ ) );
- ASSERT_OK( sorted->insert( opCtx.get(), key2, loc4, true ) );
- ASSERT_OK( sorted->insert( opCtx.get(), key3, loc5, true ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), key1, loc1, true));
+ ASSERT_OK(sorted->insert(opCtx.get(), key1, loc2, true /* allow duplicates */));
+ ASSERT_OK(sorted->insert(opCtx.get(), key1, loc3, true /* allow duplicates */));
+ ASSERT_OK(sorted->insert(opCtx.get(), key2, loc4, true));
+ ASSERT_OK(sorted->insert(opCtx.get(), key3, loc5, true));
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 5, sorted->numEntries( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(5, sorted->numEntries(opCtx.get()));
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get()) );
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(sorted->newCursor(opCtx.get()));
- ASSERT_EQ(cursor->seek(key1, true), IndexKeyEntry(key1, loc1));
+ ASSERT_EQ(cursor->seek(key1, true), IndexKeyEntry(key1, loc1));
- IndexSeekPoint seekPoint;
- seekPoint.keyPrefix = key1;
- seekPoint.prefixLen = 1;
- seekPoint.prefixExclusive = true;
- ASSERT_EQ(cursor->seek(seekPoint), IndexKeyEntry(key2, loc4));
+ IndexSeekPoint seekPoint;
+ seekPoint.keyPrefix = key1;
+ seekPoint.prefixLen = 1;
+ seekPoint.prefixExclusive = true;
+ ASSERT_EQ(cursor->seek(seekPoint), IndexKeyEntry(key2, loc4));
- seekPoint.keyPrefix = key2;
- ASSERT_EQ(cursor->seek(seekPoint), IndexKeyEntry(key3, loc5));
+ seekPoint.keyPrefix = key2;
+ ASSERT_EQ(cursor->seek(seekPoint), IndexKeyEntry(key3, loc5));
- seekPoint.keyPrefix = key3;
- ASSERT_EQ(cursor->seek(seekPoint), boost::none);
+ seekPoint.keyPrefix = key3;
+ ASSERT_EQ(cursor->seek(seekPoint), boost::none);
- seekPoint.keyPrefix = key4;
- ASSERT_EQ(cursor->seek(seekPoint), boost::none);
- }
+ seekPoint.keyPrefix = key4;
+ ASSERT_EQ(cursor->seek(seekPoint), boost::none);
+ }
+}
+
+// Insert multiple single-field keys and advance to each of them using
+// a reverse cursor by specifying a key that comes immediately after.
+// When advanceTo() is called in non-inclusive mode, the cursor is
+// positioned at the key that comes before the one specified.
+TEST(SortedDataInterface, AdvanceToExclusiveReversed) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
}
- // Insert multiple single-field keys and advance to each of them using
- // a reverse cursor by specifying a key that comes immediately after.
- // When advanceTo() is called in non-inclusive mode, the cursor is
- // positioned at the key that comes before the one specified.
- TEST( SortedDataInterface, AdvanceToExclusiveReversed ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- }
-
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), key1, loc1, true ) );
- ASSERT_OK( sorted->insert( opCtx.get(), key2, loc2, true ) );
- ASSERT_OK( sorted->insert( opCtx.get(), key3, loc3, true ) );
- ASSERT_OK( sorted->insert( opCtx.get(), key3, loc4, true /* allow duplicates */ ) );
- ASSERT_OK( sorted->insert( opCtx.get(), key3, loc5, true /* allow duplicates */ ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), key1, loc1, true));
+ ASSERT_OK(sorted->insert(opCtx.get(), key2, loc2, true));
+ ASSERT_OK(sorted->insert(opCtx.get(), key3, loc3, true));
+ ASSERT_OK(sorted->insert(opCtx.get(), key3, loc4, true /* allow duplicates */));
+ ASSERT_OK(sorted->insert(opCtx.get(), key3, loc5, true /* allow duplicates */));
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 5, sorted->numEntries( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(5, sorted->numEntries(opCtx.get()));
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get(), false) );
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(
+ sorted->newCursor(opCtx.get(), false));
- ASSERT_EQ(cursor->seek(key3, true), IndexKeyEntry(key3, loc5));
+ ASSERT_EQ(cursor->seek(key3, true), IndexKeyEntry(key3, loc5));
- IndexSeekPoint seekPoint;
- seekPoint.keyPrefix = key3;
- seekPoint.prefixLen = 1;
- seekPoint.prefixExclusive = true;
- ASSERT_EQ(cursor->seek(seekPoint), IndexKeyEntry(key2, loc2));
+ IndexSeekPoint seekPoint;
+ seekPoint.keyPrefix = key3;
+ seekPoint.prefixLen = 1;
+ seekPoint.prefixExclusive = true;
+ ASSERT_EQ(cursor->seek(seekPoint), IndexKeyEntry(key2, loc2));
- seekPoint.keyPrefix = key2;
- ASSERT_EQ(cursor->seek(seekPoint), IndexKeyEntry(key1, loc1));
+ seekPoint.keyPrefix = key2;
+ ASSERT_EQ(cursor->seek(seekPoint), IndexKeyEntry(key1, loc1));
- seekPoint.keyPrefix = key1;
- ASSERT_EQ(cursor->seek(seekPoint), boost::none);
+ seekPoint.keyPrefix = key1;
+ ASSERT_EQ(cursor->seek(seekPoint), boost::none);
- seekPoint.keyPrefix = key0;
- ASSERT_EQ(cursor->seek(seekPoint), boost::none);
- }
+ seekPoint.keyPrefix = key0;
+ ASSERT_EQ(cursor->seek(seekPoint), boost::none);
}
+}
- // Insert multiple, non-consecutive, single-field keys and advance to
- // each of them using a forward cursor by specifying a key between their
- // exact key and the current position of the cursor.
- TEST( SortedDataInterface, AdvanceToIndirect ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
+// Insert multiple, non-consecutive, single-field keys and advance to
+// each of them using a forward cursor by specifying a key between their
+// exact key and the current position of the cursor.
+TEST(SortedDataInterface, AdvanceToIndirect) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
- BSONObj unusedKey = key6; // larger than any inserted key
+ BSONObj unusedKey = key6; // larger than any inserted key
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), key1, loc1, true ) );
- ASSERT_OK( sorted->insert( opCtx.get(), key3, loc2, true ) );
- ASSERT_OK( sorted->insert( opCtx.get(), key5, loc3, true ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), key1, loc1, true));
+ ASSERT_OK(sorted->insert(opCtx.get(), key3, loc2, true));
+ ASSERT_OK(sorted->insert(opCtx.get(), key5, loc3, true));
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 3, sorted->numEntries( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(3, sorted->numEntries(opCtx.get()));
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get()) );
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(sorted->newCursor(opCtx.get()));
- ASSERT_EQ(cursor->seek(key1, true), IndexKeyEntry(key1, loc1));
+ ASSERT_EQ(cursor->seek(key1, true), IndexKeyEntry(key1, loc1));
- IndexSeekPoint seekPoint;
- seekPoint.prefixLen = 0;
- BSONElement suffix0;
- seekPoint.keySuffix = {&suffix0};
- seekPoint.suffixInclusive = {true};
+ IndexSeekPoint seekPoint;
+ seekPoint.prefixLen = 0;
+ BSONElement suffix0;
+ seekPoint.keySuffix = {&suffix0};
+ seekPoint.suffixInclusive = {true};
- suffix0 = key2.firstElement();
- ASSERT_EQ(cursor->seek(seekPoint), IndexKeyEntry(key3, loc2));
+ suffix0 = key2.firstElement();
+ ASSERT_EQ(cursor->seek(seekPoint), IndexKeyEntry(key3, loc2));
- suffix0 = key4.firstElement();
- ASSERT_EQ(cursor->seek(seekPoint), IndexKeyEntry(key5, loc3));
- }
+ suffix0 = key4.firstElement();
+ ASSERT_EQ(cursor->seek(seekPoint), IndexKeyEntry(key5, loc3));
}
+}
- // Insert multiple, non-consecutive, single-field keys and advance to
- // each of them using a reverse cursor by specifying a key between their
- // exact key and the current position of the cursor.
- TEST( SortedDataInterface, AdvanceToIndirectReversed ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
+// Insert multiple, non-consecutive, single-field keys and advance to
+// each of them using a reverse cursor by specifying a key between their
+// exact key and the current position of the cursor.
+TEST(SortedDataInterface, AdvanceToIndirectReversed) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
- BSONObj unusedKey = key0; // smaller than any inserted key
+ BSONObj unusedKey = key0; // smaller than any inserted key
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), key1, loc1, true ) );
- ASSERT_OK( sorted->insert( opCtx.get(), key3, loc2, true ) );
- ASSERT_OK( sorted->insert( opCtx.get(), key5, loc3, true ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), key1, loc1, true));
+ ASSERT_OK(sorted->insert(opCtx.get(), key3, loc2, true));
+ ASSERT_OK(sorted->insert(opCtx.get(), key5, loc3, true));
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 3, sorted->numEntries( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(3, sorted->numEntries(opCtx.get()));
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get(), false) );
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(
+ sorted->newCursor(opCtx.get(), false));
- ASSERT_EQ(cursor->seek(key5, true), IndexKeyEntry(key5, loc3));
+ ASSERT_EQ(cursor->seek(key5, true), IndexKeyEntry(key5, loc3));
- IndexSeekPoint seekPoint;
- seekPoint.prefixLen = 0;
- BSONElement suffix0;
- seekPoint.keySuffix = {&suffix0};
- seekPoint.suffixInclusive = {true};
+ IndexSeekPoint seekPoint;
+ seekPoint.prefixLen = 0;
+ BSONElement suffix0;
+ seekPoint.keySuffix = {&suffix0};
+ seekPoint.suffixInclusive = {true};
- suffix0 = key4.firstElement();
- ASSERT_EQ(cursor->seek(seekPoint), IndexKeyEntry(key3, loc2));
+ suffix0 = key4.firstElement();
+ ASSERT_EQ(cursor->seek(seekPoint), IndexKeyEntry(key3, loc2));
- suffix0 = key2.firstElement();
- ASSERT_EQ(cursor->seek(seekPoint), IndexKeyEntry(key1, loc1));
- }
+ suffix0 = key2.firstElement();
+ ASSERT_EQ(cursor->seek(seekPoint), IndexKeyEntry(key1, loc1));
+ }
+}
+
+// Insert multiple, non-consecutive, single-field keys and advance to
+// each of them using a forward cursor by specifying a key between their
+// exact key and the current position of the cursor. When advanceTo()
+// is called in non-inclusive mode, the cursor is positioned at the key
+// that comes after the one specified.
+TEST(SortedDataInterface, AdvanceToIndirectExclusive) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
+
+ BSONObj unusedKey = key6; // larger than any inserted key
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
}
- // Insert multiple, non-consecutive, single-field keys and advance to
- // each of them using a forward cursor by specifying a key between their
- // exact key and the current position of the cursor. When advanceTo()
- // is called in non-inclusive mode, the cursor is positioned at the key
- // that comes after the one specified.
- TEST( SortedDataInterface, AdvanceToIndirectExclusive ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
-
- BSONObj unusedKey = key6; // larger than any inserted key
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- }
-
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), key1, loc1, true ) );
- ASSERT_OK( sorted->insert( opCtx.get(), key3, loc2, true ) );
- ASSERT_OK( sorted->insert( opCtx.get(), key5, loc3, true ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), key1, loc1, true));
+ ASSERT_OK(sorted->insert(opCtx.get(), key3, loc2, true));
+ ASSERT_OK(sorted->insert(opCtx.get(), key5, loc3, true));
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 3, sorted->numEntries( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(3, sorted->numEntries(opCtx.get()));
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get()) );
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(sorted->newCursor(opCtx.get()));
- ASSERT_EQ(cursor->seek(key1, true), IndexKeyEntry(key1, loc1));
+ ASSERT_EQ(cursor->seek(key1, true), IndexKeyEntry(key1, loc1));
- IndexSeekPoint seekPoint;
- seekPoint.prefixLen = 0;
- BSONElement suffix0;
- seekPoint.keySuffix = {&suffix0};
- seekPoint.suffixInclusive = {false};
+ IndexSeekPoint seekPoint;
+ seekPoint.prefixLen = 0;
+ BSONElement suffix0;
+ seekPoint.keySuffix = {&suffix0};
+ seekPoint.suffixInclusive = {false};
- suffix0 = key2.firstElement();
- ASSERT_EQ(cursor->seek(seekPoint), IndexKeyEntry(key3, loc2));
+ suffix0 = key2.firstElement();
+ ASSERT_EQ(cursor->seek(seekPoint), IndexKeyEntry(key3, loc2));
- suffix0 = key4.firstElement();
- ASSERT_EQ(cursor->seek(seekPoint), IndexKeyEntry(key5, loc3));
+ suffix0 = key4.firstElement();
+ ASSERT_EQ(cursor->seek(seekPoint), IndexKeyEntry(key5, loc3));
- ASSERT_EQ(cursor->seek(key1, true), IndexKeyEntry(key1, loc1));
+ ASSERT_EQ(cursor->seek(key1, true), IndexKeyEntry(key1, loc1));
- suffix0 = key3.firstElement();
- ASSERT_EQ(cursor->seek(seekPoint), IndexKeyEntry(key5, loc3));
- }
+ suffix0 = key3.firstElement();
+ ASSERT_EQ(cursor->seek(seekPoint), IndexKeyEntry(key5, loc3));
+ }
+}
+
+// Insert multiple, non-consecutive, single-field keys and advance to
+// each of them using a reverse cursor by specifying a key between their
+// exact key and the current position of the cursor. When advanceTo()
+// is called in non-inclusive mode, the cursor is positioned at the key
+// that comes before the one specified.
+TEST(SortedDataInterface, AdvanceToIndirectExclusiveReversed) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
+
+ BSONObj unusedKey = key0; // smaller than any inserted key
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
}
- // Insert multiple, non-consecutive, single-field keys and advance to
- // each of them using a reverse cursor by specifying a key between their
- // exact key and the current position of the cursor. When advanceTo()
- // is called in non-inclusive mode, the cursor is positioned at the key
- // that comes before the one specified.
- TEST( SortedDataInterface, AdvanceToIndirectExclusiveReversed ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
-
- BSONObj unusedKey = key0; // smaller than any inserted key
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- }
-
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), key1, loc1, true ) );
- ASSERT_OK( sorted->insert( opCtx.get(), key3, loc2, true ) );
- ASSERT_OK( sorted->insert( opCtx.get(), key5, loc3, true ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), key1, loc1, true));
+ ASSERT_OK(sorted->insert(opCtx.get(), key3, loc2, true));
+ ASSERT_OK(sorted->insert(opCtx.get(), key5, loc3, true));
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 3, sorted->numEntries( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(3, sorted->numEntries(opCtx.get()));
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get(), false) );
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(
+ sorted->newCursor(opCtx.get(), false));
- ASSERT_EQ(cursor->seek(key5, true), IndexKeyEntry(key5, loc3));
+ ASSERT_EQ(cursor->seek(key5, true), IndexKeyEntry(key5, loc3));
- IndexSeekPoint seekPoint;
- seekPoint.prefixLen = 0;
- BSONElement suffix0;
- seekPoint.keySuffix = {&suffix0};
- seekPoint.suffixInclusive = {false};
+ IndexSeekPoint seekPoint;
+ seekPoint.prefixLen = 0;
+ BSONElement suffix0;
+ seekPoint.keySuffix = {&suffix0};
+ seekPoint.suffixInclusive = {false};
- suffix0 = key4.firstElement();
- ASSERT_EQ(cursor->seek(seekPoint), IndexKeyEntry(key3, loc2));
+ suffix0 = key4.firstElement();
+ ASSERT_EQ(cursor->seek(seekPoint), IndexKeyEntry(key3, loc2));
- suffix0 = key2.firstElement();
- ASSERT_EQ(cursor->seek(seekPoint), IndexKeyEntry(key1, loc1));
+ suffix0 = key2.firstElement();
+ ASSERT_EQ(cursor->seek(seekPoint), IndexKeyEntry(key1, loc1));
- ASSERT_EQ(cursor->seek(key5, true), IndexKeyEntry(key5, loc3));
+ ASSERT_EQ(cursor->seek(key5, true), IndexKeyEntry(key5, loc3));
- suffix0 = key3.firstElement();
- ASSERT_EQ(cursor->seek(seekPoint), IndexKeyEntry(key1, loc1));
- }
+ suffix0 = key3.firstElement();
+ ASSERT_EQ(cursor->seek(seekPoint), IndexKeyEntry(key1, loc1));
}
+}
-} // namespace mongo
+} // namespace mongo
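
The advance-to tests above all exercise the same IndexSeekPoint contract: with
suffixInclusive = {false}, a forward cursor lands on the first key strictly
greater than the seek key, and a reverse cursor lands on the last key strictly
less than it. The sketch below is a minimal standalone model of that ordering
contract, with a std::map of integer keys standing in for the index; it is an
illustration only, not the SortedDataInterface implementation.

    // Model of exclusive seek positioning over keys {1, 3, 5}
    // (mirroring key1/key3/key5 in the tests above).
    #include <cassert>
    #include <map>
    #include <string>

    int main() {
        std::map<int, std::string> index{{1, "loc1"}, {3, "loc2"}, {5, "loc3"}};

        // Forward, non-inclusive: first key strictly after the seek key
        // (seek 2 -> 3, seek 4 -> 5), as with suffixInclusive = {false}.
        assert(index.upper_bound(2)->first == 3);
        assert(index.upper_bound(4)->first == 5);

        // Reverse, non-inclusive: last key strictly before the seek key
        // (seek 4 -> 3, seek 2 -> 1).
        auto rev = index.lower_bound(4);  // first key >= 4, i.e. 5
        --rev;                            // step back to the key before it
        assert(rev->first == 3);
        rev = index.lower_bound(2);       // first key >= 2, i.e. 3
        --rev;
        assert(rev->first == 1);
        return 0;
    }

A real cursor positions itself directly rather than decrementing an iterator
past the bound, but the resulting entries match what the tests above assert.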
diff --git a/src/mongo/db/storage/sorted_data_interface_test_cursor_end_position.cpp b/src/mongo/db/storage/sorted_data_interface_test_cursor_end_position.cpp
index a2d4609c82c..190f707c4c0 100644
--- a/src/mongo/db/storage/sorted_data_interface_test_cursor_end_position.cpp
+++ b/src/mongo/db/storage/sorted_data_interface_test_cursor_end_position.cpp
@@ -34,380 +34,384 @@
#include "mongo/unittest/unittest.h"
namespace mongo {
- // Tests setEndPosition with next().
- void testSetEndPosition_Next_Forward(bool unique, bool inclusive) {
- auto harnessHelper = newHarnessHelper();
- auto opCtx = harnessHelper->newOperationContext();
- auto sorted = harnessHelper->newSortedDataInterface(unique, {
- {key1, loc1},
- {key2, loc1},
- {key3, loc1},
- {key4, loc1},
- {key5, loc1},
+// Tests setEndPosition with next().
+void testSetEndPosition_Next_Forward(bool unique, bool inclusive) {
+ auto harnessHelper = newHarnessHelper();
+ auto opCtx = harnessHelper->newOperationContext();
+ auto sorted = harnessHelper->newSortedDataInterface(
+ unique,
+ {
+ {key1, loc1}, {key2, loc1}, {key3, loc1}, {key4, loc1}, {key5, loc1},
});
- // Dup key on end point. Illegal for unique indexes.
- if (!unique) insertToIndex(opCtx, sorted, {{key3, loc2}});
+ // Dup key on end point. Illegal for unique indexes.
+ if (!unique)
+ insertToIndex(opCtx, sorted, {{key3, loc2}});
- auto cursor = sorted->newCursor(opCtx.get());
- cursor->setEndPosition(key3, inclusive);
-
- ASSERT_EQ(cursor->seek(key1, true), IndexKeyEntry(key1, loc1));
- ASSERT_EQ(cursor->next(), IndexKeyEntry(key2, loc1));
- if (inclusive) {
- ASSERT_EQ(cursor->next(), IndexKeyEntry(key3, loc1));
- if (!unique) ASSERT_EQ(cursor->next(), IndexKeyEntry(key3, loc2));
- }
- ASSERT_EQ(cursor->next(), boost::none);
- ASSERT_EQ(cursor->next(), boost::none); // don't resurrect.
- }
- TEST(SortedDataInterface, SetEndPosition_Next_Forward_Unique_Inclusive) {
- testSetEndPosition_Next_Forward(true, true);
- }
- TEST(SortedDataInterface, SetEndPosition_Next_Forward_Unique_Exclusive) {
- testSetEndPosition_Next_Forward(true, false);
- }
- TEST(SortedDataInterface, SetEndPosition_Next_Forward_Standard_Inclusive) {
- testSetEndPosition_Next_Forward(false, true);
- }
- TEST(SortedDataInterface, SetEndPosition_Next_Forward_Standard_Exclusive) {
- testSetEndPosition_Next_Forward(false, false);
- }
-
- void testSetEndPosition_Next_Reverse(bool unique, bool inclusive) {
- auto harnessHelper = newHarnessHelper();
- auto opCtx = harnessHelper->newOperationContext();
- auto sorted = harnessHelper->newSortedDataInterface(unique, {
- {key1, loc1},
- {key2, loc1},
- {key3, loc1},
- {key4, loc1},
- {key5, loc1},
- });
-
- // Dup key on end point. Illegal for unique indexes.
- if (!unique) insertToIndex(opCtx, sorted, {{key3, loc2}});
-
- auto cursor = sorted->newCursor(opCtx.get(), false);
- cursor->setEndPosition(key3, inclusive);
-
- ASSERT_EQ(cursor->seek(key5, true), IndexKeyEntry(key5, loc1));
- ASSERT_EQ(cursor->next(), IndexKeyEntry(key4, loc1));
- if (inclusive) {
- if (!unique) ASSERT_EQ(cursor->next(), IndexKeyEntry(key3, loc2));
- ASSERT_EQ(cursor->next(), IndexKeyEntry(key3, loc1));
- }
- ASSERT_EQ(cursor->next(), boost::none);
- ASSERT_EQ(cursor->next(), boost::none); // don't resurrect.
- }
- TEST(SortedDataInterface, SetEndPosition_Next_Reverse_Unique_Inclusive) {
- testSetEndPosition_Next_Reverse(true, true);
- }
- TEST(SortedDataInterface, SetEndPosition_Next_Reverse_Unique_Exclusive) {
- testSetEndPosition_Next_Reverse(true, false);
- }
- TEST(SortedDataInterface, SetEndPosition_Next_Reverse_Standard_Inclusive) {
- testSetEndPosition_Next_Reverse(false, true);
- }
- TEST(SortedDataInterface, SetEndPosition_Next_Reverse_Standard_Exclusive) {
- testSetEndPosition_Next_Reverse(false, false);
- }
-
- // Tests setEndPosition with seek() and seekExact().
- void testSetEndPosition_Seek_Forward(bool unique, bool inclusive) {
- auto harnessHelper = newHarnessHelper();
- auto opCtx = harnessHelper->newOperationContext();
- auto sorted = harnessHelper->newSortedDataInterface(unique, {
- {key1, loc1},
- // No key2
- {key3, loc1},
- {key4, loc1},
- });
-
- auto cursor = sorted->newCursor(opCtx.get());
- cursor->setEndPosition(key3, inclusive);
-
- // Directly seeking past end is considered out of range.
- ASSERT_EQ(cursor->seek(key4, true), boost::none);
- ASSERT_EQ(cursor->seekExact(key4), boost::none);
-
-        // Seeking to key3, directly or indirectly, returns it only if endPosition is inclusive.
- auto maybeKey3 = inclusive ? boost::make_optional(IndexKeyEntry(key3, loc1)) : boost::none;
-
- // direct
- ASSERT_EQ(cursor->seek(key3, true), maybeKey3);
- ASSERT_EQ(cursor->seekExact(key3), maybeKey3);
-
- // indirect
- ASSERT_EQ(cursor->seek(key2, true), maybeKey3);
-
- cursor->saveUnpositioned();
- removeFromIndex(opCtx, sorted, {{key3, loc1}});
- cursor->restore(opCtx.get());
-
- ASSERT_EQ(cursor->seek(key2, true), boost::none);
- ASSERT_EQ(cursor->seek(key3, true), boost::none);
- }
- TEST(SortedDataInterface, SetEndPosition_Seek_Forward_Unique_Inclusive) {
- testSetEndPosition_Seek_Forward(true, true);
- }
- TEST(SortedDataInterface, SetEndPosition_Seek_Forward_Unique_Exclusive) {
- testSetEndPosition_Seek_Forward(true, false);
- }
- TEST(SortedDataInterface, SetEndPosition_Seek_Forward_Standard_Inclusive) {
- testSetEndPosition_Seek_Forward(false, true);
- }
- TEST(SortedDataInterface, SetEndPosition_Seek_Forward_Standard_Exclusive) {
- testSetEndPosition_Seek_Forward(false, false);
- }
-
- void testSetEndPosition_Seek_Reverse(bool unique, bool inclusive) {
- auto harnessHelper = newHarnessHelper();
- auto opCtx = harnessHelper->newOperationContext();
- auto sorted = harnessHelper->newSortedDataInterface(unique, {
- {key1, loc1},
- {key2, loc1},
- // No key3
- {key4, loc1},
- });
-
- auto cursor = sorted->newCursor(opCtx.get(), false);
- cursor->setEndPosition(key2, inclusive);
-
- // Directly seeking past end is considered out of range.
- ASSERT_EQ(cursor->seek(key1, true), boost::none);
- ASSERT_EQ(cursor->seekExact(key1), boost::none);
-
-        // Seeking to key2, directly or indirectly, returns it only if endPosition is inclusive.
- auto maybeKey2 = inclusive ? boost::make_optional(IndexKeyEntry(key2, loc1)) : boost::none;
-
- // direct
- ASSERT_EQ(cursor->seek(key2, true), maybeKey2);
- ASSERT_EQ(cursor->seekExact(key2), maybeKey2);
-
- // indirect
- ASSERT_EQ(cursor->seek(key3, true), maybeKey2);
-
- cursor->saveUnpositioned();
- removeFromIndex(opCtx, sorted, {{key2, loc1}});
- cursor->restore(opCtx.get());
-
- ASSERT_EQ(cursor->seek(key3, true), boost::none);
- ASSERT_EQ(cursor->seek(key2, true), boost::none);
- }
- TEST(SortedDataInterface, SetEndPosition_Seek_Reverse_Unique_Inclusive) {
- testSetEndPosition_Seek_Reverse(true, true);
- }
- TEST(SortedDataInterface, SetEndPosition_Seek_Reverse_Unique_Exclusive) {
- testSetEndPosition_Seek_Reverse(true, false);
- }
- TEST(SortedDataInterface, SetEndPosition_Seek_Reverse_Standard_Inclusive) {
- testSetEndPosition_Seek_Reverse(false, true);
- }
- TEST(SortedDataInterface, SetEndPosition_Seek_Reverse_Standard_Exclusive) {
- testSetEndPosition_Seek_Reverse(false, false);
- }
-
- // Test that restore never lands on the wrong side of the endPosition.
- void testSetEndPosition_Restore_Forward(bool unique) {
- auto harnessHelper = newHarnessHelper();
- auto opCtx = harnessHelper->newOperationContext();
- auto sorted = harnessHelper->newSortedDataInterface(unique, {
- {key1, loc1},
- {key2, loc1},
- {key3, loc1},
- {key4, loc1},
- });
-
- auto cursor = sorted->newCursor(opCtx.get());
- cursor->setEndPosition(key3, false); // Should never see key3 or key4.
-
- ASSERT_EQ(cursor->seek(key1, true), IndexKeyEntry(key1, loc1));
-
- cursor->savePositioned();
- cursor->restore(opCtx.get());
-
- ASSERT_EQ(cursor->next(), IndexKeyEntry(key2, loc1));
-
- cursor->savePositioned();
- removeFromIndex(opCtx, sorted, {
- {key2, loc1},
- {key3, loc1},
- });
- cursor->restore(opCtx.get());
-
- ASSERT_EQ(cursor->next(), boost::none);
- }
- TEST(SortedDataInterface, SetEndPosition_Restore_Forward_Unique) {
- testSetEndPosition_Restore_Forward(true);
- }
- TEST(SortedDataInterface, SetEndPosition_Restore_Forward_Standard) {
- testSetEndPosition_Restore_Forward(false);
- }
-
- void testSetEndPosition_Restore_Reverse(bool unique) {
- auto harnessHelper = newHarnessHelper();
- auto opCtx = harnessHelper->newOperationContext();
- auto sorted = harnessHelper->newSortedDataInterface(unique, {
- {key1, loc1},
- {key2, loc1},
- {key3, loc1},
- {key4, loc1},
- });
-
- auto cursor = sorted->newCursor(opCtx.get(), false);
- cursor->setEndPosition(key2, false); // Should never see key1 or key2.
-
- ASSERT_EQ(cursor->seek(key4, true), IndexKeyEntry(key4, loc1));
-
- cursor->savePositioned();
- cursor->restore(opCtx.get());
+ auto cursor = sorted->newCursor(opCtx.get());
+ cursor->setEndPosition(key3, inclusive);
+ ASSERT_EQ(cursor->seek(key1, true), IndexKeyEntry(key1, loc1));
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(key2, loc1));
+ if (inclusive) {
ASSERT_EQ(cursor->next(), IndexKeyEntry(key3, loc1));
-
- cursor->savePositioned();
- removeFromIndex(opCtx, sorted, {
- {key2, loc1},
- {key3, loc1},
- });
- cursor->restore(opCtx.get());
-
- ASSERT_EQ(cursor->next(), boost::none);
- }
- TEST(SortedDataInterface, SetEndPosition_Restore_Reverse_Unique) {
- testSetEndPosition_Restore_Reverse(true);
- }
- TEST(SortedDataInterface, SetEndPosition_Restore_Reverse_Standard) {
- testSetEndPosition_Restore_Reverse(false);
- }
-
- // Test that restore always updates the end cursor if one is used. Some storage engines use a
- // cursor positioned at the first out-of-range document and have next() check if the current
- // position is the same as the end cursor. End cursor maintenance cannot be directly tested
- // (since implementations are free not to use end cursors) but implementations that incorrectly
- // restore end cursors would tend to fail this test.
- void testSetEndPosition_RestoreEndCursor_Forward(bool unique) {
- auto harnessHelper = newHarnessHelper();
- auto opCtx = harnessHelper->newOperationContext();
- auto sorted = harnessHelper->newSortedDataInterface(unique, {
- {key1, loc1},
- {key4, loc1},
- });
-
- auto cursor = sorted->newCursor(opCtx.get());
- cursor->setEndPosition(key2, true);
-
- ASSERT_EQ(cursor->seek(key1, true), IndexKeyEntry(key1, loc1));
-
-        // A potential source of bugs is not restoring the end cursor with saveUnpositioned().
- cursor->saveUnpositioned();
- insertToIndex(opCtx, sorted, {
- {key2, loc1}, // in range
- {key3, loc1}, // out of range
- });
- cursor->restore(opCtx.get());
-
- ASSERT_EQ(cursor->seek(key1, true), IndexKeyEntry(key1, loc1));
- ASSERT_EQ(cursor->next(), IndexKeyEntry(key2, loc1));
- ASSERT_EQ(cursor->next(), boost::none);
- }
- TEST(SortedDataInterface, SetEndPosition_RestoreEndCursor_Forward_Unique) {
- testSetEndPosition_RestoreEndCursor_Forward(true);
- }
- TEST(SortedDataInterface, SetEndPosition_RestoreEndCursor_Forward_Standard) {
- testSetEndPosition_RestoreEndCursor_Forward(false);
- }
-
- void testSetEndPosition_RestoreEndCursor_Reverse(bool unique) {
- auto harnessHelper = newHarnessHelper();
- auto opCtx = harnessHelper->newOperationContext();
- auto sorted = harnessHelper->newSortedDataInterface(unique, {
- {key1, loc1},
- {key4, loc1},
+ if (!unique)
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(key3, loc2));
+ }
+ ASSERT_EQ(cursor->next(), boost::none);
+ ASSERT_EQ(cursor->next(), boost::none); // don't resurrect.
+}
+TEST(SortedDataInterface, SetEndPosition_Next_Forward_Unique_Inclusive) {
+ testSetEndPosition_Next_Forward(true, true);
+}
+TEST(SortedDataInterface, SetEndPosition_Next_Forward_Unique_Exclusive) {
+ testSetEndPosition_Next_Forward(true, false);
+}
+TEST(SortedDataInterface, SetEndPosition_Next_Forward_Standard_Inclusive) {
+ testSetEndPosition_Next_Forward(false, true);
+}
+TEST(SortedDataInterface, SetEndPosition_Next_Forward_Standard_Exclusive) {
+ testSetEndPosition_Next_Forward(false, false);
+}
+
+void testSetEndPosition_Next_Reverse(bool unique, bool inclusive) {
+ auto harnessHelper = newHarnessHelper();
+ auto opCtx = harnessHelper->newOperationContext();
+ auto sorted = harnessHelper->newSortedDataInterface(
+ unique,
+ {
+ {key1, loc1}, {key2, loc1}, {key3, loc1}, {key4, loc1}, {key5, loc1},
});
- auto cursor = sorted->newCursor(opCtx.get(), false);
- cursor->setEndPosition(key3, true);
-
- ASSERT_EQ(cursor->seek(key4, true), IndexKeyEntry(key4, loc1));
+ // Dup key on end point. Illegal for unique indexes.
+ if (!unique)
+ insertToIndex(opCtx, sorted, {{key3, loc2}});
- cursor->saveUnpositioned();
- insertToIndex(opCtx, sorted, {
- {key2, loc1}, // in range
- {key3, loc1}, // out of range
- });
- cursor->restore(opCtx.get()); // must restore end cursor even with saveUnpositioned().
+ auto cursor = sorted->newCursor(opCtx.get(), false);
+ cursor->setEndPosition(key3, inclusive);
- ASSERT_EQ(cursor->seek(key4, true), IndexKeyEntry(key4, loc1));
+ ASSERT_EQ(cursor->seek(key5, true), IndexKeyEntry(key5, loc1));
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(key4, loc1));
+ if (inclusive) {
+ if (!unique)
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(key3, loc2));
ASSERT_EQ(cursor->next(), IndexKeyEntry(key3, loc1));
- ASSERT_EQ(cursor->next(), boost::none);
- }
- TEST(SortedDataInterface, SetEndPosition_RestoreEndCursor_Reverse_Standard) {
- testSetEndPosition_RestoreEndCursor_Reverse(true);
- }
- TEST(SortedDataInterface, SetEndPosition_RestoreEndCursor_Reverse_Unique) {
- testSetEndPosition_RestoreEndCursor_Reverse(false);
}
-
- // setEndPosition with empty BSONObj is supposed to mean "no end position", regardless of
- // inclusive flag or direction.
- void testSetEndPosition_Empty_Forward(bool unique, bool inclusive) {
- auto harnessHelper = newHarnessHelper();
- auto opCtx = harnessHelper->newOperationContext();
- auto sorted = harnessHelper->newSortedDataInterface(unique, {
- {key1, loc1},
- {key2, loc1},
- {key3, loc1},
+ ASSERT_EQ(cursor->next(), boost::none);
+ ASSERT_EQ(cursor->next(), boost::none); // don't resurrect.
+}
+TEST(SortedDataInterface, SetEndPosition_Next_Reverse_Unique_Inclusive) {
+ testSetEndPosition_Next_Reverse(true, true);
+}
+TEST(SortedDataInterface, SetEndPosition_Next_Reverse_Unique_Exclusive) {
+ testSetEndPosition_Next_Reverse(true, false);
+}
+TEST(SortedDataInterface, SetEndPosition_Next_Reverse_Standard_Inclusive) {
+ testSetEndPosition_Next_Reverse(false, true);
+}
+TEST(SortedDataInterface, SetEndPosition_Next_Reverse_Standard_Exclusive) {
+ testSetEndPosition_Next_Reverse(false, false);
+}
+
+// Tests setEndPosition with seek() and seekExact().
+void testSetEndPosition_Seek_Forward(bool unique, bool inclusive) {
+ auto harnessHelper = newHarnessHelper();
+ auto opCtx = harnessHelper->newOperationContext();
+ auto sorted = harnessHelper->newSortedDataInterface(unique,
+ {
+ {key1, loc1},
+ // No key2
+ {key3, loc1},
+ {key4, loc1},
+ });
+
+ auto cursor = sorted->newCursor(opCtx.get());
+ cursor->setEndPosition(key3, inclusive);
+
+ // Directly seeking past end is considered out of range.
+ ASSERT_EQ(cursor->seek(key4, true), boost::none);
+ ASSERT_EQ(cursor->seekExact(key4), boost::none);
+
+    // Seeking to key3, directly or indirectly, returns it only if endPosition is inclusive.
+ auto maybeKey3 = inclusive ? boost::make_optional(IndexKeyEntry(key3, loc1)) : boost::none;
+
+ // direct
+ ASSERT_EQ(cursor->seek(key3, true), maybeKey3);
+ ASSERT_EQ(cursor->seekExact(key3), maybeKey3);
+
+ // indirect
+ ASSERT_EQ(cursor->seek(key2, true), maybeKey3);
+
+ cursor->saveUnpositioned();
+ removeFromIndex(opCtx, sorted, {{key3, loc1}});
+ cursor->restore(opCtx.get());
+
+ ASSERT_EQ(cursor->seek(key2, true), boost::none);
+ ASSERT_EQ(cursor->seek(key3, true), boost::none);
+}
+TEST(SortedDataInterface, SetEndPosition_Seek_Forward_Unique_Inclusive) {
+ testSetEndPosition_Seek_Forward(true, true);
+}
+TEST(SortedDataInterface, SetEndPosition_Seek_Forward_Unique_Exclusive) {
+ testSetEndPosition_Seek_Forward(true, false);
+}
+TEST(SortedDataInterface, SetEndPosition_Seek_Forward_Standard_Inclusive) {
+ testSetEndPosition_Seek_Forward(false, true);
+}
+TEST(SortedDataInterface, SetEndPosition_Seek_Forward_Standard_Exclusive) {
+ testSetEndPosition_Seek_Forward(false, false);
+}
+
+void testSetEndPosition_Seek_Reverse(bool unique, bool inclusive) {
+ auto harnessHelper = newHarnessHelper();
+ auto opCtx = harnessHelper->newOperationContext();
+ auto sorted = harnessHelper->newSortedDataInterface(unique,
+ {
+ {key1, loc1},
+ {key2, loc1},
+ // No key3
+ {key4, loc1},
+ });
+
+ auto cursor = sorted->newCursor(opCtx.get(), false);
+ cursor->setEndPosition(key2, inclusive);
+
+ // Directly seeking past end is considered out of range.
+ ASSERT_EQ(cursor->seek(key1, true), boost::none);
+ ASSERT_EQ(cursor->seekExact(key1), boost::none);
+
+    // Seeking to key2, directly or indirectly, returns it only if endPosition is inclusive.
+ auto maybeKey2 = inclusive ? boost::make_optional(IndexKeyEntry(key2, loc1)) : boost::none;
+
+ // direct
+ ASSERT_EQ(cursor->seek(key2, true), maybeKey2);
+ ASSERT_EQ(cursor->seekExact(key2), maybeKey2);
+
+ // indirect
+ ASSERT_EQ(cursor->seek(key3, true), maybeKey2);
+
+ cursor->saveUnpositioned();
+ removeFromIndex(opCtx, sorted, {{key2, loc1}});
+ cursor->restore(opCtx.get());
+
+ ASSERT_EQ(cursor->seek(key3, true), boost::none);
+ ASSERT_EQ(cursor->seek(key2, true), boost::none);
+}
+TEST(SortedDataInterface, SetEndPosition_Seek_Reverse_Unique_Inclusive) {
+ testSetEndPosition_Seek_Reverse(true, true);
+}
+TEST(SortedDataInterface, SetEndPosition_Seek_Reverse_Unique_Exclusive) {
+ testSetEndPosition_Seek_Reverse(true, false);
+}
+TEST(SortedDataInterface, SetEndPosition_Seek_Reverse_Standard_Inclusive) {
+ testSetEndPosition_Seek_Reverse(false, true);
+}
+TEST(SortedDataInterface, SetEndPosition_Seek_Reverse_Standard_Exclusive) {
+ testSetEndPosition_Seek_Reverse(false, false);
+}
+
+// Test that restore never lands on the wrong side of the endPosition.
+void testSetEndPosition_Restore_Forward(bool unique) {
+ auto harnessHelper = newHarnessHelper();
+ auto opCtx = harnessHelper->newOperationContext();
+ auto sorted = harnessHelper->newSortedDataInterface(
+ unique,
+ {
+ {key1, loc1}, {key2, loc1}, {key3, loc1}, {key4, loc1},
});
- auto cursor = sorted->newCursor(opCtx.get());
- cursor->setEndPosition(BSONObj(), inclusive);
-
- ASSERT_EQ(cursor->seek(key1, true), IndexKeyEntry(key1, loc1));
- ASSERT_EQ(cursor->next(), IndexKeyEntry(key2, loc1));
- ASSERT_EQ(cursor->next(), IndexKeyEntry(key3, loc1));
- ASSERT_EQ(cursor->next(), boost::none);
- }
- TEST(SortedDataInterface, SetEndPosition_Empty_Forward_Unique_Inclusive) {
- testSetEndPosition_Empty_Forward(true, true);
- }
- TEST(SortedDataInterface, SetEndPosition_Empty_Forward_Unique_Exclusive) {
- testSetEndPosition_Empty_Forward(true, false);
- }
- TEST(SortedDataInterface, SetEndPosition_Empty_Forward_Standard_Inclusive) {
- testSetEndPosition_Empty_Forward(false, true);
- }
- TEST(SortedDataInterface, SetEndPosition_Empty_Forward_Standard_Exclusive) {
- testSetEndPosition_Empty_Forward(false, false);
- }
-
- void testSetEndPosition_Empty_Reverse(bool unique, bool inclusive) {
- auto harnessHelper = newHarnessHelper();
- auto opCtx = harnessHelper->newOperationContext();
- auto sorted = harnessHelper->newSortedDataInterface(unique, {
- {key1, loc1},
- {key2, loc1},
- {key3, loc1},
+ auto cursor = sorted->newCursor(opCtx.get());
+ cursor->setEndPosition(key3, false); // Should never see key3 or key4.
+
+ ASSERT_EQ(cursor->seek(key1, true), IndexKeyEntry(key1, loc1));
+
+ cursor->savePositioned();
+ cursor->restore(opCtx.get());
+
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(key2, loc1));
+
+ cursor->savePositioned();
+ removeFromIndex(opCtx,
+ sorted,
+ {
+ {key2, loc1}, {key3, loc1},
+ });
+ cursor->restore(opCtx.get());
+
+ ASSERT_EQ(cursor->next(), boost::none);
+}
+TEST(SortedDataInterface, SetEndPosition_Restore_Forward_Unique) {
+ testSetEndPosition_Restore_Forward(true);
+}
+TEST(SortedDataInterface, SetEndPosition_Restore_Forward_Standard) {
+ testSetEndPosition_Restore_Forward(false);
+}
+
+void testSetEndPosition_Restore_Reverse(bool unique) {
+ auto harnessHelper = newHarnessHelper();
+ auto opCtx = harnessHelper->newOperationContext();
+ auto sorted = harnessHelper->newSortedDataInterface(
+ unique,
+ {
+ {key1, loc1}, {key2, loc1}, {key3, loc1}, {key4, loc1},
});
- auto cursor = sorted->newCursor(opCtx.get(), false);
- cursor->setEndPosition(BSONObj(), inclusive);
-
- ASSERT_EQ(cursor->seek(key3, true), IndexKeyEntry(key3, loc1));
- ASSERT_EQ(cursor->next(), IndexKeyEntry(key2, loc1));
- ASSERT_EQ(cursor->next(), IndexKeyEntry(key1, loc1));
- ASSERT_EQ(cursor->next(), boost::none);
- }
- TEST(SortedDataInterface, SetEndPosition_Empty_Reverse_Unique_Inclusive) {
- testSetEndPosition_Empty_Reverse(true, true);
- }
- TEST(SortedDataInterface, SetEndPosition_Empty_Reverse_Unique_Exclusive) {
- testSetEndPosition_Empty_Reverse(true, false);
- }
- TEST(SortedDataInterface, SetEndPosition_Empty_Reverse_Standard_Inclusive) {
- testSetEndPosition_Empty_Reverse(false, true);
- }
- TEST(SortedDataInterface, SetEndPosition_Empty_Reverse_Standard_Exclusive) {
- testSetEndPosition_Empty_Reverse(false, false);
- }
-} // namespace mongo
+ auto cursor = sorted->newCursor(opCtx.get(), false);
+ cursor->setEndPosition(key2, false); // Should never see key1 or key2.
+
+ ASSERT_EQ(cursor->seek(key4, true), IndexKeyEntry(key4, loc1));
+
+ cursor->savePositioned();
+ cursor->restore(opCtx.get());
+
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(key3, loc1));
+
+ cursor->savePositioned();
+ removeFromIndex(opCtx,
+ sorted,
+ {
+ {key2, loc1}, {key3, loc1},
+ });
+ cursor->restore(opCtx.get());
+
+ ASSERT_EQ(cursor->next(), boost::none);
+}
+TEST(SortedDataInterface, SetEndPosition_Restore_Reverse_Unique) {
+ testSetEndPosition_Restore_Reverse(true);
+}
+TEST(SortedDataInterface, SetEndPosition_Restore_Reverse_Standard) {
+ testSetEndPosition_Restore_Reverse(false);
+}
+
+// Test that restore always updates the end cursor if one is used. Some storage engines use a
+// cursor positioned at the first out-of-range document and have next() check if the current
+// position is the same as the end cursor. End cursor maintenance cannot be directly tested
+// (since implementations are free not to use end cursors) but implementations that incorrectly
+// restore end cursors would tend to fail this test.
+void testSetEndPosition_RestoreEndCursor_Forward(bool unique) {
+ auto harnessHelper = newHarnessHelper();
+ auto opCtx = harnessHelper->newOperationContext();
+ auto sorted = harnessHelper->newSortedDataInterface(unique,
+ {
+ {key1, loc1}, {key4, loc1},
+ });
+
+ auto cursor = sorted->newCursor(opCtx.get());
+ cursor->setEndPosition(key2, true);
+
+ ASSERT_EQ(cursor->seek(key1, true), IndexKeyEntry(key1, loc1));
+
+    // A potential source of bugs is not restoring the end cursor with saveUnpositioned().
+ cursor->saveUnpositioned();
+ insertToIndex(opCtx,
+ sorted,
+ {
+ {key2, loc1}, // in range
+ {key3, loc1}, // out of range
+ });
+ cursor->restore(opCtx.get());
+
+ ASSERT_EQ(cursor->seek(key1, true), IndexKeyEntry(key1, loc1));
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(key2, loc1));
+ ASSERT_EQ(cursor->next(), boost::none);
+}
+TEST(SortedDataInterface, SetEndPosition_RestoreEndCursor_Forward_Unique) {
+ testSetEndPosition_RestoreEndCursor_Forward(true);
+}
+TEST(SortedDataInterface, SetEndPosition_RestoreEndCursor_Forward_Standard) {
+ testSetEndPosition_RestoreEndCursor_Forward(false);
+}
+
+void testSetEndPosition_RestoreEndCursor_Reverse(bool unique) {
+ auto harnessHelper = newHarnessHelper();
+ auto opCtx = harnessHelper->newOperationContext();
+ auto sorted = harnessHelper->newSortedDataInterface(unique,
+ {
+ {key1, loc1}, {key4, loc1},
+ });
+
+ auto cursor = sorted->newCursor(opCtx.get(), false);
+ cursor->setEndPosition(key3, true);
+
+ ASSERT_EQ(cursor->seek(key4, true), IndexKeyEntry(key4, loc1));
+
+ cursor->saveUnpositioned();
+ insertToIndex(opCtx,
+ sorted,
+ {
+ {key2, loc1}, // in range
+ {key3, loc1}, // out of range
+ });
+ cursor->restore(opCtx.get()); // must restore end cursor even with saveUnpositioned().
+
+ ASSERT_EQ(cursor->seek(key4, true), IndexKeyEntry(key4, loc1));
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(key3, loc1));
+ ASSERT_EQ(cursor->next(), boost::none);
+}
+TEST(SortedDataInterface, SetEndPosition_RestoreEndCursor_Reverse_Unique) {
+    testSetEndPosition_RestoreEndCursor_Reverse(true);
+}
+TEST(SortedDataInterface, SetEndPosition_RestoreEndCursor_Reverse_Standard) {
+    testSetEndPosition_RestoreEndCursor_Reverse(false);
+}
+
+// setEndPosition with empty BSONObj is supposed to mean "no end position", regardless of
+// inclusive flag or direction.
+void testSetEndPosition_Empty_Forward(bool unique, bool inclusive) {
+ auto harnessHelper = newHarnessHelper();
+ auto opCtx = harnessHelper->newOperationContext();
+ auto sorted = harnessHelper->newSortedDataInterface(unique,
+ {
+ {key1, loc1}, {key2, loc1}, {key3, loc1},
+ });
+
+ auto cursor = sorted->newCursor(opCtx.get());
+ cursor->setEndPosition(BSONObj(), inclusive);
+
+ ASSERT_EQ(cursor->seek(key1, true), IndexKeyEntry(key1, loc1));
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(key2, loc1));
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(key3, loc1));
+ ASSERT_EQ(cursor->next(), boost::none);
+}
+TEST(SortedDataInterface, SetEndPosition_Empty_Forward_Unique_Inclusive) {
+ testSetEndPosition_Empty_Forward(true, true);
+}
+TEST(SortedDataInterface, SetEndPosition_Empty_Forward_Unique_Exclusive) {
+ testSetEndPosition_Empty_Forward(true, false);
+}
+TEST(SortedDataInterface, SetEndPosition_Empty_Forward_Standard_Inclusive) {
+ testSetEndPosition_Empty_Forward(false, true);
+}
+TEST(SortedDataInterface, SetEndPosition_Empty_Forward_Standard_Exclusive) {
+ testSetEndPosition_Empty_Forward(false, false);
+}
+
+void testSetEndPosition_Empty_Reverse(bool unique, bool inclusive) {
+ auto harnessHelper = newHarnessHelper();
+ auto opCtx = harnessHelper->newOperationContext();
+ auto sorted = harnessHelper->newSortedDataInterface(unique,
+ {
+ {key1, loc1}, {key2, loc1}, {key3, loc1},
+ });
+
+ auto cursor = sorted->newCursor(opCtx.get(), false);
+ cursor->setEndPosition(BSONObj(), inclusive);
+
+ ASSERT_EQ(cursor->seek(key3, true), IndexKeyEntry(key3, loc1));
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(key2, loc1));
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(key1, loc1));
+ ASSERT_EQ(cursor->next(), boost::none);
+}
+TEST(SortedDataInterface, SetEndPosition_Empty_Reverse_Unique_Inclusive) {
+ testSetEndPosition_Empty_Reverse(true, true);
+}
+TEST(SortedDataInterface, SetEndPosition_Empty_Reverse_Unique_Exclusive) {
+ testSetEndPosition_Empty_Reverse(true, false);
+}
+TEST(SortedDataInterface, SetEndPosition_Empty_Reverse_Standard_Inclusive) {
+ testSetEndPosition_Empty_Reverse(false, true);
+}
+TEST(SortedDataInterface, SetEndPosition_Empty_Reverse_Standard_Exclusive) {
+ testSetEndPosition_Empty_Reverse(false, false);
+}
+} // namespace mongo
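
The end-position tests above encode a single contract: an inclusive bound ends
the scan after returning the bound key, an exclusive bound ends it before, and
an empty BSONObj means no end position at all. Below is a minimal standalone
sketch of that contract for a forward scan, again with a std::map of integer
keys standing in for the index and std::optional standing in for the "empty
key" case; it is illustrative only, not the SortedDataInterface API.

    #include <cassert>
    #include <map>
    #include <optional>
    #include <string>
    #include <vector>

    int main() {
        std::map<int, std::string> index{{1, "a"}, {2, "b"}, {3, "c"}, {4, "d"}, {5, "e"}};

        // Collect keys from the start of the index up to the end position.
        auto scan = [&](std::optional<int> end, bool inclusive) {
            auto stop = !end ? index.end()                // no end position
                : inclusive ? index.upper_bound(*end)     // bound key included
                            : index.lower_bound(*end);    // stop before bound key
            std::vector<int> out;
            for (auto it = index.begin(); it != stop; ++it)
                out.push_back(it->first);
            return out;
        };

        assert((scan(3, true) == std::vector<int>{1, 2, 3}));   // inclusive
        assert((scan(3, false) == std::vector<int>{1, 2}));     // exclusive
        assert(scan(std::nullopt, true).size() == 5);           // unbounded
        return 0;
    }

The restore tests then pin down one more property: after save/restore the end
bound must still be enforced, so the sketch's "stop" iterator would have to be
recomputed from the bound key across a restore rather than cached.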
diff --git a/src/mongo/db/storage/sorted_data_interface_test_cursor_locate.cpp b/src/mongo/db/storage/sorted_data_interface_test_cursor_locate.cpp
index b69a39a15fd..80e71b57da2 100644
--- a/src/mongo/db/storage/sorted_data_interface_test_cursor_locate.cpp
+++ b/src/mongo/db/storage/sorted_data_interface_test_cursor_locate.cpp
@@ -37,591 +37,608 @@
namespace mongo {
- // Insert a key and try to locate it using a forward cursor
- // by specifying its exact key and RecordId.
- TEST( SortedDataInterface, Locate ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get()) );
- ASSERT( !cursor->seek( key1, true ) );
- }
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), key1, loc1, true ) );
- uow.commit();
- }
- }
+// Insert a key and try to locate it using a forward cursor
+// by specifying its exact key and RecordId.
+TEST(SortedDataInterface, Locate) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(sorted->newCursor(opCtx.get()));
+ ASSERT(!cursor->seek(key1, true));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get()) );
-
- ASSERT_EQ(cursor->seek(key1, true), IndexKeyEntry(key1, loc1));
- ASSERT_EQ(cursor->next(), boost::none);
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), key1, loc1, true));
+ uow.commit();
}
}
- // Insert a key and try to locate it using a reverse cursor
- // by specifying its exact key and RecordId.
- TEST( SortedDataInterface, LocateReversed ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(sorted->newCursor(opCtx.get()));
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get(), false) );
- ASSERT( !cursor->seek( key1, true ) );
- }
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), key1, loc1, true ) );
- uow.commit();
- }
- }
+ ASSERT_EQ(cursor->seek(key1, true), IndexKeyEntry(key1, loc1));
+ ASSERT_EQ(cursor->next(), boost::none);
+ }
+}
+
+// Insert a key and try to locate it using a reverse cursor
+// by specifying its exact key and RecordId.
+TEST(SortedDataInterface, LocateReversed) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(
+ sorted->newCursor(opCtx.get(), false));
+ ASSERT(!cursor->seek(key1, true));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get(), false) );
-
- ASSERT_EQ(cursor->seek(key1, true), IndexKeyEntry(key1, loc1));
- ASSERT_EQ(cursor->next(), boost::none);
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), key1, loc1, true));
+ uow.commit();
}
}
- // Insert a compound key and try to locate it using a forward cursor
- // by specifying its exact key and RecordId.
- TEST( SortedDataInterface, LocateCompoundKey ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get()) );
- ASSERT( !cursor->seek( compoundKey1a, true ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(
+ sorted->newCursor(opCtx.get(), false));
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), compoundKey1a, loc1, true ) );
- uow.commit();
- }
- }
+ ASSERT_EQ(cursor->seek(key1, true), IndexKeyEntry(key1, loc1));
+ ASSERT_EQ(cursor->next(), boost::none);
+ }
+}
+
+// Insert a compound key and try to locate it using a forward cursor
+// by specifying its exact key and RecordId.
+TEST(SortedDataInterface, LocateCompoundKey) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(sorted->newCursor(opCtx.get()));
+ ASSERT(!cursor->seek(compoundKey1a, true));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get()) );
-
- ASSERT_EQ(cursor->seek(compoundKey1a, true), IndexKeyEntry(compoundKey1a, loc1));
- ASSERT_EQ(cursor->next(), boost::none);
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), compoundKey1a, loc1, true));
+ uow.commit();
}
}
- // Insert a compound key and try to locate it using a reverse cursor
- // by specifying its exact key and RecordId.
- TEST( SortedDataInterface, LocateCompoundKeyReversed ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(sorted->newCursor(opCtx.get()));
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get(), false) );
- ASSERT( !cursor->seek( compoundKey1a, true ) );
- }
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), compoundKey1a, loc1, true ) );
- uow.commit();
- }
- }
+ ASSERT_EQ(cursor->seek(compoundKey1a, true), IndexKeyEntry(compoundKey1a, loc1));
+ ASSERT_EQ(cursor->next(), boost::none);
+ }
+}
+
+// Insert a compound key and try to locate it using a reverse cursor
+// by specifying its exact key and RecordId.
+TEST(SortedDataInterface, LocateCompoundKeyReversed) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(
+ sorted->newCursor(opCtx.get(), false));
+ ASSERT(!cursor->seek(compoundKey1a, true));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get(), false) );
-
- ASSERT_EQ(cursor->seek(compoundKey1a, true), IndexKeyEntry(compoundKey1a, loc1));
- ASSERT_EQ(cursor->next(), boost::none);
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), compoundKey1a, loc1, true));
+ uow.commit();
}
}
- // Insert multiple keys and try to locate them using a forward cursor
- // by specifying their exact key and RecordId.
- TEST( SortedDataInterface, LocateMultiple ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(
+ sorted->newCursor(opCtx.get(), false));
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get()) );
- ASSERT( !cursor->seek( key1, true ) );
- }
+ ASSERT_EQ(cursor->seek(compoundKey1a, true), IndexKeyEntry(compoundKey1a, loc1));
+ ASSERT_EQ(cursor->next(), boost::none);
+ }
+}
+
+// Insert multiple keys and try to locate them using a forward cursor
+// by specifying their exact key and RecordId.
+TEST(SortedDataInterface, LocateMultiple) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(sorted->newCursor(opCtx.get()));
+ ASSERT(!cursor->seek(key1, true));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), key1, loc1, true ) );
- ASSERT_OK( sorted->insert( opCtx.get(), key2, loc2, true ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), key1, loc1, true));
+ ASSERT_OK(sorted->insert(opCtx.get(), key2, loc2, true));
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get()) );
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(sorted->newCursor(opCtx.get()));
- ASSERT_EQ(cursor->seek(key1, true), IndexKeyEntry(key1, loc1));
- ASSERT_EQ(cursor->next(), IndexKeyEntry(key2, loc2));
- ASSERT_EQ(cursor->next(), boost::none);
- }
+ ASSERT_EQ(cursor->seek(key1, true), IndexKeyEntry(key1, loc1));
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(key2, loc2));
+ ASSERT_EQ(cursor->next(), boost::none);
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), key3, loc3, true ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), key3, loc3, true));
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get()) );
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(sorted->newCursor(opCtx.get()));
- ASSERT_EQ(cursor->seek(key2, true), IndexKeyEntry(key2, loc2));
- ASSERT_EQ(cursor->next(), IndexKeyEntry(key3, loc3));
- ASSERT_EQ(cursor->next(), boost::none);
+ ASSERT_EQ(cursor->seek(key2, true), IndexKeyEntry(key2, loc2));
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(key3, loc3));
+ ASSERT_EQ(cursor->next(), boost::none);
- ASSERT_EQ(cursor->seek(key1, true), IndexKeyEntry(key1, loc1));
- ASSERT_EQ(cursor->next(), IndexKeyEntry(key2, loc2));
- ASSERT_EQ(cursor->next(), IndexKeyEntry(key3, loc3));
- ASSERT_EQ(cursor->next(), boost::none);
- }
+ ASSERT_EQ(cursor->seek(key1, true), IndexKeyEntry(key1, loc1));
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(key2, loc2));
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(key3, loc3));
+ ASSERT_EQ(cursor->next(), boost::none);
+ }
+}
+
+// Insert multiple keys and try to locate them using a reverse cursor
+// by specifying their exact key and RecordId.
+TEST(SortedDataInterface, LocateMultipleReversed) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(
+ sorted->newCursor(opCtx.get(), false));
+ ASSERT(!cursor->seek(key3, true));
}
- // Insert multiple keys and try to locate them using a reverse cursor
- // by specifying their exact key and RecordId.
- TEST( SortedDataInterface, LocateMultipleReversed ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get(), false) );
- ASSERT( !cursor->seek( key3, true ) );
- }
-
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), key1, loc1, true ) );
- ASSERT_OK( sorted->insert( opCtx.get(), key2, loc2, true ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), key1, loc1, true));
+ ASSERT_OK(sorted->insert(opCtx.get(), key2, loc2, true));
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get(), false) );
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(
+ sorted->newCursor(opCtx.get(), false));
- ASSERT_EQ(cursor->seek(key2, true), IndexKeyEntry(key2, loc2));
- ASSERT_EQ(cursor->next(), IndexKeyEntry(key1, loc1));
- ASSERT_EQ(cursor->next(), boost::none);
- }
+ ASSERT_EQ(cursor->seek(key2, true), IndexKeyEntry(key2, loc2));
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(key1, loc1));
+ ASSERT_EQ(cursor->next(), boost::none);
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), key3, loc3, true ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), key3, loc3, true));
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get(), false) );
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(
+ sorted->newCursor(opCtx.get(), false));
- ASSERT_EQ(cursor->seek(key2, true), IndexKeyEntry(key2, loc2));
- ASSERT_EQ(cursor->next(), IndexKeyEntry(key1, loc1));
- ASSERT_EQ(cursor->next(), boost::none);
+ ASSERT_EQ(cursor->seek(key2, true), IndexKeyEntry(key2, loc2));
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(key1, loc1));
+ ASSERT_EQ(cursor->next(), boost::none);
- ASSERT_EQ(cursor->seek(key3, true), IndexKeyEntry(key3, loc3));
- ASSERT_EQ(cursor->next(), IndexKeyEntry(key2, loc2));
- ASSERT_EQ(cursor->next(), IndexKeyEntry(key1, loc1));
- ASSERT_EQ(cursor->next(), boost::none);
- }
+ ASSERT_EQ(cursor->seek(key3, true), IndexKeyEntry(key3, loc3));
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(key2, loc2));
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(key1, loc1));
+ ASSERT_EQ(cursor->next(), boost::none);
+ }
+}
+
+// Insert multiple compound keys and try to locate them using a forward cursor
+// by specifying their exact key and RecordId.
+TEST(SortedDataInterface, LocateMultipleCompoundKeys) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(sorted->newCursor(opCtx.get()));
+ ASSERT(!cursor->seek(compoundKey1a, true));
}
- // Insert multiple compound keys and try to locate them using a forward cursor
- // by specifying their exact key and RecordId.
- TEST( SortedDataInterface, LocateMultipleCompoundKeys ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get()) );
- ASSERT( !cursor->seek( compoundKey1a, true ) );
- }
-
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), compoundKey1a, loc1, true ) );
- ASSERT_OK( sorted->insert( opCtx.get(), compoundKey1b, loc2, true ) );
- ASSERT_OK( sorted->insert( opCtx.get(), compoundKey2b, loc3, true ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), compoundKey1a, loc1, true));
+ ASSERT_OK(sorted->insert(opCtx.get(), compoundKey1b, loc2, true));
+ ASSERT_OK(sorted->insert(opCtx.get(), compoundKey2b, loc3, true));
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get()) );
-
- ASSERT_EQ(cursor->seek(compoundKey1a, true), IndexKeyEntry(compoundKey1a, loc1));
- ASSERT_EQ(cursor->next(), IndexKeyEntry(compoundKey1b, loc2));
- ASSERT_EQ(cursor->next(), IndexKeyEntry(compoundKey2b, loc3));
- ASSERT_EQ(cursor->next(), boost::none);
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(sorted->newCursor(opCtx.get()));
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), compoundKey1c, loc4, true ) );
- ASSERT_OK( sorted->insert( opCtx.get(), compoundKey3a, loc5, true ) );
- uow.commit();
- }
- }
+ ASSERT_EQ(cursor->seek(compoundKey1a, true), IndexKeyEntry(compoundKey1a, loc1));
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(compoundKey1b, loc2));
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(compoundKey2b, loc3));
+ ASSERT_EQ(cursor->next(), boost::none);
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get()) );
-
- ASSERT_EQ(cursor->seek(compoundKey1a, true), IndexKeyEntry(compoundKey1a, loc1));
- ASSERT_EQ(cursor->next(), IndexKeyEntry(compoundKey1b, loc2));
- ASSERT_EQ(cursor->next(), IndexKeyEntry(compoundKey1c, loc4));
- ASSERT_EQ(cursor->next(), IndexKeyEntry(compoundKey2b, loc3));
- ASSERT_EQ(cursor->next(), IndexKeyEntry(compoundKey3a, loc5));
- ASSERT_EQ(cursor->next(), boost::none);
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), compoundKey1c, loc4, true));
+ ASSERT_OK(sorted->insert(opCtx.get(), compoundKey3a, loc5, true));
+ uow.commit();
}
}
- // Insert multiple compound keys and try to locate them using a reverse cursor
- // by specifying their exact key and RecordId.
- TEST( SortedDataInterface, LocateMultipleCompoundKeysReversed ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(sorted->newCursor(opCtx.get()));
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get(), false) );
- ASSERT( !cursor->seek( compoundKey3a, true ) );
- }
+ ASSERT_EQ(cursor->seek(compoundKey1a, true), IndexKeyEntry(compoundKey1a, loc1));
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(compoundKey1b, loc2));
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(compoundKey1c, loc4));
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(compoundKey2b, loc3));
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(compoundKey3a, loc5));
+ ASSERT_EQ(cursor->next(), boost::none);
+ }
+}
+
+// Insert multiple compound keys and try to locate them using a reverse cursor
+// by specifying their exact key and RecordId.
+TEST(SortedDataInterface, LocateMultipleCompoundKeysReversed) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(
+ sorted->newCursor(opCtx.get(), false));
+ ASSERT(!cursor->seek(compoundKey3a, true));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), compoundKey1a, loc1, true ) );
- ASSERT_OK( sorted->insert( opCtx.get(), compoundKey1b, loc2, true ) );
- ASSERT_OK( sorted->insert( opCtx.get(), compoundKey2b, loc3, true ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), compoundKey1a, loc1, true));
+ ASSERT_OK(sorted->insert(opCtx.get(), compoundKey1b, loc2, true));
+ ASSERT_OK(sorted->insert(opCtx.get(), compoundKey2b, loc3, true));
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get(), false) );
-
- ASSERT_EQ(cursor->seek(compoundKey2b, true), IndexKeyEntry(compoundKey2b, loc3));
- ASSERT_EQ(cursor->next(), IndexKeyEntry(compoundKey1b, loc2));
- ASSERT_EQ(cursor->next(), IndexKeyEntry(compoundKey1a, loc1));
- ASSERT_EQ(cursor->next(), boost::none);
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(
+ sorted->newCursor(opCtx.get(), false));
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), compoundKey1c, loc4, true ) );
- ASSERT_OK( sorted->insert( opCtx.get(), compoundKey3a, loc5, true ) );
- uow.commit();
- }
- }
+ ASSERT_EQ(cursor->seek(compoundKey2b, true), IndexKeyEntry(compoundKey2b, loc3));
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(compoundKey1b, loc2));
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(compoundKey1a, loc1));
+ ASSERT_EQ(cursor->next(), boost::none);
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get(), false) );
-
- ASSERT_EQ(cursor->seek(compoundKey3a, true), IndexKeyEntry(compoundKey3a, loc5));
- ASSERT_EQ(cursor->next(), IndexKeyEntry(compoundKey2b, loc3));
- ASSERT_EQ(cursor->next(), IndexKeyEntry(compoundKey1c, loc4));
- ASSERT_EQ(cursor->next(), IndexKeyEntry(compoundKey1b, loc2));
- ASSERT_EQ(cursor->next(), IndexKeyEntry(compoundKey1a, loc1));
- ASSERT_EQ(cursor->next(), boost::none);
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), compoundKey1c, loc4, true));
+ ASSERT_OK(sorted->insert(opCtx.get(), compoundKey3a, loc5, true));
+ uow.commit();
}
}
- // Insert multiple keys and try to locate them using a forward cursor
- // by specifying either a smaller key or RecordId.
- TEST( SortedDataInterface, LocateIndirect ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get()) );
- ASSERT( !cursor->seek( key1, true ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(
+ sorted->newCursor(opCtx.get(), false));
+
+ ASSERT_EQ(cursor->seek(compoundKey3a, true), IndexKeyEntry(compoundKey3a, loc5));
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(compoundKey2b, loc3));
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(compoundKey1c, loc4));
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(compoundKey1b, loc2));
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(compoundKey1a, loc1));
+ ASSERT_EQ(cursor->next(), boost::none);
+ }
+}
+
+// Insert multiple keys and try to locate them using a forward cursor
+// by specifying either a smaller key or RecordId.
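+// For example, with key1 and key2 present, seek(key1, /*inclusive=*/false) is expected to
+// position the cursor on key2, the first entry strictly greater than the seek key.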
+TEST(SortedDataInterface, LocateIndirect) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(sorted->newCursor(opCtx.get()));
+ ASSERT(!cursor->seek(key1, true));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), key1, loc1, true ) );
- ASSERT_OK( sorted->insert( opCtx.get(), key2, loc2, true ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), key1, loc1, true));
+ ASSERT_OK(sorted->insert(opCtx.get(), key2, loc2, true));
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get()) );
-
- ASSERT_EQ(cursor->seek(key1, false), IndexKeyEntry(key2, loc2));
- ASSERT_EQ(cursor->next(), boost::none);
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(sorted->newCursor(opCtx.get()));
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), key3, loc3, true ) );
- uow.commit();
- }
- }
+ ASSERT_EQ(cursor->seek(key1, false), IndexKeyEntry(key2, loc2));
+ ASSERT_EQ(cursor->next(), boost::none);
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get()) );
-
- ASSERT_EQ(cursor->seek(key1, true), IndexKeyEntry(key1, loc1));
- ASSERT_EQ(cursor->next(), IndexKeyEntry(key2, loc2));
- ASSERT_EQ(cursor->next(), IndexKeyEntry(key3, loc3));
- ASSERT_EQ(cursor->next(), boost::none);
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), key3, loc3, true));
+ uow.commit();
}
}
- // Insert multiple keys and try to locate them using a reverse cursor
- // by specifying either a larger key or RecordId.
- TEST( SortedDataInterface, LocateIndirectReversed ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(sorted->newCursor(opCtx.get()));
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get(), false) );
- ASSERT( !cursor->seek( key3, true ) );
- }
+ ASSERT_EQ(cursor->seek(key1, true), IndexKeyEntry(key1, loc1));
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(key2, loc2));
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(key3, loc3));
+ ASSERT_EQ(cursor->next(), boost::none);
+ }
+}
+
+// Insert multiple keys and try to locate them using a reverse cursor
+// by specifying either a larger key or RecordId.
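+// The reverse cursor mirrors the forward case: seek(key2, /*inclusive=*/false) is expected
+// to land on key1, the first entry strictly less than the seek key.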
+TEST(SortedDataInterface, LocateIndirectReversed) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(
+ sorted->newCursor(opCtx.get(), false));
+ ASSERT(!cursor->seek(key3, true));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), key1, loc1, true ) );
- ASSERT_OK( sorted->insert( opCtx.get(), key2, loc2, true ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), key1, loc1, true));
+ ASSERT_OK(sorted->insert(opCtx.get(), key2, loc2, true));
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get(), false) );
-
- ASSERT_EQ(cursor->seek(key2, false), IndexKeyEntry(key1, loc1));
- ASSERT_EQ(cursor->next(), boost::none);
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(
+ sorted->newCursor(opCtx.get(), false));
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), key3, loc3, true ) );
- uow.commit();
- }
- }
+ ASSERT_EQ(cursor->seek(key2, false), IndexKeyEntry(key1, loc1));
+ ASSERT_EQ(cursor->next(), boost::none);
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get(), false) );
-
- ASSERT_EQ(cursor->seek(key3, true), IndexKeyEntry(key3, loc3));
- ASSERT_EQ(cursor->next(), IndexKeyEntry(key2, loc2));
- ASSERT_EQ(cursor->next(), IndexKeyEntry(key1, loc1));
- ASSERT_EQ(cursor->next(), boost::none);
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), key3, loc3, true));
+ uow.commit();
}
}
- // Insert multiple compound keys and try to locate them using a forward cursor
- // by specifying either a smaller key or RecordId.
- TEST( SortedDataInterface, LocateIndirectCompoundKeys ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(
+ sorted->newCursor(opCtx.get(), false));
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get()) );
- ASSERT( !cursor->seek( compoundKey1a, true ) );
- }
+ ASSERT_EQ(cursor->seek(key3, true), IndexKeyEntry(key3, loc3));
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(key2, loc2));
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(key1, loc1));
+ ASSERT_EQ(cursor->next(), boost::none);
+ }
+}
+
+// Insert multiple compound keys and try to locate them using a forward cursor
+// by specifying either a smaller key or RecordId.
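+// An inclusive seek for an absent key (compoundKey2a below) is expected to settle on the
+// next entry in cursor order (compoundKey2b) rather than report a miss.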
+TEST(SortedDataInterface, LocateIndirectCompoundKeys) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(sorted->newCursor(opCtx.get()));
+ ASSERT(!cursor->seek(compoundKey1a, true));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), compoundKey1a, loc1, true ) );
- ASSERT_OK( sorted->insert( opCtx.get(), compoundKey1b, loc2, true ) );
- ASSERT_OK( sorted->insert( opCtx.get(), compoundKey2b, loc3, true ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), compoundKey1a, loc1, true));
+ ASSERT_OK(sorted->insert(opCtx.get(), compoundKey1b, loc2, true));
+ ASSERT_OK(sorted->insert(opCtx.get(), compoundKey2b, loc3, true));
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get()) );
-
- ASSERT_EQ(cursor->seek(compoundKey1a, false), IndexKeyEntry(compoundKey1b, loc2));
- ASSERT_EQ(cursor->next(), IndexKeyEntry(compoundKey2b, loc3));
- ASSERT_EQ(cursor->next(), boost::none);
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(sorted->newCursor(opCtx.get()));
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), compoundKey1c, loc4, true ) );
- ASSERT_OK( sorted->insert( opCtx.get(), compoundKey3a, loc5, true ) );
- uow.commit();
- }
- }
+ ASSERT_EQ(cursor->seek(compoundKey1a, false), IndexKeyEntry(compoundKey1b, loc2));
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(compoundKey2b, loc3));
+ ASSERT_EQ(cursor->next(), boost::none);
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get()) );
-
- ASSERT_EQ(cursor->seek(compoundKey2a, true), IndexKeyEntry(compoundKey2b, loc3));
- ASSERT_EQ(cursor->next(), IndexKeyEntry(compoundKey3a, loc5));
- ASSERT_EQ(cursor->next(), boost::none);
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), compoundKey1c, loc4, true));
+ ASSERT_OK(sorted->insert(opCtx.get(), compoundKey3a, loc5, true));
+ uow.commit();
}
}
- // Insert multiple compound keys and try to locate them using a reverse cursor
- // by specifying either a larger key or RecordId.
- TEST( SortedDataInterface, LocateIndirectCompoundKeysReversed ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(sorted->newCursor(opCtx.get()));
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get(), false) );
- ASSERT( !cursor->seek( compoundKey3a, true ) );
- }
+ ASSERT_EQ(cursor->seek(compoundKey2a, true), IndexKeyEntry(compoundKey2b, loc3));
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(compoundKey3a, loc5));
+ ASSERT_EQ(cursor->next(), boost::none);
+ }
+}
+
+// Insert multiple compound keys and try to locate them using a reverse cursor
+// by specifying either a larger key or RecordId.
+TEST(SortedDataInterface, LocateIndirectCompoundKeysReversed) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(
+ sorted->newCursor(opCtx.get(), false));
+ ASSERT(!cursor->seek(compoundKey3a, true));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), compoundKey1a, loc1, true ) );
- ASSERT_OK( sorted->insert( opCtx.get(), compoundKey1b, loc2, true ) );
- ASSERT_OK( sorted->insert( opCtx.get(), compoundKey2b, loc3, true ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), compoundKey1a, loc1, true));
+ ASSERT_OK(sorted->insert(opCtx.get(), compoundKey1b, loc2, true));
+ ASSERT_OK(sorted->insert(opCtx.get(), compoundKey2b, loc3, true));
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get(), false) );
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(
+ sorted->newCursor(opCtx.get(), false));
- ASSERT_EQ(cursor->seek(compoundKey2b, false), IndexKeyEntry(compoundKey1b, loc2));
- ASSERT_EQ(cursor->next(), IndexKeyEntry(compoundKey1a, loc1));
- ASSERT_EQ(cursor->next(), boost::none);
- }
+ ASSERT_EQ(cursor->seek(compoundKey2b, false), IndexKeyEntry(compoundKey1b, loc2));
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(compoundKey1a, loc1));
+ ASSERT_EQ(cursor->next(), boost::none);
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), compoundKey1c, loc4, true ) );
- ASSERT_OK( sorted->insert( opCtx.get(), compoundKey3a, loc5, true ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), compoundKey1c, loc4, true));
+ ASSERT_OK(sorted->insert(opCtx.get(), compoundKey3a, loc5, true));
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get(), false) );
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(
+ sorted->newCursor(opCtx.get(), false));
- ASSERT_EQ(cursor->seek(compoundKey1d, true), IndexKeyEntry(compoundKey1c, loc4));
- ASSERT_EQ(cursor->next(), IndexKeyEntry(compoundKey1b, loc2));
- ASSERT_EQ(cursor->next(), IndexKeyEntry(compoundKey1a, loc1));
- ASSERT_EQ(cursor->next(), boost::none);
- }
+ ASSERT_EQ(cursor->seek(compoundKey1d, true), IndexKeyEntry(compoundKey1c, loc4));
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(compoundKey1b, loc2));
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(compoundKey1a, loc1));
+ ASSERT_EQ(cursor->next(), boost::none);
}
+}
- // Call locate on a forward cursor of an empty index and verify that the cursor
- // is positioned at EOF.
- TEST( SortedDataInterface, LocateEmpty ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
+// Call locate on a forward cursor of an empty index and verify that the cursor
+// is positioned at EOF.
+TEST(SortedDataInterface, LocateEmpty) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get()) );
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(sorted->newCursor(opCtx.get()));
- ASSERT( !cursor->seek( BSONObj(), true ) );
- ASSERT( !cursor->next() );
- }
+ ASSERT(!cursor->seek(BSONObj(), true));
+ ASSERT(!cursor->next());
}
+}
- // Call locate on a reverse cursor of an empty index and verify that the cursor
- // is positioned at EOF.
- TEST( SortedDataInterface, LocateEmptyReversed ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
+// Call locate on a reverse cursor of an empty index and verify that the cursor
+// is positioned at EOF.
+TEST(SortedDataInterface, LocateEmptyReversed) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get(), false) );
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(
+ sorted->newCursor(opCtx.get(), false));
- ASSERT( !cursor->seek( BSONObj(), true ) );
- ASSERT( !cursor->next() );
- }
+ ASSERT(!cursor->seek(BSONObj(), true));
+ ASSERT(!cursor->next());
}
+}
-} // namespace mongo
+} // namespace mongo
diff --git a/src/mongo/db/storage/sorted_data_interface_test_cursor_saverestore.cpp b/src/mongo/db/storage/sorted_data_interface_test_cursor_saverestore.cpp
index aa1b2adfe00..679bb3f8c8b 100644
--- a/src/mongo/db/storage/sorted_data_interface_test_cursor_saverestore.cpp
+++ b/src/mongo/db/storage/sorted_data_interface_test_cursor_saverestore.cpp
@@ -37,480 +37,481 @@
namespace mongo {
- // Insert multiple keys and try to iterate through all of them
- // using a forward cursor while calling savePosition() and
- // restorePosition() in succession.
- TEST( SortedDataInterface, SaveAndRestorePositionWhileIterateCursor ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- }
-
- int nToInsert = 10;
- for ( int i = 0; i < nToInsert; i++ ) {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- BSONObj key = BSON( "" << i );
- RecordId loc( 42, i * 2 );
- ASSERT_OK( sorted->insert( opCtx.get(), key, loc, true ) );
- uow.commit();
- }
- }
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( nToInsert, sorted->numEntries( opCtx.get() ) );
- }
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get()) );
- int i = 0;
- for (auto entry = cursor->seek(minKey, true); entry; i++, entry = cursor->next()) {
- ASSERT_LT(i, nToInsert);
- ASSERT_EQ(entry, IndexKeyEntry(BSON( "" << i), RecordId(42, i * 2)));
-
- cursor->savePositioned();
- cursor->restore( opCtx.get() );
- }
- ASSERT( !cursor->next() );
- ASSERT_EQ(i, nToInsert);
- }
+// Insert multiple keys and try to iterate through all of them
+// using a forward cursor while calling savePositioned() and
+// restore() in succession.
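+// Calling the save/restore pair between every advance must not disturb iteration order.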
+TEST(SortedDataInterface, SaveAndRestorePositionWhileIterateCursor) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
}
- // Insert multiple keys and try to iterate through all of them
- // using a reverse cursor while calling savePosition() and
- // restorePosition() in succession.
- TEST( SortedDataInterface, SaveAndRestorePositionWhileIterateCursorReversed ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
-
+ int nToInsert = 10;
+ for (int i = 0; i < nToInsert; i++) {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
+ WriteUnitOfWork uow(opCtx.get());
+ BSONObj key = BSON("" << i);
+ RecordId loc(42, i * 2);
+ ASSERT_OK(sorted->insert(opCtx.get(), key, loc, true));
+ uow.commit();
}
+ }
- int nToInsert = 10;
- for ( int i = 0; i < nToInsert; i++ ) {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- BSONObj key = BSON( "" << i );
- RecordId loc( 42, i * 2 );
- ASSERT_OK( sorted->insert( opCtx.get(), key, loc, true ) );
- uow.commit();
- }
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(nToInsert, sorted->numEntries(opCtx.get()));
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( nToInsert, sorted->numEntries( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(sorted->newCursor(opCtx.get()));
+ int i = 0;
+ for (auto entry = cursor->seek(minKey, true); entry; i++, entry = cursor->next()) {
+ ASSERT_LT(i, nToInsert);
+ ASSERT_EQ(entry, IndexKeyEntry(BSON("" << i), RecordId(42, i * 2)));
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get(), false) );
- int i = nToInsert - 1;
- for (auto entry = cursor->seek(maxKey, true); entry; i--, entry = cursor->next()) {
- ASSERT_GTE(i, 0);
- ASSERT_EQ(entry, IndexKeyEntry(BSON( "" << i), RecordId(42, i * 2)));
-
- cursor->savePositioned();
- cursor->restore( opCtx.get() );
- }
- ASSERT( !cursor->next() );
- ASSERT_EQ(i, -1);
+ cursor->savePositioned();
+ cursor->restore(opCtx.get());
}
+ ASSERT(!cursor->next());
+ ASSERT_EQ(i, nToInsert);
+ }
+}
+
+// Insert multiple keys and try to iterate through all of them
+// using a reverse cursor while calling savePositioned() and
+// restore() in succession.
+TEST(SortedDataInterface, SaveAndRestorePositionWhileIterateCursorReversed) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
}
- // Insert the same key multiple times and try to iterate through each
- // occurrence using a forward cursor while calling savePosition() and
- // restorePosition() in succession. Verify that the RecordId is saved
- // as part of the current position of the cursor.
- TEST( SortedDataInterface, SaveAndRestorePositionWhileIterateCursorWithDupKeys ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
-
+ int nToInsert = 10;
+ for (int i = 0; i < nToInsert; i++) {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
+ WriteUnitOfWork uow(opCtx.get());
+ BSONObj key = BSON("" << i);
+ RecordId loc(42, i * 2);
+ ASSERT_OK(sorted->insert(opCtx.get(), key, loc, true));
+ uow.commit();
}
+ }
- int nToInsert = 10;
- for ( int i = 0; i < nToInsert; i++ ) {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- RecordId loc( 42, i * 2 );
- ASSERT_OK( sorted->insert( opCtx.get(), key1, loc, true /* allow duplicates */ ) );
- uow.commit();
- }
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(nToInsert, sorted->numEntries(opCtx.get()));
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( nToInsert, sorted->numEntries( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(
+ sorted->newCursor(opCtx.get(), false));
+ int i = nToInsert - 1;
+ for (auto entry = cursor->seek(maxKey, true); entry; i--, entry = cursor->next()) {
+ ASSERT_GTE(i, 0);
+ ASSERT_EQ(entry, IndexKeyEntry(BSON("" << i), RecordId(42, i * 2)));
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get()) );
- int i = 0;
- for (auto entry = cursor->seek(minKey, true); entry; i++, entry = cursor->next()) {
- ASSERT_LT(i, nToInsert);
- ASSERT_EQ(entry, IndexKeyEntry(key1, RecordId(42, i * 2)));
-
- cursor->savePositioned();
- cursor->restore( opCtx.get() );
- }
- ASSERT( !cursor->next() );
- ASSERT_EQ(i, nToInsert);
+ cursor->savePositioned();
+ cursor->restore(opCtx.get());
}
+ ASSERT(!cursor->next());
+ ASSERT_EQ(i, -1);
+ }
+}
+
+// Insert the same key multiple times and try to iterate through each
+// occurrence using a forward cursor while calling savePositioned() and
+// restore() in succession. Verify that the RecordId is saved
+// as part of the current position of the cursor.
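+// With every entry sharing key1, positions are distinguishable only by RecordId, so a
+// correct save/restore round trip has to capture the RecordId along with the key.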
+TEST(SortedDataInterface, SaveAndRestorePositionWhileIterateCursorWithDupKeys) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
}
- // Insert the same key multiple times and try to iterate through each
- // occurrence using a reverse cursor while calling savePosition() and
- // restorePosition() in succession. Verify that the RecordId is saved
- // as part of the current position of the cursor.
- TEST( SortedDataInterface, SaveAndRestorePositionWhileIterateCursorWithDupKeysReversed ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
-
+ int nToInsert = 10;
+ for (int i = 0; i < nToInsert; i++) {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
+ WriteUnitOfWork uow(opCtx.get());
+ RecordId loc(42, i * 2);
+ ASSERT_OK(sorted->insert(opCtx.get(), key1, loc, true /* allow duplicates */));
+ uow.commit();
}
+ }
- int nToInsert = 10;
- for ( int i = 0; i < nToInsert; i++ ) {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- RecordId loc( 42, i * 2 );
- ASSERT_OK( sorted->insert( opCtx.get(), key1, loc, true /* allow duplicates */ ) );
- uow.commit();
- }
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(nToInsert, sorted->numEntries(opCtx.get()));
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( nToInsert, sorted->numEntries( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(sorted->newCursor(opCtx.get()));
+ int i = 0;
+ for (auto entry = cursor->seek(minKey, true); entry; i++, entry = cursor->next()) {
+ ASSERT_LT(i, nToInsert);
+ ASSERT_EQ(entry, IndexKeyEntry(key1, RecordId(42, i * 2)));
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get(), false) );
- int i = nToInsert - 1;
- for (auto entry = cursor->seek(maxKey, true); entry; i--, entry = cursor->next()) {
- ASSERT_GTE(i, 0);
- ASSERT_EQ(entry, IndexKeyEntry(key1, RecordId(42, i * 2)));
-
- cursor->savePositioned();
- cursor->restore( opCtx.get() );
- }
- ASSERT( !cursor->next() );
- ASSERT_EQ(i, -1);
+ cursor->savePositioned();
+ cursor->restore(opCtx.get());
}
+ ASSERT(!cursor->next());
+ ASSERT_EQ(i, nToInsert);
+ }
+}
+
+// Insert the same key multiple times and try to iterate through each
+// occurrence using a reverse cursor while calling savePositioned() and
+// restore() in succession. Verify that the RecordId is saved
+// as part of the current position of the cursor.
+TEST(SortedDataInterface, SaveAndRestorePositionWhileIterateCursorWithDupKeysReversed) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
}
- // Call savePosition() on a forward cursor without ever calling restorePosition().
- // May be useful to run this test under valgrind to verify there are no leaks.
- TEST( SortedDataInterface, SavePositionWithoutRestore ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( true ) );
-
+ int nToInsert = 10;
+ for (int i = 0; i < nToInsert; i++) {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
+ WriteUnitOfWork uow(opCtx.get());
+ RecordId loc(42, i * 2);
+ ASSERT_OK(sorted->insert(opCtx.get(), key1, loc, true /* allow duplicates */));
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), key1, loc1, false ) );
- uow.commit();
- }
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(nToInsert, sorted->numEntries(opCtx.get()));
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 1, sorted->numEntries( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(
+ sorted->newCursor(opCtx.get(), false));
+ int i = nToInsert - 1;
+ for (auto entry = cursor->seek(maxKey, true); entry; i--, entry = cursor->next()) {
+ ASSERT_GTE(i, 0);
+ ASSERT_EQ(entry, IndexKeyEntry(key1, RecordId(42, i * 2)));
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get()) );
cursor->savePositioned();
+ cursor->restore(opCtx.get());
}
+ ASSERT(!cursor->next());
+ ASSERT_EQ(i, -1);
}
+}
- // Call savePosition() on a reverse cursor without ever calling restorePosition().
- // May be useful to run this test under valgrind to verify there are no leaks.
- TEST( SortedDataInterface, SavePositionWithoutRestoreReversed ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- }
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), key1, loc1, true ) );
- uow.commit();
- }
- }
+// Call savePositioned() on a forward cursor without ever calling restore().
+// May be useful to run this test under valgrind to verify there are no leaks.
+TEST(SortedDataInterface, SavePositionWithoutRestore) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(true));
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 1, sorted->numEntries( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get(), false) );
- cursor->savePositioned();
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), key1, loc1, false));
+ uow.commit();
}
}
-
- // Ensure that restore lands as close as possible to original position, even if data inserted
- // while saved.
- void testSaveAndRestorePositionSeesNewInserts(bool forward, bool unique) {
- auto harnessHelper = newHarnessHelper();
- auto opCtx = harnessHelper->newOperationContext();
- auto sorted = harnessHelper->newSortedDataInterface(unique, {
- {key1, loc1},
- {key3, loc1},
- });
-
- auto cursor = sorted->newCursor(opCtx.get(), forward);
- const auto seekPoint = forward ? key1 : key3;
-
- ASSERT_EQ(cursor->seek(seekPoint, true), IndexKeyEntry(seekPoint, loc1));
- cursor->savePositioned();
- insertToIndex(opCtx, sorted, {{key2, loc1}});
- cursor->restore(opCtx.get());
-
- ASSERT_EQ(cursor->next(), IndexKeyEntry(key2, loc1));
- }
- TEST(SortedDataInterface, SaveAndRestorePositionSeesNewInserts_Forward_Unique) {
- testSaveAndRestorePositionSeesNewInserts(true, true);
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(1, sorted->numEntries(opCtx.get()));
}
- TEST(SortedDataInterface, SaveAndRestorePositionSeesNewInserts_Forward_Standard) {
- testSaveAndRestorePositionSeesNewInserts(true, false);
- }
- TEST(SortedDataInterface, SaveAndRestorePositionSeesNewInserts_Reverse_Unique) {
- testSaveAndRestorePositionSeesNewInserts(false, true);
- }
- TEST(SortedDataInterface, SaveAndRestorePositionSeesNewInserts_Reverse_Standard) {
- testSaveAndRestorePositionSeesNewInserts(false, false);
- }
-
- // Ensure that repeated restores lands as close as possible to original position, even if data
- // inserted while saved and the current position removed.
- void testSaveAndRestorePositionSeesNewInsertsAfterRemove(bool forward, bool unique) {
- auto harnessHelper = newHarnessHelper();
- auto opCtx = harnessHelper->newOperationContext();
- auto sorted = harnessHelper->newSortedDataInterface(unique, {
- {key1, loc1},
- {key3, loc1},
- });
-
- auto cursor = sorted->newCursor(opCtx.get(), forward);
- const auto seekPoint = forward ? key1 : key3;
-
- ASSERT_EQ(cursor->seek(seekPoint, true), IndexKeyEntry(seekPoint, loc1));
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(sorted->newCursor(opCtx.get()));
cursor->savePositioned();
- removeFromIndex(opCtx, sorted, {{key1, loc1}});
- cursor->restore(opCtx.get());
- // The restore may have seeked since it can't return to the saved position.
-
- cursor->savePositioned(); // Should still save originally saved key as "current position".
- insertToIndex(opCtx, sorted, {{key2, loc1}});
- cursor->restore(opCtx.get());
-
- ASSERT_EQ(cursor->next(), IndexKeyEntry(key2, loc1));
- }
- TEST(SortedDataInterface, SaveAndRestorePositionSeesNewInsertsAfterRemove_Forward_Unique) {
- testSaveAndRestorePositionSeesNewInsertsAfterRemove(true, true);
- }
- TEST(SortedDataInterface, SaveAndRestorePositionSeesNewInsertsAfterRemove_Forward_Standard) {
- testSaveAndRestorePositionSeesNewInsertsAfterRemove(true, false);
- }
- TEST(SortedDataInterface, SaveAndRestorePositionSeesNewInsertsAfterRemove_Reverse_Unique) {
- testSaveAndRestorePositionSeesNewInsertsAfterRemove(false, true);
- }
- TEST(SortedDataInterface, SaveAndRestorePositionSeesNewInsertsAfterRemove_Reverse_Standard) {
- testSaveAndRestorePositionSeesNewInsertsAfterRemove(false, false);
}
+}
- // Ensure that repeated restores lands as close as possible to original position, even if data
- // inserted while saved and the current position removed in a way that temporarily makes the
- // cursor EOF.
- void testSaveAndRestorePositionSeesNewInsertsAfterEOF(bool forward, bool unique) {
- auto harnessHelper = newHarnessHelper();
- auto opCtx = harnessHelper->newOperationContext();
- auto sorted = harnessHelper->newSortedDataInterface(false, {
- {key1, loc1},
- });
+// Call savePositioned() on a reverse cursor without ever calling restore().
+// May be useful to run this test under valgrind to verify there are no leaks.
+TEST(SortedDataInterface, SavePositionWithoutRestoreReversed) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
- auto cursor = sorted->newCursor(opCtx.get(), forward);
-
- ASSERT_EQ(cursor->seek(key1, true), IndexKeyEntry(key1, loc1));
- // next() would return EOF now.
-
- cursor->savePositioned();
- removeFromIndex(opCtx, sorted, {{key1, loc1}});
- cursor->restore(opCtx.get());
- // The restore may have seeked to EOF.
-
- auto insertPoint = forward ? key2 : key0;
- cursor->savePositioned(); // Should still save key1 as "current position".
- insertToIndex(opCtx, sorted, {{insertPoint, loc1}});
- cursor->restore(opCtx.get());
-
- ASSERT_EQ(cursor->next(), IndexKeyEntry(insertPoint, loc1));
- }
-
- TEST(SortedDataInterface, SaveAndRestorePositionSeesNewInsertsAfterEOF_Forward_Unique) {
- testSaveAndRestorePositionSeesNewInsertsAfterEOF(true, true);
- }
- TEST(SortedDataInterface, SaveAndRestorePositionSeesNewInsertsAfterEOF_Forward_Standard) {
- testSaveAndRestorePositionSeesNewInsertsAfterEOF(true, false);
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
}
- TEST(SortedDataInterface, SaveAndRestorePositionSeesNewInsertsAfterEOF_Reverse_Unique) {
- testSaveAndRestorePositionSeesNewInsertsAfterEOF(false, true);
- }
- TEST(SortedDataInterface, SaveAndRestorePositionSeesNewInsertsAfterEOF_Reverse_Standard) {
- testSaveAndRestorePositionSeesNewInsertsAfterEOF(false, false);
- }
-
- // Make sure we restore to a RecordId at or ahead of save point if same key.
- void testSaveAndRestorePositionConsidersRecordId_Forward(bool unique) {
- auto harnessHelper = newHarnessHelper();
- auto opCtx = harnessHelper->newOperationContext();
- auto sorted = harnessHelper->newSortedDataInterface(unique, {
- {key1, loc1},
- {key2, loc1},
- {key3, loc1},
- });
-
- auto cursor = sorted->newCursor(opCtx.get());
-
- ASSERT_EQ(cursor->seek(key1, true), IndexKeyEntry(key1, loc1));
-
- cursor->savePositioned();
- removeFromIndex(opCtx, sorted, {{key1, loc1}});
- insertToIndex(opCtx, sorted, {{key1, loc2}});
- cursor->restore(opCtx.get()); // Lands on inserted key.
-
- ASSERT_EQ(cursor->next(), IndexKeyEntry(key1, loc2));
-
- cursor->savePositioned();
- removeFromIndex(opCtx, sorted, {{key1, loc2}});
- insertToIndex(opCtx, sorted, {{key1, loc1}});
- cursor->restore(opCtx.get()); // Lands after inserted.
- ASSERT_EQ(cursor->next(), IndexKeyEntry(key2, loc1));
-
- cursor->savePositioned();
- removeFromIndex(opCtx, sorted, {{key2, loc1}});
- cursor->restore(opCtx.get());
-
- cursor->savePositioned();
- insertToIndex(opCtx, sorted, {{key2, loc1}});
- cursor->restore(opCtx.get()); // Lands at same point as initial save.
-
- // Advances from restore point since restore didn't move position.
- ASSERT_EQ(cursor->next(), IndexKeyEntry(key3, loc1));
- }
- TEST(SortedDataInterface, SaveAndRestorePositionConsidersRecordId_Forward_Standard) {
- testSaveAndRestorePositionConsidersRecordId_Forward(false);
- }
- TEST(SortedDataInterface, SaveAndRestorePositionConsidersRecordId_Forward_Unique) {
- testSaveAndRestorePositionConsidersRecordId_Forward(true);
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ {
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), key1, loc1, true));
+ uow.commit();
+ }
}
-
- // Make sure we restore to a RecordId at or ahead of save point if same key on reverse cursor.
- void testSaveAndRestorePositionConsidersRecordId_Reverse(bool unique) {
- auto harnessHelper = newHarnessHelper();
- auto opCtx = harnessHelper->newOperationContext();
- auto sorted = harnessHelper->newSortedDataInterface(unique, {
- {key0, loc1},
- {key1, loc1},
- {key2, loc2},
- });
-
- auto cursor = sorted->newCursor(opCtx.get(), false);
- ASSERT_EQ(cursor->seek(key2, true), IndexKeyEntry(key2, loc2));
-
- cursor->savePositioned();
- removeFromIndex(opCtx, sorted, {{key2, loc2}});
- insertToIndex(opCtx, sorted, {{key2, loc1}});
- cursor->restore(opCtx.get());
-
- ASSERT_EQ(cursor->next(), IndexKeyEntry(key2, loc1));
-
- cursor->savePositioned();
- removeFromIndex(opCtx, sorted, {{key2, loc1}});
- insertToIndex(opCtx, sorted, {{key2, loc2}});
- cursor->restore(opCtx.get());
-
- ASSERT_EQ(cursor->next(), IndexKeyEntry(key1, loc1));
-
- cursor->savePositioned();
- removeFromIndex(opCtx, sorted, {{key1, loc1}});
- cursor->restore(opCtx.get());
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(1, sorted->numEntries(opCtx.get()));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(
+ sorted->newCursor(opCtx.get(), false));
cursor->savePositioned();
- insertToIndex(opCtx, sorted, {{key1, loc1}});
- cursor->restore(opCtx.get()); // Lands at same point as initial save.
-
- // Advances from restore point since restore didn't move position.
- ASSERT_EQ(cursor->next(), IndexKeyEntry(key0, loc1));
- }
- TEST(SortedDataInterface, SaveAndRestorePositionConsidersRecordId_Reverse_Standard) {
- testSaveAndRestorePositionConsidersRecordId_Reverse(false);
- }
- TEST(SortedDataInterface, SaveAndRestorePositionConsidersRecordId_Reverse_Unique) {
- testSaveAndRestorePositionConsidersRecordId_Reverse(true);
}
+}
+
+// Ensure that restore lands as close as possible to the original position, even if data is
+// inserted while saved.
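+// For example, a forward cursor saved on key1 should surface a key2 inserted while saved
+// as its very next entry once restored.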
+void testSaveAndRestorePositionSeesNewInserts(bool forward, bool unique) {
+ auto harnessHelper = newHarnessHelper();
+ auto opCtx = harnessHelper->newOperationContext();
+ auto sorted = harnessHelper->newSortedDataInterface(unique,
+ {
+ {key1, loc1}, {key3, loc1},
+ });
+
+ auto cursor = sorted->newCursor(opCtx.get(), forward);
+ const auto seekPoint = forward ? key1 : key3;
+
+ ASSERT_EQ(cursor->seek(seekPoint, true), IndexKeyEntry(seekPoint, loc1));
+
+ cursor->savePositioned();
+ insertToIndex(opCtx, sorted, {{key2, loc1}});
+ cursor->restore(opCtx.get());
+
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(key2, loc1));
+}
+TEST(SortedDataInterface, SaveAndRestorePositionSeesNewInserts_Forward_Unique) {
+ testSaveAndRestorePositionSeesNewInserts(true, true);
+}
+TEST(SortedDataInterface, SaveAndRestorePositionSeesNewInserts_Forward_Standard) {
+ testSaveAndRestorePositionSeesNewInserts(true, false);
+}
+TEST(SortedDataInterface, SaveAndRestorePositionSeesNewInserts_Reverse_Unique) {
+ testSaveAndRestorePositionSeesNewInserts(false, true);
+}
+TEST(SortedDataInterface, SaveAndRestorePositionSeesNewInserts_Reverse_Standard) {
+ testSaveAndRestorePositionSeesNewInserts(false, false);
+}
+
+// Ensure that repeated restores land as close as possible to the original position, even if
+// data is inserted while saved and the current position is removed.
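+// Even after its saved entry is removed, a second savePositioned() should keep the original
+// key as the logical position, so a key2 inserted into the gap is still surfaced by next().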
+void testSaveAndRestorePositionSeesNewInsertsAfterRemove(bool forward, bool unique) {
+ auto harnessHelper = newHarnessHelper();
+ auto opCtx = harnessHelper->newOperationContext();
+ auto sorted = harnessHelper->newSortedDataInterface(unique,
+ {
+ {key1, loc1}, {key3, loc1},
+ });
+
+ auto cursor = sorted->newCursor(opCtx.get(), forward);
+ const auto seekPoint = forward ? key1 : key3;
+
+ ASSERT_EQ(cursor->seek(seekPoint, true), IndexKeyEntry(seekPoint, loc1));
+
+ cursor->savePositioned();
+ removeFromIndex(opCtx, sorted, {{key1, loc1}});
+ cursor->restore(opCtx.get());
+ // The restore may have seeked since it can't return to the saved position.
+
+ cursor->savePositioned(); // Should still save originally saved key as "current position".
+ insertToIndex(opCtx, sorted, {{key2, loc1}});
+ cursor->restore(opCtx.get());
+
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(key2, loc1));
+}
+TEST(SortedDataInterface, SaveAndRestorePositionSeesNewInsertsAfterRemove_Forward_Unique) {
+ testSaveAndRestorePositionSeesNewInsertsAfterRemove(true, true);
+}
+TEST(SortedDataInterface, SaveAndRestorePositionSeesNewInsertsAfterRemove_Forward_Standard) {
+ testSaveAndRestorePositionSeesNewInsertsAfterRemove(true, false);
+}
+TEST(SortedDataInterface, SaveAndRestorePositionSeesNewInsertsAfterRemove_Reverse_Unique) {
+ testSaveAndRestorePositionSeesNewInsertsAfterRemove(false, true);
+}
+TEST(SortedDataInterface, SaveAndRestorePositionSeesNewInsertsAfterRemove_Reverse_Standard) {
+ testSaveAndRestorePositionSeesNewInsertsAfterRemove(false, false);
+}
+
+// Ensure that repeated restores land as close as possible to the original position, even if
+// data is inserted while saved and the current position is removed in a way that temporarily
+// makes the cursor EOF.
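+// Removing the only entry leaves nothing to restore onto; the cursor must nevertheless
+// remember the saved key so that an entry inserted beyond it (key2 going forward, key0 in
+// reverse) is found.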
+void testSaveAndRestorePositionSeesNewInsertsAfterEOF(bool forward, bool unique) {
+ auto harnessHelper = newHarnessHelper();
+ auto opCtx = harnessHelper->newOperationContext();
+ auto sorted = harnessHelper->newSortedDataInterface(unique,
+ {
+ {key1, loc1},
+ });
+
+ auto cursor = sorted->newCursor(opCtx.get(), forward);
+
+ ASSERT_EQ(cursor->seek(key1, true), IndexKeyEntry(key1, loc1));
+ // next() would return EOF now.
+
+ cursor->savePositioned();
+ removeFromIndex(opCtx, sorted, {{key1, loc1}});
+ cursor->restore(opCtx.get());
+ // The restore may have seeked to EOF.
+
+ auto insertPoint = forward ? key2 : key0;
+ cursor->savePositioned(); // Should still save key1 as "current position".
+ insertToIndex(opCtx, sorted, {{insertPoint, loc1}});
+ cursor->restore(opCtx.get());
+
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(insertPoint, loc1));
+}
+
+TEST(SortedDataInterface, SaveAndRestorePositionSeesNewInsertsAfterEOF_Forward_Unique) {
+ testSaveAndRestorePositionSeesNewInsertsAfterEOF(true, true);
+}
+TEST(SortedDataInterface, SaveAndRestorePositionSeesNewInsertsAfterEOF_Forward_Standard) {
+ testSaveAndRestorePositionSeesNewInsertsAfterEOF(true, false);
+}
+TEST(SortedDataInterface, SaveAndRestorePositionSeesNewInsertsAfterEOF_Reverse_Unique) {
+ testSaveAndRestorePositionSeesNewInsertsAfterEOF(false, true);
+}
+TEST(SortedDataInterface, SaveAndRestorePositionSeesNewInsertsAfterEOF_Reverse_Standard) {
+ testSaveAndRestorePositionSeesNewInsertsAfterEOF(false, false);
+}
+
+// Make sure we restore to a RecordId at or ahead of save point if same key.
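+// E.g., a cursor saved on (key1, loc1) whose entry is replaced by (key1, loc2) picks up the
+// loc2 entry, while a reinserted (key1, loc1) behind the save point is skipped over.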
+void testSaveAndRestorePositionConsidersRecordId_Forward(bool unique) {
+ auto harnessHelper = newHarnessHelper();
+ auto opCtx = harnessHelper->newOperationContext();
+ auto sorted = harnessHelper->newSortedDataInterface(unique,
+ {
+ {key1, loc1}, {key2, loc1}, {key3, loc1},
+ });
+
+ auto cursor = sorted->newCursor(opCtx.get());
+
+ ASSERT_EQ(cursor->seek(key1, true), IndexKeyEntry(key1, loc1));
+
+ cursor->savePositioned();
+ removeFromIndex(opCtx, sorted, {{key1, loc1}});
+ insertToIndex(opCtx, sorted, {{key1, loc2}});
+ cursor->restore(opCtx.get()); // Lands on inserted key.
+
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(key1, loc2));
+
+ cursor->savePositioned();
+ removeFromIndex(opCtx, sorted, {{key1, loc2}});
+ insertToIndex(opCtx, sorted, {{key1, loc1}});
+ cursor->restore(opCtx.get()); // Lands after inserted.
+
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(key2, loc1));
+
+ cursor->savePositioned();
+ removeFromIndex(opCtx, sorted, {{key2, loc1}});
+ cursor->restore(opCtx.get());
+
+ cursor->savePositioned();
+ insertToIndex(opCtx, sorted, {{key2, loc1}});
+ cursor->restore(opCtx.get()); // Lands at same point as initial save.
+
+ // Advances from restore point since restore didn't move position.
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(key3, loc1));
+}
+TEST(SortedDataInterface, SaveAndRestorePositionConsidersRecordId_Forward_Standard) {
+ testSaveAndRestorePositionConsidersRecordId_Forward(false);
+}
+TEST(SortedDataInterface, SaveAndRestorePositionConsidersRecordId_Forward_Unique) {
+ testSaveAndRestorePositionConsidersRecordId_Forward(true);
+}
+
+// Make sure we restore to a RecordId at or ahead of save point if same key on reverse cursor.
+void testSaveAndRestorePositionConsidersRecordId_Reverse(bool unique) {
+ auto harnessHelper = newHarnessHelper();
+ auto opCtx = harnessHelper->newOperationContext();
+ auto sorted = harnessHelper->newSortedDataInterface(unique,
+ {
+ {key0, loc1}, {key1, loc1}, {key2, loc2},
+ });
+
+ auto cursor = sorted->newCursor(opCtx.get(), false);
+
+ ASSERT_EQ(cursor->seek(key2, true), IndexKeyEntry(key2, loc2));
+
+ cursor->savePositioned();
+ removeFromIndex(opCtx, sorted, {{key2, loc2}});
+ insertToIndex(opCtx, sorted, {{key2, loc1}});
+ cursor->restore(opCtx.get());
+
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(key2, loc1));
+
+ cursor->savePositioned();
+ removeFromIndex(opCtx, sorted, {{key2, loc1}});
+ insertToIndex(opCtx, sorted, {{key2, loc2}});
+ cursor->restore(opCtx.get());
+
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(key1, loc1));
+
+ cursor->savePositioned();
+ removeFromIndex(opCtx, sorted, {{key1, loc1}});
+ cursor->restore(opCtx.get());
+
+ cursor->savePositioned();
+ insertToIndex(opCtx, sorted, {{key1, loc1}});
+ cursor->restore(opCtx.get()); // Lands at same point as initial save.
+
+ // Advances from restore point since restore didn't move position.
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(key0, loc1));
+}
+TEST(SortedDataInterface, SaveAndRestorePositionConsidersRecordId_Reverse_Standard) {
+ testSaveAndRestorePositionConsidersRecordId_Reverse(false);
+}
+TEST(SortedDataInterface, SaveAndRestorePositionConsidersRecordId_Reverse_Unique) {
+ testSaveAndRestorePositionConsidersRecordId_Reverse(true);
+}
+
+// Ensure that SaveUnpositioned allows later use of the cursor.
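+// Unlike savePositioned(), saveUnpositioned() gives up the current position entirely;
+// restore() then only needs to revive the cursor for a fresh seek, as exercised below.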
+TEST(SortedDataInterface, SaveUnpositionedAndRestore) {
+ auto harnessHelper = newHarnessHelper();
+ auto opCtx = harnessHelper->newOperationContext();
+ auto sorted = harnessHelper->newSortedDataInterface(false,
+ {
+ {key1, loc1}, {key2, loc1}, {key3, loc1},
+ });
+
+ auto cursor = sorted->newCursor(opCtx.get());
+
+ ASSERT_EQ(cursor->seek(key2, true), IndexKeyEntry(key2, loc1));
+
+ cursor->saveUnpositioned();
+ removeFromIndex(opCtx, sorted, {{key2, loc1}});
+ cursor->restore(opCtx.get());
- // Ensure that SaveUnpositioned allows later use of the cursor.
- TEST(SortedDataInterface, SaveUnpositionedAndRestore) {
- auto harnessHelper = newHarnessHelper();
- auto opCtx = harnessHelper->newOperationContext();
- auto sorted = harnessHelper->newSortedDataInterface(false, {
- {key1, loc1},
- {key2, loc1},
- {key3, loc1},
- });
-
- auto cursor = sorted->newCursor(opCtx.get());
-
- ASSERT_EQ(cursor->seek(key2, true), IndexKeyEntry(key2, loc1));
-
- cursor->saveUnpositioned();
- removeFromIndex(opCtx, sorted, {{key2, loc1}});
- cursor->restore(opCtx.get());
-
- ASSERT_EQ(cursor->seek(key1, true), IndexKeyEntry(key1, loc1));
-
- cursor->saveUnpositioned();
- cursor->restore(opCtx.get());
-
- ASSERT_EQ(cursor->seek(key3, true), IndexKeyEntry(key3, loc1));
- }
+ ASSERT_EQ(cursor->seek(key1, true), IndexKeyEntry(key1, loc1));
-} // namespace mongo
+ cursor->saveUnpositioned();
+ cursor->restore(opCtx.get());
+
+ ASSERT_EQ(cursor->seek(key3, true), IndexKeyEntry(key3, loc1));
+}
+
+} // namespace mongo
diff --git a/src/mongo/db/storage/sorted_data_interface_test_cursor_seek_exact.cpp b/src/mongo/db/storage/sorted_data_interface_test_cursor_seek_exact.cpp
index 926b23b5237..c767dbee859 100644
--- a/src/mongo/db/storage/sorted_data_interface_test_cursor_seek_exact.cpp
+++ b/src/mongo/db/storage/sorted_data_interface_test_cursor_seek_exact.cpp
@@ -34,106 +34,104 @@
#include "mongo/unittest/unittest.h"
namespace mongo {
- // Tests seekExact when it hits something.
- void testSeekExact_Hit(bool unique, bool forward) {
- auto harnessHelper = newHarnessHelper();
- auto opCtx = harnessHelper->newOperationContext();
- auto sorted = harnessHelper->newSortedDataInterface(unique, {
- {key1, loc1},
- {key2, loc1},
- {key3, loc1},
+// Tests seekExact when it hits something.
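+// seekExact succeeds only on an exact key match; a near miss returns boost::none rather
+// than the neighboring entry that seek would land on (see the _Miss tests below).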
+void testSeekExact_Hit(bool unique, bool forward) {
+ auto harnessHelper = newHarnessHelper();
+ auto opCtx = harnessHelper->newOperationContext();
+ auto sorted = harnessHelper->newSortedDataInterface(unique,
+ {
+ {key1, loc1}, {key2, loc1}, {key3, loc1},
+ });
+
+ auto cursor = sorted->newCursor(opCtx.get(), forward);
+
+ ASSERT_EQ(cursor->seekExact(key2), IndexKeyEntry(key2, loc1));
+
+ // Make sure iterating works. We may consider loosening this requirement if it is a hardship
+ // for some storage engines.
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(forward ? key3 : key1, loc1));
+ ASSERT_EQ(cursor->next(), boost::none);
+}
+TEST(SortedDataInterface, SeekExact_Hit_Unique_Forward) {
+ testSeekExact_Hit(true, true);
+}
+TEST(SortedDataInterface, SeekExact_Hit_Unique_Reverse) {
+ testSeekExact_Hit(true, false);
+}
+TEST(SortedDataInterface, SeekExact_Hit_Standard_Forward) {
+ testSeekExact_Hit(false, true);
+}
+TEST(SortedDataInterface, SeekExact_Hit_Standard_Reverse) {
+ testSeekExact_Hit(false, false);
+}
+
+// Tests seekExact when it doesn't hit the query.
+void testSeekExact_Miss(bool unique, bool forward) {
+ auto harnessHelper = newHarnessHelper();
+ auto opCtx = harnessHelper->newOperationContext();
+ auto sorted = harnessHelper->newSortedDataInterface(unique,
+ {
+ {key1, loc1},
+ // No key2.
+ {key3, loc1},
+ });
+
+ auto cursor = sorted->newCursor(opCtx.get(), forward);
+
+ ASSERT_EQ(cursor->seekExact(key2), boost::none);
+
+ // Not testing iteration since the cursor's position following a failed seekExact is
+ // undefined. However, you must be able to seek somewhere else.
+ ASSERT_EQ(cursor->seekExact(key1), IndexKeyEntry(key1, loc1));
+}
+TEST(SortedDataInterface, SeekExact_Miss_Unique_Forward) {
+ testSeekExact_Miss(true, true);
+}
+TEST(SortedDataInterface, SeekExact_Miss_Unique_Reverse) {
+ testSeekExact_Miss(true, false);
+}
+TEST(SortedDataInterface, SeekExact_Miss_Standard_Forward) {
+ testSeekExact_Miss(false, true);
+}
+TEST(SortedDataInterface, SeekExact_Miss_Standard_Reverse) {
+ testSeekExact_Miss(false, false);
+}
+
+// Tests seekExact on forward cursor when it hits something with dup keys. Doesn't make sense
+// for unique indexes.
+TEST(SortedDataInterface, SeekExact_HitWithDups_Forward) {
+ auto harnessHelper = newHarnessHelper();
+ auto opCtx = harnessHelper->newOperationContext();
+ auto sorted = harnessHelper->newSortedDataInterface(
+ false,
+ {
+ {key1, loc1}, {key2, loc1}, {key2, loc2}, {key3, loc1},
});
- auto cursor = sorted->newCursor(opCtx.get(), forward);
-
- ASSERT_EQ(cursor->seekExact(key2), IndexKeyEntry(key2, loc1));
-
- // Make sure iterating works. We may consider loosening this requirement if it is a hardship
- // for some storage engines.
- ASSERT_EQ(cursor->next(), IndexKeyEntry(forward ? key3 : key1, loc1));
- ASSERT_EQ(cursor->next(), boost::none);
- }
- TEST(SortedDataInterface, SeekExact_Hit_Unique_Forward) {
- testSeekExact_Hit(true, true);
- }
- TEST(SortedDataInterface, SeekExact_Hit_Unique_Reverse) {
- testSeekExact_Hit(true, false);
- }
- TEST(SortedDataInterface, SeekExact_Hit_Standard_Forward) {
- testSeekExact_Hit(false, true);
- }
- TEST(SortedDataInterface, SeekExact_Hit_Standard_Reverse) {
- testSeekExact_Hit(false, false);
- }
-
- // Tests seekExact when it doesn't hit the query.
- void testSeekExact_Miss(bool unique, bool forward) {
- auto harnessHelper = newHarnessHelper();
- auto opCtx = harnessHelper->newOperationContext();
- auto sorted = harnessHelper->newSortedDataInterface(unique, {
- {key1, loc1},
- // No key2.
- {key3, loc1},
+ auto cursor = sorted->newCursor(opCtx.get());
+
+ ASSERT_EQ(cursor->seekExact(key2), IndexKeyEntry(key2, loc1));
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(key2, loc2));
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(key3, loc1));
+ ASSERT_EQ(cursor->next(), boost::none);
+}
+
+// Tests seekExact on reverse cursor when it hits something with dup keys. Doesn't make sense
+// for unique indexes.
+TEST(SortedDataInterface, SeekExact_HitWithDups_Reverse) {
+ auto harnessHelper = newHarnessHelper();
+ auto opCtx = harnessHelper->newOperationContext();
+ auto sorted = harnessHelper->newSortedDataInterface(
+ false,
+ {
+ {key1, loc1}, {key2, loc1}, {key2, loc2}, {key3, loc1},
});
- auto cursor = sorted->newCursor(opCtx.get(), forward);
-
- ASSERT_EQ(cursor->seekExact(key2), boost::none);
-
-        // Not testing iteration since the cursor's position following a failed seekExact is
- // undefined. However, you must be able to seek somewhere else.
- ASSERT_EQ(cursor->seekExact(key1), IndexKeyEntry(key1, loc1));
- }
- TEST(SortedDataInterface, SeekExact_Miss_Unique_Forward) {
- testSeekExact_Miss(true, true);
- }
- TEST(SortedDataInterface, SeekExact_Miss_Unique_Reverse) {
- testSeekExact_Miss(true, false);
- }
- TEST(SortedDataInterface, SeekExact_Miss_Standard_Forward) {
- testSeekExact_Miss(false, true);
- }
- TEST(SortedDataInterface, SeekExact_Miss_Standard_Reverse) {
- testSeekExact_Miss(false, false);
- }
-
- // Tests seekExact on forward cursor when it hits something with dup keys. Doesn't make sense
- // for unique indexes.
- TEST(SortedDataInterface, SeekExact_HitWithDups_Forward) {
- auto harnessHelper = newHarnessHelper();
- auto opCtx = harnessHelper->newOperationContext();
- auto sorted = harnessHelper->newSortedDataInterface(false, {
- {key1, loc1},
- {key2, loc1},
- {key2, loc2},
- {key3, loc1},
- });
-
- auto cursor = sorted->newCursor(opCtx.get());
-
- ASSERT_EQ(cursor->seekExact(key2), IndexKeyEntry(key2, loc1));
- ASSERT_EQ(cursor->next(), IndexKeyEntry(key2, loc2));
- ASSERT_EQ(cursor->next(), IndexKeyEntry(key3, loc1));
- ASSERT_EQ(cursor->next(), boost::none);
- }
-
- // Tests seekExact on reverse cursor when it hits something with dup keys. Doesn't make sense
- // for unique indexes.
- TEST(SortedDataInterface, SeekExact_HitWithDups_Reverse) {
- auto harnessHelper = newHarnessHelper();
- auto opCtx = harnessHelper->newOperationContext();
- auto sorted = harnessHelper->newSortedDataInterface(false, {
- {key1, loc1},
- {key2, loc1},
- {key2, loc2},
- {key3, loc1},
- });
-
- auto cursor = sorted->newCursor(opCtx.get(), false);
+ auto cursor = sorted->newCursor(opCtx.get(), false);
- ASSERT_EQ(cursor->seekExact(key2), IndexKeyEntry(key2, loc2));
- ASSERT_EQ(cursor->next(), IndexKeyEntry(key2, loc1));
- ASSERT_EQ(cursor->next(), IndexKeyEntry(key1, loc1));
- ASSERT_EQ(cursor->next(), boost::none);
- }
-} // namespace mongo
+ ASSERT_EQ(cursor->seekExact(key2), IndexKeyEntry(key2, loc2));
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(key2, loc1));
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(key1, loc1));
+ ASSERT_EQ(cursor->next(), boost::none);
+}
+} // namespace mongo
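As the tests above spell out, seekExact() returns the matching entry on a hit and boost::none on a miss; after a miss the cursor's position is undefined, but it must still accept a new seek. A hypothetical condensed sketch under the same harness assumptions:

auto harnessHelper = newHarnessHelper();
auto opCtx = harnessHelper->newOperationContext();
auto sorted = harnessHelper->newSortedDataInterface(true, {{key1, loc1}, {key3, loc1}});

auto cursor = sorted->newCursor(opCtx.get());
ASSERT_EQ(cursor->seekExact(key2), boost::none);                // miss: no key2 in the index
ASSERT_EQ(cursor->seekExact(key3), IndexKeyEntry(key3, loc1));  // a hit right after a miss
ASSERT_EQ(cursor->next(), boost::none);                         // key3 is the last entry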
diff --git a/src/mongo/db/storage/sorted_data_interface_test_dupkeycheck.cpp b/src/mongo/db/storage/sorted_data_interface_test_dupkeycheck.cpp
index 0d31abe6f19..1c069da9ebe 100644
--- a/src/mongo/db/storage/sorted_data_interface_test_dupkeycheck.cpp
+++ b/src/mongo/db/storage/sorted_data_interface_test_dupkeycheck.cpp
@@ -37,132 +37,132 @@
namespace mongo {
- // Insert a key and verify that dupKeyCheck() returns a non-OK status for
- // the same key. When dupKeyCheck() is called with the exact (key, RecordId)
- // pair that was inserted, it should still return an OK status.
- TEST( SortedDataInterface, DupKeyCheckAfterInsert ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( true ) );
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- }
+// Insert a key and verify that dupKeyCheck() returns a non-OK status for
+// the same key. When dupKeyCheck() is called with the exact (key, RecordId)
+// pair that was inserted, it should still return an OK status.
+TEST(SortedDataInterface, DupKeyCheckAfterInsert) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(true));
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), key1, loc1, false ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), key1, loc1, false));
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 1, sorted->numEntries( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(1, sorted->numEntries(opCtx.get()));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->dupKeyCheck( opCtx.get(), key1, loc1 ) );
- ASSERT_NOT_OK( sorted->dupKeyCheck( opCtx.get(), key1, RecordId::min() ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->dupKeyCheck(opCtx.get(), key1, loc1));
+ ASSERT_NOT_OK(sorted->dupKeyCheck(opCtx.get(), key1, RecordId::min()));
+ uow.commit();
}
}
+}
- // Verify that dupKeyCheck() returns an OK status for a key that does
- // not exist in the index.
- TEST( SortedDataInterface, DupKeyCheckEmpty ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( true ) );
+// Verify that dupKeyCheck() returns an OK status for a key that does
+// not exist in the index.
+TEST(SortedDataInterface, DupKeyCheckEmpty) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(true));
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->dupKeyCheck( opCtx.get(), key1, loc1 ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->dupKeyCheck(opCtx.get(), key1, loc1));
+ uow.commit();
}
}
+}
- // Insert a key and verify that dupKeyCheck() acknowledges the duplicate key, even
-    // when the inserted key is located at a RecordId that comes after the one specified.
- TEST( SortedDataInterface, DupKeyCheckWhenDiskLocBefore ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( true ) );
+// Insert a key and verify that dupKeyCheck() acknowledges the duplicate key, even
+// when the inserted key is located at a RecordId that comes after the one specified.
+TEST(SortedDataInterface, DupKeyCheckWhenDiskLocBefore) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(true));
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), key1, loc1, true ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), key1, loc1, true));
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 1, sorted->numEntries( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(1, sorted->numEntries(opCtx.get()));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_NOT_OK( sorted->dupKeyCheck( opCtx.get(), key1, RecordId::min() ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_NOT_OK(sorted->dupKeyCheck(opCtx.get(), key1, RecordId::min()));
+ uow.commit();
}
}
+}
- // Insert a key and verify that dupKeyCheck() acknowledges the duplicate key, even
-    // when the inserted key is located at a RecordId that comes before the one specified.
- TEST( SortedDataInterface, DupKeyCheckWhenDiskLocAfter ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( true ) );
+// Insert a key and verify that dupKeyCheck() acknowledges the duplicate key, even
+// when the inserted key is located at a RecordId that comes before the one specified.
+TEST(SortedDataInterface, DupKeyCheckWhenDiskLocAfter) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(true));
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), key1, loc1, true ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), key1, loc1, true));
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 1, sorted->numEntries( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(1, sorted->numEntries(opCtx.get()));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_NOT_OK( sorted->dupKeyCheck( opCtx.get(), key1, RecordId::max() ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_NOT_OK(sorted->dupKeyCheck(opCtx.get(), key1, RecordId::max()));
+ uow.commit();
}
}
+}
-} // namespace mongo
+} // namespace mongo
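The dupKeyCheck() semantics exercised above reduce to three cases: the exact (key, RecordId) pair that was inserted passes, the same key at any other RecordId fails, and an absent key passes. A hypothetical one-screen summary, assuming the same harness and a unique index:

auto harnessHelper = newHarnessHelper();
auto opCtx = harnessHelper->newOperationContext();
auto sorted = harnessHelper->newSortedDataInterface(true, {{key1, loc1}});

WriteUnitOfWork uow(opCtx.get());
ASSERT_OK(sorted->dupKeyCheck(opCtx.get(), key1, loc1));                  // same (key, loc): OK
ASSERT_NOT_OK(sorted->dupKeyCheck(opCtx.get(), key1, RecordId::min()));  // same key, other loc: dup
ASSERT_OK(sorted->dupKeyCheck(opCtx.get(), key2, loc1));                  // key not indexed: OK
uow.commit();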
diff --git a/src/mongo/db/storage/sorted_data_interface_test_fullvalidate.cpp b/src/mongo/db/storage/sorted_data_interface_test_fullvalidate.cpp
index a7ee8544dc0..6f1b4575a09 100644
--- a/src/mongo/db/storage/sorted_data_interface_test_fullvalidate.cpp
+++ b/src/mongo/db/storage/sorted_data_interface_test_fullvalidate.cpp
@@ -37,41 +37,41 @@
namespace mongo {
-    // Insert multiple keys and verify that fullValidate() sets
-    // `numKeysOut` either to the number of entries in the index or to -1.
- TEST( SortedDataInterface, FullValidate ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
+// Insert multiple keys and verify that fullValidate() sets
+// `numKeysOut` either to the number of entries in the index or to -1.
+TEST(SortedDataInterface, FullValidate) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- }
-
- int nToInsert = 10;
- for ( int i = 0; i < nToInsert; i++ ) {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- BSONObj key = BSON( "" << i );
- RecordId loc( 42, i * 2 );
- ASSERT_OK( sorted->insert( opCtx.get(), key, loc, true ) );
- uow.commit();
- }
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
+ }
+ int nToInsert = 10;
+ for (int i = 0; i < nToInsert; i++) {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( nToInsert, sorted->numEntries( opCtx.get() ) );
+ WriteUnitOfWork uow(opCtx.get());
+ BSONObj key = BSON("" << i);
+ RecordId loc(42, i * 2);
+ ASSERT_OK(sorted->insert(opCtx.get(), key, loc, true));
+ uow.commit();
}
+ }
- {
- long long numKeysOut;
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- sorted->fullValidate(opCtx.get(), false, &numKeysOut, NULL);
-            // fullValidate() can set numKeysOut to the number of existing keys or to -1.
- ASSERT( numKeysOut == nToInsert || numKeysOut == -1 );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(nToInsert, sorted->numEntries(opCtx.get()));
+ }
+
+ {
+ long long numKeysOut;
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ sorted->fullValidate(opCtx.get(), false, &numKeysOut, NULL);
+        // fullValidate() can set numKeysOut to the number of existing keys or to -1.
+ ASSERT(numKeysOut == nToInsert || numKeysOut == -1);
}
+}
-} // namespace mongo
+} // namespace mongo
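The fullValidate() contract shown above is deliberately loose: an engine may report the exact key count through numKeysOut or decline with -1. A hypothetical sketch of the empty-index case under that reading (assuming 0 and -1 are the only valid answers when nothing has been inserted):

auto harnessHelper = newHarnessHelper();
auto sorted = harnessHelper->newSortedDataInterface(false);
auto opCtx = harnessHelper->newOperationContext();

long long numKeysOut = 0;
sorted->fullValidate(opCtx.get(), false, &numKeysOut, NULL);
// Either an exact count (0 for an empty index) or a refusal (-1) is acceptable.
ASSERT(numKeysOut == 0 || numKeysOut == -1);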
diff --git a/src/mongo/db/storage/sorted_data_interface_test_harness.cpp b/src/mongo/db/storage/sorted_data_interface_test_harness.cpp
index 286c3038f8c..13929c7eacc 100644
--- a/src/mongo/db/storage/sorted_data_interface_test_harness.cpp
+++ b/src/mongo/db/storage/sorted_data_interface_test_harness.cpp
@@ -37,535 +37,524 @@
#include "mongo/unittest/unittest.h"
namespace mongo {
- std::unique_ptr<SortedDataInterface> HarnessHelper::newSortedDataInterface(
- bool unique,
- std::initializer_list<IndexKeyEntry> toInsert) {
- invariant(std::is_sorted(toInsert.begin(), toInsert.end(),
- IndexEntryComparison(Ordering::make(BSONObj()))));
-
- auto index = newSortedDataInterface(unique);
- insertToIndex(this, index, toInsert);
- return index;
- }
-
- void insertToIndex(unowned_ptr<OperationContext> txn,
- unowned_ptr<SortedDataInterface> index,
- std::initializer_list<IndexKeyEntry> toInsert) {
- WriteUnitOfWork wuow(txn);
- for (auto&& entry : toInsert) {
- ASSERT_OK(index->insert(txn, entry.key, entry.loc, true));
- }
- wuow.commit();
+std::unique_ptr<SortedDataInterface> HarnessHelper::newSortedDataInterface(
+ bool unique, std::initializer_list<IndexKeyEntry> toInsert) {
+ invariant(std::is_sorted(
+ toInsert.begin(), toInsert.end(), IndexEntryComparison(Ordering::make(BSONObj()))));
+
+ auto index = newSortedDataInterface(unique);
+ insertToIndex(this, index, toInsert);
+ return index;
+}
+
+void insertToIndex(unowned_ptr<OperationContext> txn,
+ unowned_ptr<SortedDataInterface> index,
+ std::initializer_list<IndexKeyEntry> toInsert) {
+ WriteUnitOfWork wuow(txn);
+ for (auto&& entry : toInsert) {
+ ASSERT_OK(index->insert(txn, entry.key, entry.loc, true));
}
-
- void removeFromIndex(unowned_ptr<OperationContext> txn,
- unowned_ptr<SortedDataInterface> index,
- std::initializer_list<IndexKeyEntry> toRemove) {
- WriteUnitOfWork wuow(txn);
- for (auto&& entry : toRemove) {
- index->unindex(txn, entry.key, entry.loc, true);
- }
- wuow.commit();
+ wuow.commit();
+}
+
+void removeFromIndex(unowned_ptr<OperationContext> txn,
+ unowned_ptr<SortedDataInterface> index,
+ std::initializer_list<IndexKeyEntry> toRemove) {
+ WriteUnitOfWork wuow(txn);
+ for (auto&& entry : toRemove) {
+ index->unindex(txn, entry.key, entry.loc, true);
}
+ wuow.commit();
+}
- TEST( SortedDataInterface, InsertWithDups1 ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
+TEST(SortedDataInterface, InsertWithDups1) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- sorted->insert( opCtx.get(), BSON( "" << 1 ), RecordId( 5, 2 ), true );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ sorted->insert(opCtx.get(), BSON("" << 1), RecordId(5, 2), true);
+ uow.commit();
}
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- sorted->insert( opCtx.get(), BSON( "" << 1 ), RecordId( 6, 2 ), true );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ sorted->insert(opCtx.get(), BSON("" << 1), RecordId(6, 2), true);
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 2, sorted->numEntries( opCtx.get() ) );
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(2, sorted->numEntries(opCtx.get()));
- long long x = 0;
- sorted->fullValidate(opCtx.get(), false, &x, NULL);
- ASSERT_EQUALS( 2, x );
- }
+ long long x = 0;
+ sorted->fullValidate(opCtx.get(), false, &x, NULL);
+ ASSERT_EQUALS(2, x);
}
+}
- TEST( SortedDataInterface, InsertWithDups2 ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- sorted->insert( opCtx.get(), BSON( "" << 1 ), RecordId( 5, 18 ), true );
- uow.commit();
- }
- }
+TEST(SortedDataInterface, InsertWithDups2) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- sorted->insert( opCtx.get(), BSON( "" << 1 ), RecordId( 5, 20 ), true );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ sorted->insert(opCtx.get(), BSON("" << 1), RecordId(5, 18), true);
+ uow.commit();
}
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 2, sorted->numEntries( opCtx.get() ) );
+ WriteUnitOfWork uow(opCtx.get());
+ sorted->insert(opCtx.get(), BSON("" << 1), RecordId(5, 20), true);
+ uow.commit();
}
}
- TEST( SortedDataInterface, InsertWithDups3AndRollback ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(2, sorted->numEntries(opCtx.get()));
+ }
+}
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- sorted->insert( opCtx.get(), BSON( "" << 1 ), RecordId( 5, 18 ), true );
- uow.commit();
- }
- }
+TEST(SortedDataInterface, InsertWithDups3AndRollback) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- sorted->insert( opCtx.get(), BSON( "" << 1 ), RecordId( 5, 20 ), true );
- // no commit
- }
+ WriteUnitOfWork uow(opCtx.get());
+ sorted->insert(opCtx.get(), BSON("" << 1), RecordId(5, 18), true);
+ uow.commit();
}
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 1, sorted->numEntries( opCtx.get() ) );
+ WriteUnitOfWork uow(opCtx.get());
+ sorted->insert(opCtx.get(), BSON("" << 1), RecordId(5, 20), true);
+ // no commit
}
}
- TEST( SortedDataInterface, InsertNoDups1 ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( true ) );
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(1, sorted->numEntries(opCtx.get()));
+ }
+}
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- sorted->insert( opCtx.get(), BSON( "" << 1 ), RecordId( 5, 18 ), false );
- uow.commit();
- }
- }
+TEST(SortedDataInterface, InsertNoDups1) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(true));
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- sorted->insert( opCtx.get(), BSON( "" << 2 ), RecordId( 5, 20 ), false );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ sorted->insert(opCtx.get(), BSON("" << 1), RecordId(5, 18), false);
+ uow.commit();
}
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 2, sorted->numEntries( opCtx.get() ) );
+ WriteUnitOfWork uow(opCtx.get());
+ sorted->insert(opCtx.get(), BSON("" << 2), RecordId(5, 20), false);
+ uow.commit();
}
-
}
- TEST( SortedDataInterface, InsertNoDups2 ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( true ) );
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(2, sorted->numEntries(opCtx.get()));
+ }
+}
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- sorted->insert( opCtx.get(), BSON( "" << 1 ), RecordId( 5, 2 ), false );
- uow.commit();
- }
- }
+TEST(SortedDataInterface, InsertNoDups2) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(true));
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- sorted->insert( opCtx.get(), BSON( "" << 1 ), RecordId( 5, 4 ), false );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ sorted->insert(opCtx.get(), BSON("" << 1), RecordId(5, 2), false);
+ uow.commit();
}
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 1, sorted->numEntries( opCtx.get() ) );
+ WriteUnitOfWork uow(opCtx.get());
+ sorted->insert(opCtx.get(), BSON("" << 1), RecordId(5, 4), false);
+ uow.commit();
}
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(1, sorted->numEntries(opCtx.get()));
}
+}
- TEST( SortedDataInterface, Unindex1 ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- sorted->insert( opCtx.get(), BSON( "" << 1 ), RecordId( 5, 18 ), true );
- uow.commit();
- }
- }
+TEST(SortedDataInterface, Unindex1) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 1, sorted->numEntries( opCtx.get() ) );
+ WriteUnitOfWork uow(opCtx.get());
+ sorted->insert(opCtx.get(), BSON("" << 1), RecordId(5, 18), true);
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- sorted->unindex( opCtx.get(), BSON( "" << 1 ), RecordId( 5, 20 ), true );
- ASSERT_EQUALS( 1, sorted->numEntries( opCtx.get() ) );
- uow.commit();
- }
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(1, sorted->numEntries(opCtx.get()));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 1, sorted->numEntries( opCtx.get() ) );
+ WriteUnitOfWork uow(opCtx.get());
+ sorted->unindex(opCtx.get(), BSON("" << 1), RecordId(5, 20), true);
+ ASSERT_EQUALS(1, sorted->numEntries(opCtx.get()));
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- sorted->unindex( opCtx.get(), BSON( "" << 2 ), RecordId( 5, 18 ), true );
- ASSERT_EQUALS( 1, sorted->numEntries( opCtx.get() ) );
- uow.commit();
- }
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(1, sorted->numEntries(opCtx.get()));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 1, sorted->numEntries( opCtx.get() ) );
+ WriteUnitOfWork uow(opCtx.get());
+ sorted->unindex(opCtx.get(), BSON("" << 2), RecordId(5, 18), true);
+ ASSERT_EQUALS(1, sorted->numEntries(opCtx.get()));
+ uow.commit();
}
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(1, sorted->numEntries(opCtx.get()));
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- sorted->unindex( opCtx.get(), BSON( "" << 1 ), RecordId( 5, 18 ), true );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- uow.commit();
- }
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
+ WriteUnitOfWork uow(opCtx.get());
+ sorted->unindex(opCtx.get(), BSON("" << 1), RecordId(5, 18), true);
+ ASSERT(sorted->isEmpty(opCtx.get()));
+ uow.commit();
}
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
}
+}
- TEST( SortedDataInterface, Unindex2Rollback ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- sorted->insert( opCtx.get(), BSON( "" << 1 ), RecordId( 5, 18 ), true );
- uow.commit();
- }
- }
+TEST(SortedDataInterface, Unindex2Rollback) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 1, sorted->numEntries( opCtx.get() ) );
+ WriteUnitOfWork uow(opCtx.get());
+ sorted->insert(opCtx.get(), BSON("" << 1), RecordId(5, 18), true);
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- sorted->unindex( opCtx.get(), BSON( "" << 1 ), RecordId( 5, 18 ), true );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- // no commit
- }
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(1, sorted->numEntries(opCtx.get()));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 1, sorted->numEntries( opCtx.get() ) );
+ WriteUnitOfWork uow(opCtx.get());
+ sorted->unindex(opCtx.get(), BSON("" << 1), RecordId(5, 18), true);
+ ASSERT(sorted->isEmpty(opCtx.get()));
+ // no commit
}
-
}
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(1, sorted->numEntries(opCtx.get()));
+ }
+}
- TEST( SortedDataInterface, CursorIterate1 ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
- int N = 5;
- for ( int i = 0; i < N; i++ ) {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), BSON( "" << i ), RecordId( 5, i * 2 ), true ) );
- uow.commit();
- }
- }
+TEST(SortedDataInterface, CursorIterate1) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
+ int N = 5;
+ for (int i = 0; i < N; i++) {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get()) );
- int n = 0;
- for (auto entry = cursor->seek(BSONObj(), true); entry; entry = cursor->next()) {
- ASSERT_EQ(entry, IndexKeyEntry(BSON("" << n), RecordId(5, n * 2)));
- n++;
- }
- ASSERT_EQUALS( N, n );
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), BSON("" << i), RecordId(5, i * 2), true));
+ uow.commit();
}
-
-
}
- TEST( SortedDataInterface, CursorIterate1WithSaveRestore ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
-
- int N = 5;
- for ( int i = 0; i < N; i++ ) {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- sorted->insert( opCtx.get(), BSON( "" << i ), RecordId( 5, i * 2 ), true );
- uow.commit();
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(sorted->newCursor(opCtx.get()));
+ int n = 0;
+ for (auto entry = cursor->seek(BSONObj(), true); entry; entry = cursor->next()) {
+ ASSERT_EQ(entry, IndexKeyEntry(BSON("" << n), RecordId(5, n * 2)));
+ n++;
}
+ ASSERT_EQUALS(N, n);
+ }
+}
+
+TEST(SortedDataInterface, CursorIterate1WithSaveRestore) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
+ int N = 5;
+ for (int i = 0; i < N; i++) {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get()) );
- int n = 0;
- for (auto entry = cursor->seek(BSONObj(), true); entry; entry = cursor->next()) {
- ASSERT_EQ(entry, IndexKeyEntry(BSON("" << n), RecordId(5, n * 2)));
- n++;
- cursor->savePositioned();
- cursor->restore( opCtx.get() );
- }
- ASSERT_EQUALS( N, n );
+ WriteUnitOfWork uow(opCtx.get());
+ sorted->insert(opCtx.get(), BSON("" << i), RecordId(5, i * 2), true);
+ uow.commit();
}
-
}
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(sorted->newCursor(opCtx.get()));
+ int n = 0;
+ for (auto entry = cursor->seek(BSONObj(), true); entry; entry = cursor->next()) {
+ ASSERT_EQ(entry, IndexKeyEntry(BSON("" << n), RecordId(5, n * 2)));
+ n++;
+ cursor->savePositioned();
+ cursor->restore(opCtx.get());
+ }
+ ASSERT_EQUALS(N, n);
+ }
+}
- TEST( SortedDataInterface, CursorIterateAllDupKeysWithSaveRestore ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
- int N = 5;
- for ( int i = 0; i < N; i++ ) {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- sorted->insert( opCtx.get(), BSON( "" << 5 ), RecordId( 5, i * 2 ), true );
- uow.commit();
- }
- }
+TEST(SortedDataInterface, CursorIterateAllDupKeysWithSaveRestore) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
+ int N = 5;
+ for (int i = 0; i < N; i++) {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get()) );
- int n = 0;
- for (auto entry = cursor->seek(BSONObj(), true); entry; entry = cursor->next()) {
- ASSERT_EQ(entry, IndexKeyEntry(BSON("" << 5), RecordId(5, n * 2)));
- n++;
- cursor->savePositioned();
- cursor->restore( opCtx.get() );
- }
- ASSERT_EQUALS( N, n );
+ WriteUnitOfWork uow(opCtx.get());
+ sorted->insert(opCtx.get(), BSON("" << 5), RecordId(5, i * 2), true);
+ uow.commit();
}
-
}
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(sorted->newCursor(opCtx.get()));
+ int n = 0;
+ for (auto entry = cursor->seek(BSONObj(), true); entry; entry = cursor->next()) {
+ ASSERT_EQ(entry, IndexKeyEntry(BSON("" << 5), RecordId(5, n * 2)));
+ n++;
+ cursor->savePositioned();
+ cursor->restore(opCtx.get());
+ }
+ ASSERT_EQUALS(N, n);
+ }
+}
- TEST( SortedDataInterface, Locate1 ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
-
- BSONObj key = BSON( "" << 1 );
- RecordId loc( 5, 16 );
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get()) );
- ASSERT( !cursor->seek( key, true ) );
- }
+TEST(SortedDataInterface, Locate1) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- Status res = sorted->insert( opCtx.get(), key, loc, true );
- ASSERT_OK( res );
- uow.commit();
- }
- }
+ BSONObj key = BSON("" << 1);
+ RecordId loc(5, 16);
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get()) );
- ASSERT_EQ(cursor->seek(key, true), IndexKeyEntry(key, loc));
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(sorted->newCursor(opCtx.get()));
+ ASSERT(!cursor->seek(key, true));
}
- TEST( SortedDataInterface, Locate2 ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
-
- ASSERT_OK( sorted->insert( opCtx.get(), BSON( "" << 1 ), RecordId(1,2), true ) );
- ASSERT_OK( sorted->insert( opCtx.get(), BSON( "" << 2 ), RecordId(1,4), true ) );
- ASSERT_OK( sorted->insert( opCtx.get(), BSON( "" << 3 ), RecordId(1,6), true ) );
- uow.commit();
- }
- }
-
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get()) );
- ASSERT_EQ(cursor->seek(BSON("a" << 2), true),
- IndexKeyEntry(BSON("" << 2), RecordId(1, 4)));
-
- ASSERT_EQ(cursor->next(), IndexKeyEntry(BSON("" << 3), RecordId(1, 6)));
- ASSERT_EQ(cursor->next(), boost::none);
+ WriteUnitOfWork uow(opCtx.get());
+ Status res = sorted->insert(opCtx.get(), key, loc, true);
+ ASSERT_OK(res);
+ uow.commit();
}
}
- TEST( SortedDataInterface, Locate2Empty ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(sorted->newCursor(opCtx.get()));
+ ASSERT_EQ(cursor->seek(key, true), IndexKeyEntry(key, loc));
+ }
+}
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
-
- ASSERT_OK( sorted->insert( opCtx.get(), BSON( "" << 1 ), RecordId(1,2), true ) );
- ASSERT_OK( sorted->insert( opCtx.get(), BSON( "" << 2 ), RecordId(1,4), true ) );
- ASSERT_OK( sorted->insert( opCtx.get(), BSON( "" << 3 ), RecordId(1,6), true ) );
- uow.commit();
- }
- }
+TEST(SortedDataInterface, Locate2) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get()) );
- ASSERT_EQ(cursor->seek(BSONObj(), true), IndexKeyEntry(BSON("" << 1), RecordId(1, 2)));
- }
+ WriteUnitOfWork uow(opCtx.get());
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- const std::unique_ptr<SortedDataInterface::Cursor> cursor( sorted->newCursor(opCtx.get(), false) );
- ASSERT_EQ(cursor->seek(BSONObj(), false), boost::none);
+ ASSERT_OK(sorted->insert(opCtx.get(), BSON("" << 1), RecordId(1, 2), true));
+ ASSERT_OK(sorted->insert(opCtx.get(), BSON("" << 2), RecordId(1, 4), true));
+ ASSERT_OK(sorted->insert(opCtx.get(), BSON("" << 3), RecordId(1, 6), true));
+ uow.commit();
}
-
}
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(sorted->newCursor(opCtx.get()));
+ ASSERT_EQ(cursor->seek(BSON("a" << 2), true), IndexKeyEntry(BSON("" << 2), RecordId(1, 4)));
- TEST( SortedDataInterface, Locate3Descending ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(BSON("" << 3), RecordId(1, 6)));
+ ASSERT_EQ(cursor->next(), boost::none);
+ }
+}
- auto buildEntry = [](int i) { return IndexKeyEntry(BSON("" << i), RecordId(1, i*2)); };
+TEST(SortedDataInterface, Locate2Empty) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- for ( int i = 0; i < 10; i++ ) {
- if ( i == 6 )
- continue;
- WriteUnitOfWork uow( opCtx.get() );
- auto entry = buildEntry(i);
- ASSERT_OK( sorted->insert( opCtx.get(), entry.key, entry.loc, true ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+
+ ASSERT_OK(sorted->insert(opCtx.get(), BSON("" << 1), RecordId(1, 2), true));
+ ASSERT_OK(sorted->insert(opCtx.get(), BSON("" << 2), RecordId(1, 4), true));
+ ASSERT_OK(sorted->insert(opCtx.get(), BSON("" << 3), RecordId(1, 6), true));
+ uow.commit();
}
+ }
+ {
const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
- std::unique_ptr<SortedDataInterface::Cursor> cursor(sorted->newCursor(opCtx.get(), true));
- ASSERT_EQ(cursor->seek(BSON("" << 5), true), buildEntry(5));
- ASSERT_EQ(cursor->next(), buildEntry(7));
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(sorted->newCursor(opCtx.get()));
+ ASSERT_EQ(cursor->seek(BSONObj(), true), IndexKeyEntry(BSON("" << 1), RecordId(1, 2)));
+ }
- cursor = sorted->newCursor(opCtx.get(), /*forward*/false);
- ASSERT_EQ(cursor->seek(BSON("" << 5), /*inclusive*/false), buildEntry(4));
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(
+ sorted->newCursor(opCtx.get(), false));
+ ASSERT_EQ(cursor->seek(BSONObj(), false), boost::none);
+ }
+}
- cursor = sorted->newCursor(opCtx.get(), /*forward*/false);
- ASSERT_EQ(cursor->seek(BSON("" << 5), /*inclusive*/true), buildEntry(5));
- ASSERT_EQ(cursor->next(), buildEntry(4));
- cursor = sorted->newCursor(opCtx.get(), /*forward*/false);
- ASSERT_EQ(cursor->seek(BSON("" << 5), /*inclusive*/false), buildEntry(4));
- ASSERT_EQ(cursor->next(), buildEntry(3));
+TEST(SortedDataInterface, Locate3Descending) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
- cursor = sorted->newCursor(opCtx.get(), /*forward*/false);
- ASSERT_EQ(cursor->seek(BSON("" << 6), /*inclusive*/true), buildEntry(5));
- ASSERT_EQ(cursor->next(), buildEntry(4));
+ auto buildEntry = [](int i) { return IndexKeyEntry(BSON("" << i), RecordId(1, i * 2)); };
- cursor = sorted->newCursor(opCtx.get(), /*forward*/false);
- ASSERT_EQ(cursor->seek(BSON("" << 500), /*inclusive*/true), buildEntry(9));
- ASSERT_EQ(cursor->next(), buildEntry(8));
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ for (int i = 0; i < 10; i++) {
+ if (i == 6)
+ continue;
+ WriteUnitOfWork uow(opCtx.get());
+ auto entry = buildEntry(i);
+ ASSERT_OK(sorted->insert(opCtx.get(), entry.key, entry.loc, true));
+ uow.commit();
+ }
}
- TEST( SortedDataInterface, Locate4 ) {
- auto harnessHelper = newHarnessHelper();
- auto sorted = harnessHelper->newSortedDataInterface(false, {
- {BSON("" << 1), RecordId(1, 2)},
- {BSON("" << 1), RecordId(1, 4)},
- {BSON("" << 1), RecordId(1, 6)},
- {BSON("" << 2), RecordId(1, 8)},
- });
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ std::unique_ptr<SortedDataInterface::Cursor> cursor(sorted->newCursor(opCtx.get(), true));
+ ASSERT_EQ(cursor->seek(BSON("" << 5), true), buildEntry(5));
+ ASSERT_EQ(cursor->next(), buildEntry(7));
+
+ cursor = sorted->newCursor(opCtx.get(), /*forward*/ false);
+ ASSERT_EQ(cursor->seek(BSON("" << 5), /*inclusive*/ false), buildEntry(4));
+
+ cursor = sorted->newCursor(opCtx.get(), /*forward*/ false);
+ ASSERT_EQ(cursor->seek(BSON("" << 5), /*inclusive*/ true), buildEntry(5));
+ ASSERT_EQ(cursor->next(), buildEntry(4));
+
+ cursor = sorted->newCursor(opCtx.get(), /*forward*/ false);
+ ASSERT_EQ(cursor->seek(BSON("" << 5), /*inclusive*/ false), buildEntry(4));
+ ASSERT_EQ(cursor->next(), buildEntry(3));
+
+ cursor = sorted->newCursor(opCtx.get(), /*forward*/ false);
+ ASSERT_EQ(cursor->seek(BSON("" << 6), /*inclusive*/ true), buildEntry(5));
+ ASSERT_EQ(cursor->next(), buildEntry(4));
+
+ cursor = sorted->newCursor(opCtx.get(), /*forward*/ false);
+ ASSERT_EQ(cursor->seek(BSON("" << 500), /*inclusive*/ true), buildEntry(9));
+ ASSERT_EQ(cursor->next(), buildEntry(8));
+}
+
+TEST(SortedDataInterface, Locate4) {
+ auto harnessHelper = newHarnessHelper();
+ auto sorted = harnessHelper->newSortedDataInterface(false,
+ {
+ {BSON("" << 1), RecordId(1, 2)},
+ {BSON("" << 1), RecordId(1, 4)},
+ {BSON("" << 1), RecordId(1, 6)},
+ {BSON("" << 2), RecordId(1, 8)},
+ });
+
+ {
+ auto opCtx = harnessHelper->newOperationContext();
+ auto cursor = sorted->newCursor(opCtx.get());
+ ASSERT_EQ(cursor->seek(BSON("a" << 1), true), IndexKeyEntry(BSON("" << 1), RecordId(1, 2)));
+
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(BSON("" << 1), RecordId(1, 4)));
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(BSON("" << 1), RecordId(1, 6)));
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(BSON("" << 2), RecordId(1, 8)));
+ ASSERT_EQ(cursor->next(), boost::none);
+ }
- {
- auto opCtx = harnessHelper->newOperationContext();
- auto cursor = sorted->newCursor(opCtx.get());
- ASSERT_EQ(cursor->seek(BSON("a" << 1), true),
- IndexKeyEntry(BSON("" << 1), RecordId(1, 2)));
-
- ASSERT_EQ(cursor->next(), IndexKeyEntry(BSON("" << 1), RecordId(1, 4)));
- ASSERT_EQ(cursor->next(), IndexKeyEntry(BSON("" << 1), RecordId(1, 6)));
- ASSERT_EQ(cursor->next(), IndexKeyEntry(BSON("" << 2), RecordId(1, 8)));
- ASSERT_EQ(cursor->next(), boost::none);
- }
+ {
+ auto opCtx = harnessHelper->newOperationContext();
+ auto cursor = sorted->newCursor(opCtx.get(), false);
+ ASSERT_EQ(cursor->seek(BSON("a" << 1), true), IndexKeyEntry(BSON("" << 1), RecordId(1, 6)));
- {
- auto opCtx = harnessHelper->newOperationContext();
- auto cursor = sorted->newCursor(opCtx.get(), false);
- ASSERT_EQ(cursor->seek(BSON("a" << 1), true),
- IndexKeyEntry(BSON("" << 1), RecordId(1, 6)));
-
- ASSERT_EQ(cursor->next(), IndexKeyEntry(BSON("" << 1), RecordId(1, 4)));
- ASSERT_EQ(cursor->next(), IndexKeyEntry(BSON("" << 1), RecordId(1, 2)));
- ASSERT_EQ(cursor->next(), boost::none);
- }
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(BSON("" << 1), RecordId(1, 4)));
+ ASSERT_EQ(cursor->next(), IndexKeyEntry(BSON("" << 1), RecordId(1, 2)));
+ ASSERT_EQ(cursor->next(), boost::none);
}
+}
-} // namespace mongo
+} // namespace mongo
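Note the invariant at the top of this file: the two-argument newSortedDataInterface() overload requires its initializer list to be pre-sorted; it is checked with std::is_sorted, not sorted for you. A hypothetical correct call:

auto harnessHelper = newHarnessHelper();
auto sorted = harnessHelper->newSortedDataInterface(false,
                                                    {
                                                        {key1, loc1}, {key2, loc1},  // ascending key order
                                                    });

auto opCtx = harnessHelper->newOperationContext();
ASSERT_EQUALS(2, sorted->numEntries(opCtx.get()));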
diff --git a/src/mongo/db/storage/sorted_data_interface_test_harness.h b/src/mongo/db/storage/sorted_data_interface_test_harness.h
index 3905b120507..e90f6606e86 100644
--- a/src/mongo/db/storage/sorted_data_interface_test_harness.h
+++ b/src/mongo/db/storage/sorted_data_interface_test_harness.h
@@ -42,90 +42,99 @@
namespace mongo {
- const BSONObj key0 = BSON( "" << 0 );
- const BSONObj key1 = BSON( "" << 1 );
- const BSONObj key2 = BSON( "" << 2 );
- const BSONObj key3 = BSON( "" << 3 );
- const BSONObj key4 = BSON( "" << 4 );
- const BSONObj key5 = BSON( "" << 5 );
- const BSONObj key6 = BSON( "" << 6 );
-
- const BSONObj compoundKey1a = BSON( "" << 1 << "" << "a" );
- const BSONObj compoundKey1b = BSON( "" << 1 << "" << "b" );
- const BSONObj compoundKey1c = BSON( "" << 1 << "" << "c" );
- const BSONObj compoundKey1d = BSON( "" << 1 << "" << "d" );
- const BSONObj compoundKey2a = BSON( "" << 2 << "" << "a" );
- const BSONObj compoundKey2b = BSON( "" << 2 << "" << "b" );
- const BSONObj compoundKey2c = BSON( "" << 2 << "" << "c" );
- const BSONObj compoundKey3a = BSON( "" << 3 << "" << "a" );
- const BSONObj compoundKey3b = BSON( "" << 3 << "" << "b" );
- const BSONObj compoundKey3c = BSON( "" << 3 << "" << "c" );
-
- const RecordId loc1( 0, 42 );
- const RecordId loc2( 0, 44 );
- const RecordId loc3( 0, 46 );
- const RecordId loc4( 0, 48 );
- const RecordId loc5( 0, 50 );
- const RecordId loc6( 0, 52 );
- const RecordId loc7( 0, 54 );
- const RecordId loc8( 0, 56 );
-
- class RecoveryUnit;
-
- class HarnessHelper {
- public:
- HarnessHelper(){}
- virtual ~HarnessHelper() = default;
-
- virtual std::unique_ptr<SortedDataInterface> newSortedDataInterface( bool unique ) = 0;
- virtual std::unique_ptr<RecoveryUnit> newRecoveryUnit() = 0;
-
- virtual std::unique_ptr<OperationContext> newOperationContext() {
- return stdx::make_unique<OperationContextNoop>(newRecoveryUnit().release());
- }
-
- /**
- * Creates a new SDI with some initial data.
- *
- * For clarity to readers, toInsert must be sorted.
- */
- std::unique_ptr<SortedDataInterface> newSortedDataInterface(
- bool unique,
- std::initializer_list<IndexKeyEntry> toInsert);
- };
-
- /**
- * Inserts all entries in toInsert into index.
- * ASSERT_OKs the inserts.
- * Always uses dupsAllowed=true.
- *
- * Should be used for declaring and changing conditions, not for testing inserts.
- */
- void insertToIndex(unowned_ptr<OperationContext> txn,
- unowned_ptr<SortedDataInterface> index,
- std::initializer_list<IndexKeyEntry> toInsert);
-
- inline void insertToIndex(unowned_ptr<HarnessHelper> harness,
- unowned_ptr<SortedDataInterface> index,
- std::initializer_list<IndexKeyEntry> toInsert) {
- insertToIndex(harness->newOperationContext(), index, toInsert);
+const BSONObj key0 = BSON("" << 0);
+const BSONObj key1 = BSON("" << 1);
+const BSONObj key2 = BSON("" << 2);
+const BSONObj key3 = BSON("" << 3);
+const BSONObj key4 = BSON("" << 4);
+const BSONObj key5 = BSON("" << 5);
+const BSONObj key6 = BSON("" << 6);
+
+const BSONObj compoundKey1a = BSON("" << 1 << ""
+ << "a");
+const BSONObj compoundKey1b = BSON("" << 1 << ""
+ << "b");
+const BSONObj compoundKey1c = BSON("" << 1 << ""
+ << "c");
+const BSONObj compoundKey1d = BSON("" << 1 << ""
+ << "d");
+const BSONObj compoundKey2a = BSON("" << 2 << ""
+ << "a");
+const BSONObj compoundKey2b = BSON("" << 2 << ""
+ << "b");
+const BSONObj compoundKey2c = BSON("" << 2 << ""
+ << "c");
+const BSONObj compoundKey3a = BSON("" << 3 << ""
+ << "a");
+const BSONObj compoundKey3b = BSON("" << 3 << ""
+ << "b");
+const BSONObj compoundKey3c = BSON("" << 3 << ""
+ << "c");
+
+const RecordId loc1(0, 42);
+const RecordId loc2(0, 44);
+const RecordId loc3(0, 46);
+const RecordId loc4(0, 48);
+const RecordId loc5(0, 50);
+const RecordId loc6(0, 52);
+const RecordId loc7(0, 54);
+const RecordId loc8(0, 56);
+
+class RecoveryUnit;
+
+class HarnessHelper {
+public:
+ HarnessHelper() {}
+ virtual ~HarnessHelper() = default;
+
+ virtual std::unique_ptr<SortedDataInterface> newSortedDataInterface(bool unique) = 0;
+ virtual std::unique_ptr<RecoveryUnit> newRecoveryUnit() = 0;
+
+ virtual std::unique_ptr<OperationContext> newOperationContext() {
+ return stdx::make_unique<OperationContextNoop>(newRecoveryUnit().release());
}
/**
- * Removes all entries in toRemove from index.
- * Always uses dupsAllowed=true.
+ * Creates a new SDI with some initial data.
*
- * Should be used for declaring and changing conditions, not for testing removes.
+ * For clarity to readers, toInsert must be sorted.
*/
- void removeFromIndex(unowned_ptr<OperationContext> txn,
- unowned_ptr<SortedDataInterface> index,
- std::initializer_list<IndexKeyEntry> toRemove);
-
- inline void removeFromIndex(unowned_ptr<HarnessHelper> harness,
- unowned_ptr<SortedDataInterface> index,
- std::initializer_list<IndexKeyEntry> toRemove) {
- removeFromIndex(harness->newOperationContext(), index, toRemove);
- }
+ std::unique_ptr<SortedDataInterface> newSortedDataInterface(
+ bool unique, std::initializer_list<IndexKeyEntry> toInsert);
+};
+
+/**
+ * Inserts all entries in toInsert into index.
+ * ASSERT_OKs the inserts.
+ * Always uses dupsAllowed=true.
+ *
+ * Should be used for declaring and changing conditions, not for testing inserts.
+ */
+void insertToIndex(unowned_ptr<OperationContext> txn,
+ unowned_ptr<SortedDataInterface> index,
+ std::initializer_list<IndexKeyEntry> toInsert);
+
+inline void insertToIndex(unowned_ptr<HarnessHelper> harness,
+ unowned_ptr<SortedDataInterface> index,
+ std::initializer_list<IndexKeyEntry> toInsert) {
+ insertToIndex(harness->newOperationContext(), index, toInsert);
+}
+
+/**
+ * Removes all entries in toRemove from index.
+ * Always uses dupsAllowed=true.
+ *
+ * Should be used for declaring and changing conditions, not for testing removes.
+ */
+void removeFromIndex(unowned_ptr<OperationContext> txn,
+ unowned_ptr<SortedDataInterface> index,
+ std::initializer_list<IndexKeyEntry> toRemove);
+
+inline void removeFromIndex(unowned_ptr<HarnessHelper> harness,
+ unowned_ptr<SortedDataInterface> index,
+ std::initializer_list<IndexKeyEntry> toRemove) {
+ removeFromIndex(harness->newOperationContext(), index, toRemove);
+}
- std::unique_ptr<HarnessHelper> newHarnessHelper();
+std::unique_ptr<HarnessHelper> newHarnessHelper();
}
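Taken together, the reformatted harness declarations above support a compact setup idiom. A minimal usage sketch, assuming only what this header declares (the key/loc fixtures are the constants defined at the top of the file):

    // Build an index pre-seeded with sorted initial data, then adjust test
    // preconditions without asserting on insert/remove behavior itself.
    const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
    const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(
        false /* unique */, {{key1, loc1}, {key2, loc2}}));  // toInsert must be sorted

    insertToIndex(harnessHelper.get(), sorted.get(), {{key3, loc3}});
    removeFromIndex(harnessHelper.get(), sorted.get(), {{key3, loc3}});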
diff --git a/src/mongo/db/storage/sorted_data_interface_test_insert.cpp b/src/mongo/db/storage/sorted_data_interface_test_insert.cpp
index 2aa254f2f3f..71ec797cc17 100644
--- a/src/mongo/db/storage/sorted_data_interface_test_insert.cpp
+++ b/src/mongo/db/storage/sorted_data_interface_test_insert.cpp
@@ -37,325 +37,327 @@
namespace mongo {
- // Insert a key and verify that the number of entries in the index equals 1.
- TEST( SortedDataInterface, Insert ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- }
+// Insert a key and verify that the number of entries in the index equals 1.
+TEST(SortedDataInterface, Insert) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), key1, loc1, true ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), key1, loc1, true));
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 1, sorted->numEntries( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(1, sorted->numEntries(opCtx.get()));
}
+}
- // Insert a compound key and verify that the number of entries in the index equals 1.
- TEST( SortedDataInterface, InsertCompoundKey ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
+// Insert a compound key and verify that the number of entries in the index equals 1.
+TEST(SortedDataInterface, InsertCompoundKey) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), compoundKey1a, loc1, true ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), compoundKey1a, loc1, true));
+ uow.commit();
}
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(1, sorted->numEntries(opCtx.get()));
+ }
+}
+
+// Insert multiple, distinct keys at the same RecordId and verify that the
+// number of entries in the index equals the number that were inserted, even
+// when duplicates are not allowed.
+TEST(SortedDataInterface, InsertSameDiskLoc) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
+ }
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 1, sorted->numEntries( opCtx.get() ) );
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), key1, loc1, true));
+ ASSERT_OK(sorted->insert(opCtx.get(), key2, loc1, true));
+ uow.commit();
}
}
- // Insert multiple, distinct keys at the same RecordId and verify that the
- // number of entries in the index equals the number that were inserted, even
- // when duplicates are not allowed.
- TEST( SortedDataInterface, InsertSameDiskLoc ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(2, sorted->numEntries(opCtx.get()));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), key3, loc1, true));
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), key1, loc1, true ) );
- ASSERT_OK( sorted->insert( opCtx.get(), key2, loc1, true ) );
- uow.commit();
- }
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(3, sorted->numEntries(opCtx.get()));
+ }
+}
+
+// Insert multiple, distinct keys at the same RecordId and verify that the
+// number of entries in the index equals the number that were inserted, even
+// when duplicates are allowed.
+TEST(SortedDataInterface, InsertSameDiskLocWithDupsAllowed) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(true));
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 2, sorted->numEntries( opCtx.get() ) );
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), key1, loc1, false));
+ ASSERT_OK(sorted->insert(opCtx.get(), key2, loc1, true /* allow duplicates */));
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), key3, loc1, true ) );
- uow.commit();
- }
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(2, sorted->numEntries(opCtx.get()));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 3, sorted->numEntries( opCtx.get() ) );
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), key3, loc1, true /* allow duplicates */));
+ uow.commit();
}
}
- // Insert multiple, distinct keys at the same RecordId and verify that the
- // number of entries in the index equals the number that were inserted, even
- // when duplicates are allowed.
- TEST( SortedDataInterface, InsertSameDiskLocWithDupsAllowed ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( true ) );
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(3, sorted->numEntries(opCtx.get()));
+ }
+}
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- }
+// Insert the same key multiple times and verify that only 1 entry exists
+// in the index when duplicates are not allowed.
+TEST(SortedDataInterface, InsertSameKey) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(true));
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), key1, loc1, false ) );
- ASSERT_OK( sorted->insert( opCtx.get(), key2, loc1, true /* allow duplicates */ ) );
- uow.commit();
- }
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 2, sorted->numEntries( opCtx.get() ) );
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), key1, loc1, false));
+ ASSERT_NOT_OK(sorted->insert(opCtx.get(), key1, loc2, false));
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), key3, loc1, true /* allow duplicates */ ) );
- uow.commit();
- }
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(1, sorted->numEntries(opCtx.get()));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 3, sorted->numEntries( opCtx.get() ) );
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_NOT_OK(sorted->insert(opCtx.get(), key1, loc2, false));
+ uow.commit();
}
}
- // Insert the same key multiple times and verify that only 1 entry exists
- // in the index when duplicates are not allowed.
- TEST( SortedDataInterface, InsertSameKey ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( true ) );
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(1, sorted->numEntries(opCtx.get()));
+ }
+}
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- }
+namespace {
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), key1, loc1, false ) );
- ASSERT_NOT_OK( sorted->insert( opCtx.get(), key1, loc2, false ) );
- uow.commit();
- }
- }
+// Insert the same key multiple times and verify that all entries exist
+// in the index when duplicates are allowed. Since it is illegal to open a cursor to a unique
+// index while the unique constraint is violated, this is tested by running the test 3 times,
+// removing all but one loc each time and verifying the correct loc remains.
+void _testInsertSameKeyWithDupsAllowed(const RecordId locs[3]) {
+ for (int keeper = 0; keeper < 3; keeper++) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(
+ harnessHelper->newSortedDataInterface(true));
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 1, sorted->numEntries( opCtx.get() ) );
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
}
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_NOT_OK( sorted->insert( opCtx.get(), key1, loc2, false ) );
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), key1, locs[0], false));
+ ASSERT_OK(sorted->insert(opCtx.get(), key1, locs[1], true));
+ ASSERT_OK(sorted->insert(opCtx.get(), key1, locs[2], true));
uow.commit();
}
}
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 1, sorted->numEntries( opCtx.get() ) );
- }
- }
-
-namespace {
-
- // Insert the same key multiple times and verify that all entries exists
- // in the index when duplicates are allowed. Since it is illegal to open a cursor to an unique
- // index while the unique constraint is violated, this is tested by running the test 3 times,
- // removing all but one loc each time and verifying the correct loc remains.
- void _testInsertSameKeyWithDupsAllowed(const RecordId locs[3]) {
- for (int keeper = 0; keeper < 3; keeper++) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( true ) );
-
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- }
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK(sorted->insert(opCtx.get(), key1, locs[0], false));
- ASSERT_OK(sorted->insert(opCtx.get(), key1, locs[1], true));
- ASSERT_OK(sorted->insert(opCtx.get(), key1, locs[2], true));
- uow.commit();
- }
- }
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- for (int i = 0; i < 3; i++) {
- if (i != keeper) {
- sorted->unindex(opCtx.get(), key1, locs[i], true);
- }
+ WriteUnitOfWork uow(opCtx.get());
+ for (int i = 0; i < 3; i++) {
+ if (i != keeper) {
+ sorted->unindex(opCtx.get(), key1, locs[i], true);
}
- uow.commit();
}
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 1, sorted->numEntries( opCtx.get() ) );
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(1, sorted->numEntries(opCtx.get()));
- const std::unique_ptr<SortedDataInterface::Cursor> cursor(sorted->newCursor(opCtx.get()));
- ASSERT_EQ(cursor->seek(key1, true), IndexKeyEntry(key1, locs[keeper]));
- }
+ const std::unique_ptr<SortedDataInterface::Cursor> cursor(
+ sorted->newCursor(opCtx.get()));
+ ASSERT_EQ(cursor->seek(key1, true), IndexKeyEntry(key1, locs[keeper]));
}
}
+}
} // namespace
- TEST( SortedDataInterface, InsertSameKeyWithDupsAllowedLocsAscending ) {
- const RecordId locs[3] = {loc1, loc2, loc3};
- _testInsertSameKeyWithDupsAllowed(locs);
+TEST(SortedDataInterface, InsertSameKeyWithDupsAllowedLocsAscending) {
+ const RecordId locs[3] = {loc1, loc2, loc3};
+ _testInsertSameKeyWithDupsAllowed(locs);
+}
+
+TEST(SortedDataInterface, InsertSameKeyWithDupsAllowedLocsDescending) {
+ const RecordId locs[3] = {loc3, loc2, loc1};
+ _testInsertSameKeyWithDupsAllowed(locs);
+}
+
+// Insert multiple keys and verify that the number of entries
+// in the index equals the number that were inserted.
+TEST(SortedDataInterface, InsertMultiple) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(true));
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
}
- TEST( SortedDataInterface, InsertSameKeyWithDupsAllowedLocsDescending ) {
- const RecordId locs[3] = {loc3, loc2, loc1};
- _testInsertSameKeyWithDupsAllowed(locs);
- }
-
- // Insert multiple keys and verify that the number of entries
- // in the index equals the number that were inserted.
- TEST( SortedDataInterface, InsertMultiple ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( true ) );
-
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- }
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), key1, loc1, false ) );
- ASSERT_OK( sorted->insert( opCtx.get(), key2, loc2, false ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), key1, loc1, false));
+ ASSERT_OK(sorted->insert(opCtx.get(), key2, loc2, false));
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 2, sorted->numEntries( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(2, sorted->numEntries(opCtx.get()));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), key3, loc3, false ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), key3, loc3, false));
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 3, sorted->numEntries( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(3, sorted->numEntries(opCtx.get()));
}
+}
- // Insert multiple compound keys and verify that the number of entries
- // in the index equals the number that were inserted.
- TEST( SortedDataInterface, InsertMultipleCompoundKeys ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( true ) );
+// Insert multiple compound keys and verify that the number of entries
+// in the index equals the number that were inserted.
+TEST(SortedDataInterface, InsertMultipleCompoundKeys) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(true));
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), compoundKey1a, loc1, false ) );
- ASSERT_OK( sorted->insert( opCtx.get(), compoundKey1b, loc2, false ) );
- ASSERT_OK( sorted->insert( opCtx.get(), compoundKey2b, loc3, false ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), compoundKey1a, loc1, false));
+ ASSERT_OK(sorted->insert(opCtx.get(), compoundKey1b, loc2, false));
+ ASSERT_OK(sorted->insert(opCtx.get(), compoundKey2b, loc3, false));
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 3, sorted->numEntries( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(3, sorted->numEntries(opCtx.get()));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), compoundKey1c, loc4, false ) );
- ASSERT_OK( sorted->insert( opCtx.get(), compoundKey3a, loc5, false ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), compoundKey1c, loc4, false));
+ ASSERT_OK(sorted->insert(opCtx.get(), compoundKey3a, loc5, false));
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 5, sorted->numEntries( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(5, sorted->numEntries(opCtx.get()));
}
+}
-} // namespace mongo
+} // namespace mongo
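The tests above all pivot on the trailing dupsAllowed flag of insert(). A condensed sketch of the contract they exercise, matching what InsertSameKey and _testInsertSameKeyWithDupsAllowed assert on a unique index:

    const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
    const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(true));
    const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
    WriteUnitOfWork uow(opCtx.get());
    ASSERT_OK(sorted->insert(opCtx.get(), key1, loc1, false));      // first entry for the key
    ASSERT_NOT_OK(sorted->insert(opCtx.get(), key1, loc2, false));  // unique check rejects a second loc
    ASSERT_OK(sorted->insert(opCtx.get(), key1, loc2, true));       // dupsAllowed=true bypasses the check
    uow.commit();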
diff --git a/src/mongo/db/storage/sorted_data_interface_test_isempty.cpp b/src/mongo/db/storage/sorted_data_interface_test_isempty.cpp
index 8dcef9a3770..16ea19ca815 100644
--- a/src/mongo/db/storage/sorted_data_interface_test_isempty.cpp
+++ b/src/mongo/db/storage/sorted_data_interface_test_isempty.cpp
@@ -37,46 +37,46 @@
namespace mongo {
- // Verify that isEmpty() returns true when the index is empty,
- // returns false when a key is inserted, and returns true again
- // when that is unindex.
- TEST( SortedDataInterface, IsEmpty ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( true ) );
+// Verify that isEmpty() returns true when the index is empty,
+// returns false when a key is inserted, and returns true again
+// when that key is unindexed.
+TEST(SortedDataInterface, IsEmpty) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(true));
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), key1, loc1, false ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), key1, loc1, false));
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( !sorted->isEmpty( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(!sorted->isEmpty(opCtx.get()));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- sorted->unindex( opCtx.get(), key1, loc1, false );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ sorted->unindex(opCtx.get(), key1, loc1, false);
+ ASSERT(sorted->isEmpty(opCtx.get()));
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
}
+}
-} // namespace mongo
+} // namespace mongo
diff --git a/src/mongo/db/storage/sorted_data_interface_test_rollback.cpp b/src/mongo/db/storage/sorted_data_interface_test_rollback.cpp
index 4b1125e5f07..c99627bf4d1 100644
--- a/src/mongo/db/storage/sorted_data_interface_test_rollback.cpp
+++ b/src/mongo/db/storage/sorted_data_interface_test_rollback.cpp
@@ -37,119 +37,119 @@
namespace mongo {
- // Insert multiple keys and verify that omitting the commit()
- // on the WriteUnitOfWork causes the changes to not become visible.
- TEST( SortedDataInterface, InsertWithoutCommit ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( true ) );
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- }
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), key1, loc1, false ) );
- // no commit
- }
- }
+// Insert multiple keys and verify that omitting the commit()
+// on the WriteUnitOfWork prevents the changes from becoming visible.
+TEST(SortedDataInterface, InsertWithoutCommit) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(true));
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), key1, loc1, false));
+ // no commit
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), key2, loc1, false ) );
- ASSERT_OK( sorted->insert( opCtx.get(), key3, loc2, false ) );
- // no commit
- }
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), key2, loc1, false));
+ ASSERT_OK(sorted->insert(opCtx.get(), key3, loc2, false));
+ // no commit
}
}
- // Insert multiple keys, then unindex those same keys and verify that
- // omitting the commit() on the WriteUnitOfWork causes the changes to
- // not become visible.
- TEST( SortedDataInterface, UnindexWithoutCommit ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
+ }
+}
+
+// Insert multiple keys, then unindex those same keys and verify that
+// omitting the commit() on the WriteUnitOfWork prevents the changes
+// from becoming visible.
+TEST(SortedDataInterface, UnindexWithoutCommit) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), key1, loc1, true ) );
- ASSERT_OK( sorted->insert( opCtx.get(), key2, loc2, true ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), key1, loc1, true));
+ ASSERT_OK(sorted->insert(opCtx.get(), key2, loc2, true));
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 2, sorted->numEntries( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(2, sorted->numEntries(opCtx.get()));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- sorted->unindex( opCtx.get(), key2, loc2, true );
- ASSERT_EQUALS( 1, sorted->numEntries( opCtx.get() ) );
- // no commit
- }
+ WriteUnitOfWork uow(opCtx.get());
+ sorted->unindex(opCtx.get(), key2, loc2, true);
+ ASSERT_EQUALS(1, sorted->numEntries(opCtx.get()));
+ // no commit
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 2, sorted->numEntries( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(2, sorted->numEntries(opCtx.get()));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), key3, loc3, true ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), key3, loc3, true));
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 3, sorted->numEntries( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(3, sorted->numEntries(opCtx.get()));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- sorted->unindex( opCtx.get(), key1, loc1, true );
- ASSERT_EQUALS( 2, sorted->numEntries( opCtx.get() ) );
- sorted->unindex( opCtx.get(), key3, loc3, true );
- ASSERT_EQUALS( 1, sorted->numEntries( opCtx.get() ) );
- // no commit
- }
+ WriteUnitOfWork uow(opCtx.get());
+ sorted->unindex(opCtx.get(), key1, loc1, true);
+ ASSERT_EQUALS(2, sorted->numEntries(opCtx.get()));
+ sorted->unindex(opCtx.get(), key3, loc3, true);
+ ASSERT_EQUALS(1, sorted->numEntries(opCtx.get()));
+ // no commit
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 3, sorted->numEntries( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(3, sorted->numEntries(opCtx.get()));
}
+}
-} // namespace mongo
+} // namespace mongo
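Both rollback tests rely on the same mechanism: a WriteUnitOfWork destroyed without commit() discards its writes. A minimal sketch of that semantics, assuming the usual harness fixtures:

    const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
    const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(true));
    {
        const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
        WriteUnitOfWork uow(opCtx.get());
        ASSERT_OK(sorted->insert(opCtx.get(), key1, loc1, false));
        // uow goes out of scope without commit(): the insert is rolled back
    }
    {
        const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
        ASSERT(sorted->isEmpty(opCtx.get()));
    }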
diff --git a/src/mongo/db/storage/sorted_data_interface_test_spaceused.cpp b/src/mongo/db/storage/sorted_data_interface_test_spaceused.cpp
index 49914ff161c..64171093fb2 100644
--- a/src/mongo/db/storage/sorted_data_interface_test_spaceused.cpp
+++ b/src/mongo/db/storage/sorted_data_interface_test_spaceused.cpp
@@ -37,67 +37,67 @@
namespace mongo {
- // Verify that an empty index takes up no space.
- TEST( SortedDataInterface, GetSpaceUsedBytesEmpty ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
+// Verify that an empty index takes up no space.
+TEST(SortedDataInterface, GetSpaceUsedBytesEmpty) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- }
-
- // SERVER-15416 mmapv1 test harness does not use SimpleRecordStoreV1 as its record store
- // and HeapRecordStoreBtree::dataSize does not have an actual implementation
- // {
- // const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- // ASSERT( sorted->getSpaceUsedBytes( opCtx.get() ) == 0 );
- // }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
}
- // Verify that a nonempty index takes up some space.
- TEST( SortedDataInterface, GetSpaceUsedBytesNonEmpty ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
+ // SERVER-15416 mmapv1 test harness does not use SimpleRecordStoreV1 as its record store
+ // and HeapRecordStoreBtree::dataSize does not have an actual implementation
+ // {
+ // const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
+ // ASSERT( sorted->getSpaceUsedBytes( opCtx.get() ) == 0 );
+ // }
+}
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- }
+// Verify that a nonempty index takes up some space.
+TEST(SortedDataInterface, GetSpaceUsedBytesNonEmpty) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
- int nToInsert = 10;
- for ( int i = 0; i < nToInsert; i++ ) {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- BSONObj key = BSON( "" << i );
- RecordId loc( 42, i * 2 );
- ASSERT_OK( sorted->insert( opCtx.get(), key, loc, true ) );
- uow.commit();
- }
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
+ }
+ int nToInsert = 10;
+ for (int i = 0; i < nToInsert; i++) {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( nToInsert, sorted->numEntries( opCtx.get() ) );
+ WriteUnitOfWork uow(opCtx.get());
+ BSONObj key = BSON("" << i);
+ RecordId loc(42, i * 2);
+ ASSERT_OK(sorted->insert(opCtx.get(), key, loc, true));
+ uow.commit();
}
+ }
- // SERVER-15416 mmapv1 test harness does not use SimpleRecordStoreV1 as its record store
- // and HeapRecordStoreBtree::dataSize does not have an actual implementation
- // long long spaceUsedBytes;
- // {
- // const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- // spaceUsedBytes = sorted->getSpaceUsedBytes( opCtx.get() );
- // ASSERT( spaceUsedBytes > 0 );
- // }
-
- // {
- // // getSpaceUsedBytes() returns the same value when called multiple times
- // // and there were not interleaved write operations.
- // const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- // ASSERT_EQUALS( spaceUsedBytes, sorted->getSpaceUsedBytes( opCtx.get() ) );
- // ASSERT_EQUALS( spaceUsedBytes, sorted->getSpaceUsedBytes( opCtx.get() ) );
- // }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(nToInsert, sorted->numEntries(opCtx.get()));
}
-} // namespace mongo
+ // SERVER-15416 mmapv1 test harness does not use SimpleRecordStoreV1 as its record store
+ // and HeapRecordStoreBtree::dataSize does not have an actual implementation
+ // long long spaceUsedBytes;
+ // {
+ // const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
+ // spaceUsedBytes = sorted->getSpaceUsedBytes( opCtx.get() );
+ // ASSERT( spaceUsedBytes > 0 );
+ // }
+
+ // {
+ // // getSpaceUsedBytes() returns the same value when called multiple times
+ // // and there were not interleaved write operations.
+ // const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
+ // ASSERT_EQUALS( spaceUsedBytes, sorted->getSpaceUsedBytes( opCtx.get() ) );
+ // ASSERT_EQUALS( spaceUsedBytes, sorted->getSpaceUsedBytes( opCtx.get() ) );
+ // }
+}
+
+} // namespace mongo
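The disabled assertions above are kept as comments pending SERVER-15416. If the mmapv1 test harness ever gains a working dataSize(), they would presumably be re-enabled roughly as follows (a sketch only, reconstructed from the commented-out code and reusing the harnessHelper/sorted fixtures of the test):

    long long spaceUsedBytes;
    {
        const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
        spaceUsedBytes = sorted->getSpaceUsedBytes(opCtx.get());
        ASSERT(spaceUsedBytes > 0);  // a nonempty index occupies some space
    }
    {
        // Stable across repeated calls with no interleaved writes.
        const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
        ASSERT_EQUALS(spaceUsedBytes, sorted->getSpaceUsedBytes(opCtx.get()));
    }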
diff --git a/src/mongo/db/storage/sorted_data_interface_test_touch.cpp b/src/mongo/db/storage/sorted_data_interface_test_touch.cpp
index 83a3c314ef9..07ec83fb9c7 100644
--- a/src/mongo/db/storage/sorted_data_interface_test_touch.cpp
+++ b/src/mongo/db/storage/sorted_data_interface_test_touch.cpp
@@ -37,51 +37,51 @@
namespace mongo {
- // Verify that calling touch() on an empty index returns an OK status.
- TEST( SortedDataInterface, TouchEmpty ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
+// Verify that calling touch() on an empty index returns an OK status.
+TEST(SortedDataInterface, TouchEmpty) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- Status status = sorted->touch( opCtx.get() );
- ASSERT( status.isOK() || status.code() == ErrorCodes::CommandNotSupported );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ Status status = sorted->touch(opCtx.get());
+ ASSERT(status.isOK() || status.code() == ErrorCodes::CommandNotSupported);
}
+}
- // Verify that calling touch() on a nonempty index returns an OK status.
- TEST( SortedDataInterface, TouchNonEmpty ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( true ) );
+// Verify that calling touch() on a nonempty index returns an OK status.
+TEST(SortedDataInterface, TouchNonEmpty) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(true));
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), key1, loc1, false ) );
- ASSERT_OK( sorted->insert( opCtx.get(), key2, loc2, false ) );
- ASSERT_OK( sorted->insert( opCtx.get(), key3, loc3, false ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), key1, loc1, false));
+ ASSERT_OK(sorted->insert(opCtx.get(), key2, loc2, false));
+ ASSERT_OK(sorted->insert(opCtx.get(), key3, loc3, false));
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 3, sorted->numEntries( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(3, sorted->numEntries(opCtx.get()));
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- // XXX does not verify the index was brought into memory
- // (even if supported by storage engine)
- Status status = sorted->touch( opCtx.get() );
- ASSERT( status.isOK() || status.code() == ErrorCodes::CommandNotSupported );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ // XXX does not verify the index was brought into memory
+ // (even if supported by storage engine)
+ Status status = sorted->touch(opCtx.get());
+ ASSERT(status.isOK() || status.code() == ErrorCodes::CommandNotSupported);
}
+}
-} // namespace mongo
+} // namespace mongo
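Since touch() is an optional operation, both tests accept either an OK status or CommandNotSupported. A hypothetical helper (not part of this change; name invented for illustration) could factor out that tolerance:

    void assertTouchSucceededOrUnsupported(OperationContext* opCtx, SortedDataInterface* sorted) {
        const Status status = sorted->touch(opCtx);
        ASSERT(status.isOK() || status.code() == ErrorCodes::CommandNotSupported);
    }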
diff --git a/src/mongo/db/storage/sorted_data_interface_test_unindex.cpp b/src/mongo/db/storage/sorted_data_interface_test_unindex.cpp
index b15d1e883e2..d1101a90d12 100644
--- a/src/mongo/db/storage/sorted_data_interface_test_unindex.cpp
+++ b/src/mongo/db/storage/sorted_data_interface_test_unindex.cpp
@@ -37,249 +37,249 @@
namespace mongo {
- // Insert a key and verify that it can be unindexed.
- TEST( SortedDataInterface, Unindex ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
-
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- }
+// Insert a key and verify that it can be unindexed.
+TEST(SortedDataInterface, Unindex) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
+
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), key1, loc1, true ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), key1, loc1, true));
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 1, sorted->numEntries( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(1, sorted->numEntries(opCtx.get()));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- sorted->unindex( opCtx.get(), key1, loc1, true );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ sorted->unindex(opCtx.get(), key1, loc1, true);
+ ASSERT(sorted->isEmpty(opCtx.get()));
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
}
+}
- // Insert a compound key and verify that it can be unindexed.
- TEST( SortedDataInterface, UnindexCompoundKey ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
+// Insert a compound key and verify that it can be unindexed.
+TEST(SortedDataInterface, UnindexCompoundKey) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), compoundKey1a, loc1, true ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), compoundKey1a, loc1, true));
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 1, sorted->numEntries( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(1, sorted->numEntries(opCtx.get()));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- sorted->unindex( opCtx.get(), compoundKey1a, loc1, true );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ sorted->unindex(opCtx.get(), compoundKey1a, loc1, true);
+ ASSERT(sorted->isEmpty(opCtx.get()));
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
}
+}
- // Insert multiple, distinct keys and verify that they can be unindexed.
- TEST( SortedDataInterface, UnindexMultipleDistinct ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
+// Insert multiple, distinct keys and verify that they can be unindexed.
+TEST(SortedDataInterface, UnindexMultipleDistinct) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), key1, loc1, true ) );
- ASSERT_OK( sorted->insert( opCtx.get(), key2, loc2, true ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), key1, loc1, true));
+ ASSERT_OK(sorted->insert(opCtx.get(), key2, loc2, true));
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 2, sorted->numEntries( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(2, sorted->numEntries(opCtx.get()));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- sorted->unindex( opCtx.get(), key2, loc2, true );
- ASSERT_EQUALS( 1, sorted->numEntries( opCtx.get() ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ sorted->unindex(opCtx.get(), key2, loc2, true);
+ ASSERT_EQUALS(1, sorted->numEntries(opCtx.get()));
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 1, sorted->numEntries( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(1, sorted->numEntries(opCtx.get()));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), key3, loc3, true ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), key3, loc3, true));
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 2, sorted->numEntries( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(2, sorted->numEntries(opCtx.get()));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- sorted->unindex( opCtx.get(), key1, loc1, true );
- ASSERT_EQUALS( 1, sorted->numEntries( opCtx.get() ) );
- sorted->unindex( opCtx.get(), key3, loc3, true );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ sorted->unindex(opCtx.get(), key1, loc1, true);
+ ASSERT_EQUALS(1, sorted->numEntries(opCtx.get()));
+ sorted->unindex(opCtx.get(), key3, loc3, true);
+ ASSERT(sorted->isEmpty(opCtx.get()));
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
}
+}
- // Insert the same key multiple times and verify that each occurrence can be unindexed.
- TEST( SortedDataInterface, UnindexMultipleSameKey ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
+// Insert the same key multiple times and verify that each occurrence can be unindexed.
+TEST(SortedDataInterface, UnindexMultipleSameKey) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), key1, loc1, true ) );
- ASSERT_OK( sorted->insert( opCtx.get(), key1, loc2, true /* allow duplicates */ ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), key1, loc1, true));
+ ASSERT_OK(sorted->insert(opCtx.get(), key1, loc2, true /* allow duplicates */));
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 2, sorted->numEntries( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(2, sorted->numEntries(opCtx.get()));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- sorted->unindex( opCtx.get(), key1, loc2, true );
- ASSERT_EQUALS( 1, sorted->numEntries( opCtx.get() ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ sorted->unindex(opCtx.get(), key1, loc2, true);
+ ASSERT_EQUALS(1, sorted->numEntries(opCtx.get()));
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 1, sorted->numEntries( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(1, sorted->numEntries(opCtx.get()));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- ASSERT_OK( sorted->insert( opCtx.get(), key1, loc3, true /* allow duplicates */ ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ ASSERT_OK(sorted->insert(opCtx.get(), key1, loc3, true /* allow duplicates */));
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( 2, sorted->numEntries( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(2, sorted->numEntries(opCtx.get()));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- sorted->unindex( opCtx.get(), key1, loc1, true);
- ASSERT_EQUALS( 1, sorted->numEntries( opCtx.get() ) );
- sorted->unindex( opCtx.get(), key1, loc3, true );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ sorted->unindex(opCtx.get(), key1, loc1, true);
+ ASSERT_EQUALS(1, sorted->numEntries(opCtx.get()));
+ sorted->unindex(opCtx.get(), key1, loc3, true);
+ ASSERT(sorted->isEmpty(opCtx.get()));
+ uow.commit();
}
+ }
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
}
+}
- // Call unindex() on a nonexistent key and verify the result is false.
- TEST( SortedDataInterface, UnindexEmpty ) {
- const std::unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- const std::unique_ptr<SortedDataInterface> sorted( harnessHelper->newSortedDataInterface( false ) );
+// Call unindex() on a nonexistent key and verify that the index is left unchanged.
+TEST(SortedDataInterface, UnindexEmpty) {
+ const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
- {
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT(sorted->isEmpty(opCtx.get()));
+ }
+ {
+ const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const std::unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- sorted->unindex( opCtx.get(), key1, loc1, true );
- ASSERT( sorted->isEmpty( opCtx.get() ) );
- uow.commit();
- }
+ WriteUnitOfWork uow(opCtx.get());
+ sorted->unindex(opCtx.get(), key1, loc1, true);
+ ASSERT(sorted->isEmpty(opCtx.get()));
+ uow.commit();
}
}
+}
-} // namespace mongo
+} // namespace mongo
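As UnindexMultipleSameKey shows, unindex() matches on the (key, RecordId) pair rather than on the key alone. A condensed sketch of that selectivity, using the same fixtures:

    const std::unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
    const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false));
    const std::unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
    WriteUnitOfWork uow(opCtx.get());
    ASSERT_OK(sorted->insert(opCtx.get(), key1, loc1, true));
    ASSERT_OK(sorted->insert(opCtx.get(), key1, loc2, true /* allow duplicates */));
    sorted->unindex(opCtx.get(), key1, loc2, true);  // removes only the (key1, loc2) entry
    ASSERT_EQUALS(1, sorted->numEntries(opCtx.get()));
    uow.commit();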
diff --git a/src/mongo/db/storage/storage_engine.h b/src/mongo/db/storage/storage_engine.h
index 85008da0f5c..67614562b72 100644
--- a/src/mongo/db/storage/storage_engine.h
+++ b/src/mongo/db/storage/storage_engine.h
@@ -39,180 +39,183 @@
namespace mongo {
- class DatabaseCatalogEntry;
- class OperationContext;
- class RecoveryUnit;
- struct StorageGlobalParams;
- class StorageEngineLockFile;
- class StorageEngineMetadata;
+class DatabaseCatalogEntry;
+class OperationContext;
+class RecoveryUnit;
+struct StorageGlobalParams;
+class StorageEngineLockFile;
+class StorageEngineMetadata;
+/**
+ * The StorageEngine class is the top-level interface for creating a new storage
+ * engine. Every StorageEngine implementation must be registered by calling
+ * registerFactory before it can be activated.
+ */
+class StorageEngine {
+public:
/**
- * The StorageEngine class is the top level interface for creating a new storage
- * engine. All StorageEngine(s) must be registered by calling registerFactory in order
- * to possibly be activated.
+ * The interface for creating new instances of storage engines.
+ *
+ * A storage engine provides an instance of this class (along with an associated
+ * name) to the global environment, which then sets the global storage engine
+ * according to the provided configuration parameter.
*/
- class StorageEngine {
+ class Factory {
public:
+ virtual ~Factory() {}
/**
- * The interface for creating new instances of storage engines.
- *
- * A storage engine provides an instance of this class (along with an associated
- * name) to the global environment, which then sets the global storage engine
- * according to the provided configuration parameter.
- */
- class Factory {
- public:
- virtual ~Factory() { }
-
- /**
- * Return a new instance of the StorageEngine. Caller owns the returned pointer.
- */
- virtual StorageEngine* create(const StorageGlobalParams& params,
- const StorageEngineLockFile& lockFile) const = 0;
-
- /**
- * Returns the name of the storage engine.
- *
- * Implementations that change the value of the returned string can cause
- * data file incompatibilities.
- */
- virtual StringData getCanonicalName() const = 0;
-
- /**
- * Validates creation options for a collection in the StorageEngine.
- * Returns an error if the creation options are not valid.
- *
- * Default implementation only accepts empty objects (no options).
- */
- virtual Status validateCollectionStorageOptions(const BSONObj& options) const {
- if (options.isEmpty()) return Status::OK();
- return Status(ErrorCodes::InvalidOptions,
- str::stream() << "storage engine " << getCanonicalName()
- << " does not support any collection storage options");
- }
-
- /**
- * Validates creation options for an index in the StorageEngine.
- * Returns an error if the creation options are not valid.
- *
- * Default implementation only accepts empty objects (no options).
- */
- virtual Status validateIndexStorageOptions(const BSONObj& options) const {
- if (options.isEmpty()) return Status::OK();
- return Status(ErrorCodes::InvalidOptions,
- str::stream() << "storage engine " << getCanonicalName()
- << " does not support any index storage options");
- }
-
- /**
- * Validates existing metadata in the data directory against startup options.
- * Returns an error if the storage engine initialization should not proceed
- * due to any inconsistencies between the current startup options and the creation
- * options stored in the metadata.
- */
- virtual Status validateMetadata(const StorageEngineMetadata& metadata,
- const StorageGlobalParams& params) const = 0;
-
- /**
- * Returns a new document suitable for storing in the data directory metadata.
- * This document will be used by validateMetadata() to check startup options
- * on restart.
- */
- virtual BSONObj createMetadataOptions(const StorageGlobalParams& params) const = 0;
- };
-
- /**
- * Called after the globalStorageEngine pointer has been set up, before any other methods
- * are called. Any initialization work that requires the ability to create OperationContexts
- * should be done here rather than in the constructor.
+ * Return a new instance of the StorageEngine. Caller owns the returned pointer.
*/
- virtual void finishInit() {}
+ virtual StorageEngine* create(const StorageGlobalParams& params,
+ const StorageEngineLockFile& lockFile) const = 0;
/**
- * Returns a new interface to the storage engine's recovery unit. The recovery
- * unit is the durability interface. For details, see recovery_unit.h
+ * Returns the name of the storage engine.
*
- * Caller owns the returned pointer.
+ * Implementations that change the value of the returned string can cause
+ * data file incompatibilities.
*/
- virtual RecoveryUnit* newRecoveryUnit() = 0;
+ virtual StringData getCanonicalName() const = 0;
/**
- * List the databases stored in this storage engine.
+ * Validates creation options for a collection in the StorageEngine.
+ * Returns an error if the creation options are not valid.
*
- * XXX: why doesn't this take OpCtx?
+ * Default implementation only accepts empty objects (no options).
*/
- virtual void listDatabases( std::vector<std::string>* out ) const = 0;
+ virtual Status validateCollectionStorageOptions(const BSONObj& options) const {
+ if (options.isEmpty())
+ return Status::OK();
+ return Status(ErrorCodes::InvalidOptions,
+ str::stream() << "storage engine " << getCanonicalName()
+ << " does not support any collection storage options");
+ }
/**
- * Return the DatabaseCatalogEntry that describes the database indicated by 'db'.
+ * Validates creation options for an index in the StorageEngine.
+ * Returns an error if the creation options are not valid.
*
- * StorageEngine owns returned pointer.
- * It should not be deleted by any caller.
+ * Default implementation only accepts empty objects (no options).
*/
- virtual DatabaseCatalogEntry* getDatabaseCatalogEntry( OperationContext* opCtx,
- StringData db ) = 0;
+ virtual Status validateIndexStorageOptions(const BSONObj& options) const {
+ if (options.isEmpty())
+ return Status::OK();
+ return Status(ErrorCodes::InvalidOptions,
+ str::stream() << "storage engine " << getCanonicalName()
+ << " does not support any index storage options");
+ }
/**
- * Returns whether the storage engine supports its own locking locking below the collection
- * level. If the engine returns true, MongoDB will acquire intent locks down to the
- * collection level and will assume that the engine will ensure consistency at the level of
- * documents. If false, MongoDB will lock the entire collection in Shared/Exclusive mode
- * for read/write operations respectively.
+ * Validates existing metadata in the data directory against startup options.
+ * Returns an error if the storage engine initialization should not proceed
+ * due to any inconsistencies between the current startup options and the creation
+ * options stored in the metadata.
*/
- virtual bool supportsDocLocking() const = 0;
+ virtual Status validateMetadata(const StorageEngineMetadata& metadata,
+ const StorageGlobalParams& params) const = 0;
/**
- * Returns whether the engine supports a journalling concept or not.
+ * Returns a new document suitable for storing in the data directory metadata.
+ * This document will be used by validateMetadata() to check startup options
+ * on restart.
*/
- virtual bool isDurable() const = 0;
+ virtual BSONObj createMetadataOptions(const StorageGlobalParams& params) const = 0;
+ };
- /**
- * Only MMAPv1 should override this and return true to trigger MMAPv1-specific behavior.
- */
- virtual bool isMmapV1() const { return false; }
+ /**
+ * Called after the globalStorageEngine pointer has been set up, before any other methods
+ * are called. Any initialization work that requires the ability to create OperationContexts
+ * should be done here rather than in the constructor.
+ */
+ virtual void finishInit() {}
- /**
- * Closes all file handles associated with a database.
- */
- virtual Status closeDatabase( OperationContext* txn, StringData db ) = 0;
+ /**
+ * Returns a new interface to the storage engine's recovery unit. The recovery
+ * unit is the durability interface. For details, see recovery_unit.h
+ *
+ * Caller owns the returned pointer.
+ */
+ virtual RecoveryUnit* newRecoveryUnit() = 0;
- /**
- * Deletes all data and metadata for a database.
- */
- virtual Status dropDatabase( OperationContext* txn, StringData db ) = 0;
+ /**
+ * List the databases stored in this storage engine.
+ *
+ * XXX: why doesn't this take OpCtx?
+ */
+ virtual void listDatabases(std::vector<std::string>* out) const = 0;
- /**
- * @return number of files flushed
- */
- virtual int flushAllFiles( bool sync ) = 0;
+ /**
+ * Return the DatabaseCatalogEntry that describes the database indicated by 'db'.
+ *
+ * StorageEngine owns returned pointer.
+ * It should not be deleted by any caller.
+ */
+ virtual DatabaseCatalogEntry* getDatabaseCatalogEntry(OperationContext* opCtx,
+ StringData db) = 0;
- /**
- * Recover as much data as possible from a potentially corrupt RecordStore.
- * This only recovers the record data, not indexes or anything else.
- *
- * Generally, this method should not be called directly except by the repairDatabase()
- * free function.
- *
- * NOTE: MMAPv1 does not support this method and has its own repairDatabase() method.
- */
- virtual Status repairRecordStore(OperationContext* txn, const std::string& ns) = 0;
+ /**
+     * Returns whether the storage engine supports its own locking below the collection
+ * level. If the engine returns true, MongoDB will acquire intent locks down to the
+ * collection level and will assume that the engine will ensure consistency at the level of
+ * documents. If false, MongoDB will lock the entire collection in Shared/Exclusive mode
+ * for read/write operations respectively.
+ */
+ virtual bool supportsDocLocking() const = 0;
- /**
- * This method will be called before there is a clean shutdown. Storage engines should
- * override this method if they have clean-up to do that is different from unclean shutdown.
- * MongoDB will not call into the storage subsystem after calling this function.
- *
- * On error, the storage engine should assert and crash.
- * There is intentionally no uncleanShutdown().
- */
- virtual void cleanShutdown() = 0;
+ /**
+ * Returns whether the engine supports a journalling concept or not.
+ */
+ virtual bool isDurable() const = 0;
- protected:
- /**
- * The destructor will never be called. See cleanShutdown instead.
- */
- virtual ~StorageEngine() {}
- };
+ /**
+ * Only MMAPv1 should override this and return true to trigger MMAPv1-specific behavior.
+ */
+ virtual bool isMmapV1() const {
+ return false;
+ }
+
+ /**
+ * Closes all file handles associated with a database.
+ */
+ virtual Status closeDatabase(OperationContext* txn, StringData db) = 0;
+
+ /**
+ * Deletes all data and metadata for a database.
+ */
+ virtual Status dropDatabase(OperationContext* txn, StringData db) = 0;
+
+ /**
+ * @return number of files flushed
+ */
+ virtual int flushAllFiles(bool sync) = 0;
+
+ /**
+ * Recover as much data as possible from a potentially corrupt RecordStore.
+ * This only recovers the record data, not indexes or anything else.
+ *
+ * Generally, this method should not be called directly except by the repairDatabase()
+ * free function.
+ *
+ * NOTE: MMAPv1 does not support this method and has its own repairDatabase() method.
+ */
+ virtual Status repairRecordStore(OperationContext* txn, const std::string& ns) = 0;
+
+ /**
+ * This method will be called before there is a clean shutdown. Storage engines should
+ * override this method if they have clean-up to do that is different from unclean shutdown.
+ * MongoDB will not call into the storage subsystem after calling this function.
+ *
+ * On error, the storage engine should assert and crash.
+ * There is intentionally no uncleanShutdown().
+ */
+ virtual void cleanShutdown() = 0;
+
+protected:
+ /**
+ * The destructor will never be called. See cleanShutdown instead.
+ */
+ virtual ~StorageEngine() {}
+};
} // namespace mongo
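
For orientation, a minimal sketch of a Factory implementation against the interface
above; MyFactory and MyEngine are hypothetical names, not part of this change:

    // Hypothetical factory; assumes the StorageEngine interface declared above.
    class MyFactory : public StorageEngine::Factory {
    public:
        virtual ~MyFactory() {}

        // Caller owns the returned pointer, per the interface contract.
        virtual StorageEngine* create(const StorageGlobalParams& params,
                                      const StorageEngineLockFile& lockFile) const {
            return new MyEngine(params);  // MyEngine: hypothetical StorageEngine subclass.
        }

        // Changing this string after release risks data file incompatibilities.
        virtual StringData getCanonicalName() const {
            return "myEngine";
        }

        // Accept any previously written metadata; a real engine should compare
        // the stored creation options against the current startup options.
        virtual Status validateMetadata(const StorageEngineMetadata& metadata,
                                        const StorageGlobalParams& params) const {
            return Status::OK();
        }

        // No engine-specific options recorded in the data directory metadata.
        virtual BSONObj createMetadataOptions(const StorageGlobalParams& params) const {
            return BSONObj();
        }
    };
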
diff --git a/src/mongo/db/storage/storage_engine_lock_file.h b/src/mongo/db/storage/storage_engine_lock_file.h
index 2cb14ac77d3..24e2359396e 100644
--- a/src/mongo/db/storage/storage_engine_lock_file.h
+++ b/src/mongo/db/storage/storage_engine_lock_file.h
@@ -36,60 +36,60 @@
namespace mongo {
- class StorageEngineLockFile {
- MONGO_DISALLOW_COPYING(StorageEngineLockFile);
- public:
-
- /**
- * Checks existing lock file, if present, to see if it contains data from a previous
- * unclean shutdown. A clean shutdown should have produced a zero length lock file.
- * Uses open() to read existing lock file or create new file.
- * Uses boost::filesystem to check lock file so may throw boost::exception.
- */
- StorageEngineLockFile(const std::string& dbpath);
-
- virtual ~StorageEngineLockFile();
-
- /**
- * Returns the path to the lock file.
- */
- std::string getFilespec() const;
-
- /**
- * Returns true if lock file was not zeroed out due to previous unclean shutdown.
- * This state is evaluated at object initialization to allow storage engine
- * to make decisions on recovery based on this information after open() has been called.
- */
- bool createdByUncleanShutdown() const;
-
- /**
- * Opens and locks 'mongod.lock' in 'dbpath' directory.
- */
- Status open();
-
- /**
- * Closes lock file handles.
- */
- void close();
-
- /**
- * Writes current process ID to file.
- * Fails if lock file has not been opened.
- */
- Status writePid();
-
- /**
- * Truncates file contents and releases file locks.
- */
- void clearPidAndUnlock();
-
- private:
- std::string _dbpath;
- std::string _filespec;
- bool _uncleanShutdown;
-
- class LockFileHandle;
- std::unique_ptr<LockFileHandle> _lockFileHandle;
- };
+class StorageEngineLockFile {
+ MONGO_DISALLOW_COPYING(StorageEngineLockFile);
+
+public:
+ /**
+ * Checks existing lock file, if present, to see if it contains data from a previous
+ * unclean shutdown. A clean shutdown should have produced a zero length lock file.
+ * Uses open() to read existing lock file or create new file.
+ * Uses boost::filesystem to check lock file so may throw boost::exception.
+ */
+ StorageEngineLockFile(const std::string& dbpath);
+
+ virtual ~StorageEngineLockFile();
+
+ /**
+ * Returns the path to the lock file.
+ */
+ std::string getFilespec() const;
+
+ /**
+ * Returns true if lock file was not zeroed out due to previous unclean shutdown.
+ * This state is evaluated at object initialization to allow storage engine
+ * to make decisions on recovery based on this information after open() has been called.
+ */
+ bool createdByUncleanShutdown() const;
+
+ /**
+ * Opens and locks 'mongod.lock' in 'dbpath' directory.
+ */
+ Status open();
+
+ /**
+ * Closes lock file handles.
+ */
+ void close();
+
+ /**
+ * Writes current process ID to file.
+ * Fails if lock file has not been opened.
+ */
+ Status writePid();
+
+ /**
+ * Truncates file contents and releases file locks.
+ */
+ void clearPidAndUnlock();
+
+private:
+ std::string _dbpath;
+ std::string _filespec;
+ bool _uncleanShutdown;
+
+ class LockFileHandle;
+ std::unique_ptr<LockFileHandle> _lockFileHandle;
+};
} // namespace mongo
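
As a usage sketch (not part of this change), the intended lifecycle of
StorageEngineLockFile is open(), then writePid(), then clearPidAndUnlock() at
shutdown; error handling is abbreviated and the dbpath is a placeholder:

    StorageEngineLockFile lockFile("/data/db");  // may throw boost::exception
    if (lockFile.createdByUncleanShutdown()) {
        // mongod.lock was left non-empty; the engine may need to run recovery.
    }
    Status status = lockFile.open();   // creates/locks <dbpath>/mongod.lock
    if (!status.isOK()) {
        // Typically DBPathInUse: another mongod already holds the lock.
    }
    status = lockFile.writePid();      // truncates the file, writes our PID
    // ... server runs ...
    lockFile.clearPidAndUnlock();      // truncates contents, releases the lock
    lockFile.close();
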
diff --git a/src/mongo/db/storage/storage_engine_lock_file_posix.cpp b/src/mongo/db/storage/storage_engine_lock_file_posix.cpp
index f7f12871053..66f477232d2 100644
--- a/src/mongo/db/storage/storage_engine_lock_file_posix.cpp
+++ b/src/mongo/db/storage/storage_engine_lock_file_posix.cpp
@@ -50,150 +50,153 @@ namespace mongo {
namespace {
- const std::string kLockFileBasename = "mongod.lock";
+const std::string kLockFileBasename = "mongod.lock";
} // namespace
- class StorageEngineLockFile::LockFileHandle {
- public:
- static const int kInvalidFd = -1;
- LockFileHandle() : _fd(kInvalidFd) { }
- bool isValid() const { return _fd != kInvalidFd; }
- void clear() { _fd = kInvalidFd; }
- int _fd;
- };
-
- StorageEngineLockFile::StorageEngineLockFile(const std::string& dbpath)
- : _dbpath(dbpath),
- _filespec((boost::filesystem::path(_dbpath) / kLockFileBasename).string()),
- _uncleanShutdown(boost::filesystem::exists(_filespec) &&
- boost::filesystem::file_size(_filespec) > 0),
- _lockFileHandle(new LockFileHandle()) {
+class StorageEngineLockFile::LockFileHandle {
+public:
+ static const int kInvalidFd = -1;
+ LockFileHandle() : _fd(kInvalidFd) {}
+ bool isValid() const {
+ return _fd != kInvalidFd;
}
-
- StorageEngineLockFile::~StorageEngineLockFile() { }
-
- std::string StorageEngineLockFile::getFilespec() const {
- return _filespec;
- }
-
- bool StorageEngineLockFile::createdByUncleanShutdown() const {
- return _uncleanShutdown;
+ void clear() {
+ _fd = kInvalidFd;
}
-
- Status StorageEngineLockFile::open() {
- try {
- if (!boost::filesystem::exists(_dbpath)) {
- return Status(ErrorCodes::NonExistentPath, str::stream()
- << "Data directory " << _dbpath << " not found.");
- }
- }
- catch (const std::exception& ex) {
- return Status(ErrorCodes::UnknownError, str::stream()
- << "Unable to check existence of data directory "
- << _dbpath << ": " << ex.what());
+ int _fd;
+};
+
+StorageEngineLockFile::StorageEngineLockFile(const std::string& dbpath)
+ : _dbpath(dbpath),
+ _filespec((boost::filesystem::path(_dbpath) / kLockFileBasename).string()),
+ _uncleanShutdown(boost::filesystem::exists(_filespec) &&
+ boost::filesystem::file_size(_filespec) > 0),
+ _lockFileHandle(new LockFileHandle()) {}
+
+StorageEngineLockFile::~StorageEngineLockFile() {}
+
+std::string StorageEngineLockFile::getFilespec() const {
+ return _filespec;
+}
+
+bool StorageEngineLockFile::createdByUncleanShutdown() const {
+ return _uncleanShutdown;
+}
+
+Status StorageEngineLockFile::open() {
+ try {
+ if (!boost::filesystem::exists(_dbpath)) {
+ return Status(ErrorCodes::NonExistentPath,
+ str::stream() << "Data directory " << _dbpath << " not found.");
}
+ } catch (const std::exception& ex) {
+ return Status(ErrorCodes::UnknownError,
+ str::stream() << "Unable to check existence of data directory " << _dbpath
+ << ": " << ex.what());
+ }
- int lockFile = ::open(_filespec.c_str(), O_RDWR | O_CREAT, S_IRWXU | S_IRWXG | S_IRWXO);
- if (lockFile < 0) {
- int errorcode = errno;
- return Status(ErrorCodes::DBPathInUse, str::stream()
- << "Unable to create/open lock file: "
- << _filespec << ' ' << errnoWithDescription(errorcode)
- << " Is a mongod instance already running?");
- }
+ int lockFile = ::open(_filespec.c_str(), O_RDWR | O_CREAT, S_IRWXU | S_IRWXG | S_IRWXO);
+ if (lockFile < 0) {
+ int errorcode = errno;
+ return Status(ErrorCodes::DBPathInUse,
+ str::stream() << "Unable to create/open lock file: " << _filespec << ' '
+ << errnoWithDescription(errorcode)
+ << " Is a mongod instance already running?");
+ }
#if !defined(__sun)
- int ret = ::flock(lockFile, LOCK_EX | LOCK_NB);
+ int ret = ::flock(lockFile, LOCK_EX | LOCK_NB);
#else
- struct flock fileLockInfo = {0};
- fileLockInfo.l_type = F_WRLCK;
- fileLockInfo.l_whence = SEEK_SET;
- int ret = ::fcntl(lockFile, F_SETLK, &fileLockInfo);
+ struct flock fileLockInfo = {0};
+ fileLockInfo.l_type = F_WRLCK;
+ fileLockInfo.l_whence = SEEK_SET;
+ int ret = ::fcntl(lockFile, F_SETLK, &fileLockInfo);
#endif // !defined(__sun)
- if (ret != 0) {
- int errorcode = errno;
- ::close(lockFile);
- return Status(ErrorCodes::DBPathInUse, str::stream()
- << "Unable to lock file: "
- << _filespec << ' ' << errnoWithDescription(errorcode)
- << ". Is a mongod instance already running?");
- }
- _lockFileHandle->_fd = lockFile;
- return Status::OK();
+ if (ret != 0) {
+ int errorcode = errno;
+ ::close(lockFile);
+ return Status(ErrorCodes::DBPathInUse,
+ str::stream() << "Unable to lock file: " << _filespec << ' '
+ << errnoWithDescription(errorcode)
+ << ". Is a mongod instance already running?");
}
+ _lockFileHandle->_fd = lockFile;
+ return Status::OK();
+}
- void StorageEngineLockFile::close() {
- if (!_lockFileHandle->isValid()) {
- return;
- }
- ::close(_lockFileHandle->_fd);
- _lockFileHandle->clear();
+void StorageEngineLockFile::close() {
+ if (!_lockFileHandle->isValid()) {
+ return;
+ }
+ ::close(_lockFileHandle->_fd);
+ _lockFileHandle->clear();
+}
+
+Status StorageEngineLockFile::writePid() {
+ if (!_lockFileHandle->isValid()) {
+ return Status(ErrorCodes::FileNotOpen,
+ str::stream() << "Unable to write process ID to " << _filespec
+ << " because file has not been opened.");
}
- Status StorageEngineLockFile::writePid() {
- if (!_lockFileHandle->isValid()) {
- return Status(ErrorCodes::FileNotOpen, str::stream()
- << "Unable to write process ID to " << _filespec
- << " because file has not been opened.");
- }
-
- if (::ftruncate(_lockFileHandle->_fd, 0)) {
- int errorcode = errno;
- return Status(ErrorCodes::FileStreamFailed, str::stream()
- << "Unable to write process id to file (ftruncate failed): "
- << _filespec << ' ' << errnoWithDescription(errorcode));
- }
+ if (::ftruncate(_lockFileHandle->_fd, 0)) {
+ int errorcode = errno;
+ return Status(ErrorCodes::FileStreamFailed,
+ str::stream() << "Unable to write process id to file (ftruncate failed): "
+ << _filespec << ' ' << errnoWithDescription(errorcode));
+ }
- ProcessId pid = ProcessId::getCurrent();
- std::stringstream ss;
- ss << pid << std::endl;
- std::string pidStr = ss.str();
- int bytesWritten = ::write(_lockFileHandle->_fd, pidStr.c_str(), pidStr.size());
- if (bytesWritten < 0) {
- int errorcode = errno;
- return Status(ErrorCodes::FileStreamFailed, str::stream()
- << "Unable to write process id " << pid.toString() << " to file: "
- << _filespec << ' ' << errnoWithDescription(errorcode));
+ ProcessId pid = ProcessId::getCurrent();
+ std::stringstream ss;
+ ss << pid << std::endl;
+ std::string pidStr = ss.str();
+ int bytesWritten = ::write(_lockFileHandle->_fd, pidStr.c_str(), pidStr.size());
+ if (bytesWritten < 0) {
+ int errorcode = errno;
+ return Status(ErrorCodes::FileStreamFailed,
+ str::stream() << "Unable to write process id " << pid.toString()
+ << " to file: " << _filespec << ' '
+ << errnoWithDescription(errorcode));
+
+ } else if (bytesWritten == 0) {
+ return Status(ErrorCodes::FileStreamFailed,
+ str::stream() << "Unable to write process id " << pid.toString()
+ << " to file: " << _filespec << " no data written.");
+ }
- }
- else if (bytesWritten == 0) {
- return Status(ErrorCodes::FileStreamFailed, str::stream()
- << "Unable to write process id " << pid.toString() << " to file: "
- << _filespec << " no data written.");
- }
+ if (::fsync(_lockFileHandle->_fd)) {
+ int errorcode = errno;
+ return Status(ErrorCodes::FileStreamFailed,
+ str::stream() << "Unable to write process id " << pid.toString()
+ << " to file (fsync failed): " << _filespec << ' '
+ << errnoWithDescription(errorcode));
+ }
- if (::fsync(_lockFileHandle->_fd)) {
- int errorcode = errno;
- return Status(ErrorCodes::FileStreamFailed, str::stream()
- << "Unable to write process id " << pid.toString() << " to file (fsync failed): "
- << _filespec << ' ' << errnoWithDescription(errorcode));
- }
+ flushMyDirectory(_filespec);
- flushMyDirectory(_filespec);
+ return Status::OK();
+}
- return Status::OK();
+void StorageEngineLockFile::clearPidAndUnlock() {
+ if (!_lockFileHandle->isValid()) {
+ return;
+ }
+ log() << "shutdown: removing fs lock...";
+ // This ought to be an unlink(), but Eliot says the last
+ // time that was attempted, there was a race condition
+ // with acquirePathLock().
+ if (::ftruncate(_lockFileHandle->_fd, 0)) {
+ int errorcode = errno;
+ log() << "couldn't remove fs lock " << errnoWithDescription(errorcode);
}
-
- void StorageEngineLockFile::clearPidAndUnlock() {
- if (!_lockFileHandle->isValid()) {
- return;
- }
- log() << "shutdown: removing fs lock...";
- // This ought to be an unlink(), but Eliot says the last
- // time that was attempted, there was a race condition
- // with acquirePathLock().
- if(::ftruncate(_lockFileHandle->_fd, 0)) {
- int errorcode = errno;
- log() << "couldn't remove fs lock " << errnoWithDescription(errorcode);
- }
#if !defined(__sun)
- ::flock(_lockFileHandle->_fd, LOCK_UN);
+ ::flock(_lockFileHandle->_fd, LOCK_UN);
#else
- struct flock fileLockInfo = {0};
- fileLockInfo.l_type = F_UNLCK;
- fileLockInfo.l_whence = SEEK_SET;
- ::fcntl(_lockFileHandle->_fd, F_SETLK, &fileLockInfo);
+ struct flock fileLockInfo = {0};
+ fileLockInfo.l_type = F_UNLCK;
+ fileLockInfo.l_whence = SEEK_SET;
+ ::fcntl(_lockFileHandle->_fd, F_SETLK, &fileLockInfo);
#endif // !defined(__sun)
- }
+}
} // namespace mongo
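
The POSIX implementation above builds on flock(LOCK_EX | LOCK_NB) (fcntl(F_SETLK)
on Solaris). A standalone sketch of that primitive, independent of the MongoDB
classes and using a placeholder file name:

    #include <cstdio>
    #include <fcntl.h>
    #include <sys/file.h>
    #include <unistd.h>

    int main() {
        int fd = ::open("demo.lock", O_RDWR | O_CREAT, 0644);
        if (fd < 0)
            return 1;
        if (::flock(fd, LOCK_EX | LOCK_NB) != 0) {
            std::perror("flock");  // EWOULDBLOCK: another process holds the lock
            return 1;
        }
        // ... critical section; the lock lives as long as the open descriptor ...
        ::flock(fd, LOCK_UN);
        ::close(fd);
        return 0;
    }
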
diff --git a/src/mongo/db/storage/storage_engine_lock_file_test.cpp b/src/mongo/db/storage/storage_engine_lock_file_test.cpp
index 7ed69bc1477..9312b7f7c3a 100644
--- a/src/mongo/db/storage/storage_engine_lock_file_test.cpp
+++ b/src/mongo/db/storage/storage_engine_lock_file_test.cpp
@@ -39,134 +39,134 @@
namespace {
- using std::string;
- using mongo::unittest::TempDir;
+using std::string;
+using mongo::unittest::TempDir;
- using namespace mongo;
+using namespace mongo;
- TEST(StorageEngineLockFileTest, UncleanShutdownNoExistingFile) {
- TempDir tempDir("StorageEngineLockFileTest_UncleanShutdownNoExistingFile");
- StorageEngineLockFile lockFile(tempDir.path());
- ASSERT_FALSE(lockFile.createdByUncleanShutdown());
- }
-
- TEST(StorageEngineLockFileTest, UncleanShutdownEmptyExistingFile) {
- TempDir tempDir("StorageEngineLockFileTest_UncleanShutdownEmptyExistingFile");
- {
- std::string filename(tempDir.path() + "/mongod.lock");
- std::ofstream(filename.c_str());
- }
- StorageEngineLockFile lockFile(tempDir.path());
- ASSERT_FALSE(lockFile.createdByUncleanShutdown());
- }
-
- TEST(StorageEngineLockFileTest, UncleanShutdownNonEmptyExistingFile) {
- TempDir tempDir("StorageEngineLockFileTest_UncleanShutdownNonEmptyExistingFile");
- {
- std::string filename(tempDir.path() + "/mongod.lock");
- std::ofstream ofs(filename.c_str());
- ofs << 12345 << std::endl;
- }
- StorageEngineLockFile lockFile(tempDir.path());
- ASSERT_TRUE(lockFile.createdByUncleanShutdown());
- }
+TEST(StorageEngineLockFileTest, UncleanShutdownNoExistingFile) {
+ TempDir tempDir("StorageEngineLockFileTest_UncleanShutdownNoExistingFile");
+ StorageEngineLockFile lockFile(tempDir.path());
+ ASSERT_FALSE(lockFile.createdByUncleanShutdown());
+}
- TEST(StorageEngineLockFileTest, OpenInvalidDirectory) {
- StorageEngineLockFile lockFile("no_such_directory");
- ASSERT_EQUALS((boost::filesystem::path("no_such_directory") / "mongod.lock").string(),
- lockFile.getFilespec());
- Status status = lockFile.open();
- ASSERT_NOT_OK(status);
- ASSERT_EQUALS(ErrorCodes::NonExistentPath, status.code());
- }
-
- // Cause ::open() to fail by providing a regular file instead of a directory for 'dbpath'.
- TEST(StorageEngineLockFileTest, OpenInvalidFilename) {
- TempDir tempDir("StorageEngineLockFileTest_OpenInvalidFilename");
- std::string filename(tempDir.path() + "/some_file");
+TEST(StorageEngineLockFileTest, UncleanShutdownEmptyExistingFile) {
+ TempDir tempDir("StorageEngineLockFileTest_UncleanShutdownEmptyExistingFile");
+ {
+ std::string filename(tempDir.path() + "/mongod.lock");
std::ofstream(filename.c_str());
- StorageEngineLockFile lockFile(filename);
- Status status = lockFile.open();
- ASSERT_NOT_OK(status);
- ASSERT_EQUALS(ErrorCodes::DBPathInUse, status.code());
}
-
- TEST(StorageEngineLockFileTest, OpenNoExistingLockFile) {
- TempDir tempDir("StorageEngineLockFileTest_OpenNoExistingLockFile");
- StorageEngineLockFile lockFile(tempDir.path());
- ASSERT_OK(lockFile.open());
- lockFile.close();
+ StorageEngineLockFile lockFile(tempDir.path());
+ ASSERT_FALSE(lockFile.createdByUncleanShutdown());
+}
+
+TEST(StorageEngineLockFileTest, UncleanShutdownNonEmptyExistingFile) {
+ TempDir tempDir("StorageEngineLockFileTest_UncleanShutdownNonEmptyExistingFile");
+ {
+ std::string filename(tempDir.path() + "/mongod.lock");
+ std::ofstream ofs(filename.c_str());
+ ofs << 12345 << std::endl;
}
-
- TEST(StorageEngineLockFileTest, OpenEmptyLockFile) {
- TempDir tempDir("StorageEngineLockFileTest_OpenEmptyLockFile");
- StorageEngineLockFile lockFile(tempDir.path());
- std::string filename(lockFile.getFilespec());
- std::ofstream(filename.c_str());
- ASSERT_OK(lockFile.open());
- lockFile.close();
- }
-
- TEST(StorageEngineLockFileTest, WritePidFileNotOpened) {
- TempDir tempDir("StorageEngineLockFileTest_WritePidFileNotOpened");
- StorageEngineLockFile lockFile(tempDir.path());
- Status status = lockFile.writePid();
- ASSERT_NOT_OK(status);
- ASSERT_EQUALS(ErrorCodes::FileNotOpen, status.code());
- }
-
- TEST(StorageEngineLockFileTest, WritePidFileOpened) {
- TempDir tempDir("StorageEngineLockFileTest_WritePidFileOpened");
- StorageEngineLockFile lockFile(tempDir.path());
- ASSERT_OK(lockFile.open());
- ASSERT_OK(lockFile.writePid());
- lockFile.close();
-
- // Read PID from lock file.
- std::string filename(lockFile.getFilespec());
- std::ifstream ifs(filename.c_str());
- int64_t pidFromLockFile = 0;
- ASSERT_TRUE(ifs >> pidFromLockFile);
- ASSERT_EQUALS(ProcessId::getCurrent().asInt64(), pidFromLockFile);
- }
-
- // Existing data in lock file must be removed before writing process ID.
- TEST(StorageEngineLockFileTest, WritePidTruncateExistingFile) {
- TempDir tempDir("StorageEngineLockFileTest_WritePidTruncateExistingFile");
- StorageEngineLockFile lockFile(tempDir.path());
- {
- std::string filename(tempDir.path() + "/mongod.lock");
- std::ofstream ofs(filename.c_str());
- std::string currentPidStr = ProcessId::getCurrent().toString();
- ASSERT_FALSE(currentPidStr.empty());
- ofs << std::string(currentPidStr.size() * 100, 'X') << std::endl;
- }
- ASSERT_OK(lockFile.open());
- ASSERT_OK(lockFile.writePid());
- lockFile.close();
-
- // Read PID from lock file.
- std::string filename(lockFile.getFilespec());
- std::ifstream ifs(filename.c_str());
- int64_t pidFromLockFile = 0;
- ASSERT_TRUE(ifs >> pidFromLockFile);
- ASSERT_EQUALS(ProcessId::getCurrent().asInt64(), pidFromLockFile);
-
- // There should not be any data in the file after the process ID.
- std::string extraData;
- ASSERT_FALSE(ifs >> extraData);
- }
-
- TEST(StorageEngineLockFileTest, ClearPidAndUnlock) {
- TempDir tempDir("StorageEngineLockFileTest_ClearPidAndUnlock");
- StorageEngineLockFile lockFile(tempDir.path());
- ASSERT_OK(lockFile.open());
- ASSERT_OK(lockFile.writePid());
-
- // Clear lock file contents.
- lockFile.clearPidAndUnlock();
- ASSERT_TRUE(boost::filesystem::exists(lockFile.getFilespec()));
- ASSERT_EQUALS(0U, boost::filesystem::file_size(lockFile.getFilespec()));
+ StorageEngineLockFile lockFile(tempDir.path());
+ ASSERT_TRUE(lockFile.createdByUncleanShutdown());
+}
+
+TEST(StorageEngineLockFileTest, OpenInvalidDirectory) {
+ StorageEngineLockFile lockFile("no_such_directory");
+ ASSERT_EQUALS((boost::filesystem::path("no_such_directory") / "mongod.lock").string(),
+ lockFile.getFilespec());
+ Status status = lockFile.open();
+ ASSERT_NOT_OK(status);
+ ASSERT_EQUALS(ErrorCodes::NonExistentPath, status.code());
+}
+
+// Cause ::open() to fail by providing a regular file instead of a directory for 'dbpath'.
+TEST(StorageEngineLockFileTest, OpenInvalidFilename) {
+ TempDir tempDir("StorageEngineLockFileTest_OpenInvalidFilename");
+ std::string filename(tempDir.path() + "/some_file");
+ std::ofstream(filename.c_str());
+ StorageEngineLockFile lockFile(filename);
+ Status status = lockFile.open();
+ ASSERT_NOT_OK(status);
+ ASSERT_EQUALS(ErrorCodes::DBPathInUse, status.code());
+}
+
+TEST(StorageEngineLockFileTest, OpenNoExistingLockFile) {
+ TempDir tempDir("StorageEngineLockFileTest_OpenNoExistingLockFile");
+ StorageEngineLockFile lockFile(tempDir.path());
+ ASSERT_OK(lockFile.open());
+ lockFile.close();
+}
+
+TEST(StorageEngineLockFileTest, OpenEmptyLockFile) {
+ TempDir tempDir("StorageEngineLockFileTest_OpenEmptyLockFile");
+ StorageEngineLockFile lockFile(tempDir.path());
+ std::string filename(lockFile.getFilespec());
+ std::ofstream(filename.c_str());
+ ASSERT_OK(lockFile.open());
+ lockFile.close();
+}
+
+TEST(StorageEngineLockFileTest, WritePidFileNotOpened) {
+ TempDir tempDir("StorageEngineLockFileTest_WritePidFileNotOpened");
+ StorageEngineLockFile lockFile(tempDir.path());
+ Status status = lockFile.writePid();
+ ASSERT_NOT_OK(status);
+ ASSERT_EQUALS(ErrorCodes::FileNotOpen, status.code());
+}
+
+TEST(StorageEngineLockFileTest, WritePidFileOpened) {
+ TempDir tempDir("StorageEngineLockFileTest_WritePidFileOpened");
+ StorageEngineLockFile lockFile(tempDir.path());
+ ASSERT_OK(lockFile.open());
+ ASSERT_OK(lockFile.writePid());
+ lockFile.close();
+
+ // Read PID from lock file.
+ std::string filename(lockFile.getFilespec());
+ std::ifstream ifs(filename.c_str());
+ int64_t pidFromLockFile = 0;
+ ASSERT_TRUE(ifs >> pidFromLockFile);
+ ASSERT_EQUALS(ProcessId::getCurrent().asInt64(), pidFromLockFile);
+}
+
+// Existing data in lock file must be removed before writing process ID.
+TEST(StorageEngineLockFileTest, WritePidTruncateExistingFile) {
+ TempDir tempDir("StorageEngineLockFileTest_WritePidTruncateExistingFile");
+ StorageEngineLockFile lockFile(tempDir.path());
+ {
+ std::string filename(tempDir.path() + "/mongod.lock");
+ std::ofstream ofs(filename.c_str());
+ std::string currentPidStr = ProcessId::getCurrent().toString();
+ ASSERT_FALSE(currentPidStr.empty());
+ ofs << std::string(currentPidStr.size() * 100, 'X') << std::endl;
}
+ ASSERT_OK(lockFile.open());
+ ASSERT_OK(lockFile.writePid());
+ lockFile.close();
+
+ // Read PID from lock file.
+ std::string filename(lockFile.getFilespec());
+ std::ifstream ifs(filename.c_str());
+ int64_t pidFromLockFile = 0;
+ ASSERT_TRUE(ifs >> pidFromLockFile);
+ ASSERT_EQUALS(ProcessId::getCurrent().asInt64(), pidFromLockFile);
+
+ // There should not be any data in the file after the process ID.
+ std::string extraData;
+ ASSERT_FALSE(ifs >> extraData);
+}
+
+TEST(StorageEngineLockFileTest, ClearPidAndUnlock) {
+ TempDir tempDir("StorageEngineLockFileTest_ClearPidAndUnlock");
+ StorageEngineLockFile lockFile(tempDir.path());
+ ASSERT_OK(lockFile.open());
+ ASSERT_OK(lockFile.writePid());
+
+ // Clear lock file contents.
+ lockFile.clearPidAndUnlock();
+ ASSERT_TRUE(boost::filesystem::exists(lockFile.getFilespec()));
+ ASSERT_EQUALS(0U, boost::filesystem::file_size(lockFile.getFilespec()));
+}
} // namespace
diff --git a/src/mongo/db/storage/storage_engine_lock_file_windows.cpp b/src/mongo/db/storage/storage_engine_lock_file_windows.cpp
index 41fc74ea736..6a3d69a3e2a 100644
--- a/src/mongo/db/storage/storage_engine_lock_file_windows.cpp
+++ b/src/mongo/db/storage/storage_engine_lock_file_windows.cpp
@@ -45,146 +45,152 @@ namespace mongo {
namespace {
- const std::string kLockFileBasename = "mongod.lock";
-
- Status _truncateFile(HANDLE handle) {
- invariant(handle != INVALID_HANDLE_VALUE);
-
- LARGE_INTEGER largeint;
- largeint.QuadPart = 0;
- if (::SetFilePointerEx(handle, largeint, NULL, FILE_BEGIN) == FALSE) {
- int errorcode = GetLastError();
- return Status(ErrorCodes::FileStreamFailed, str::stream()
- << "Unable to truncate lock file (SetFilePointerEx failed) "
- << errnoWithDescription(errorcode));
- }
-
- if (::SetEndOfFile(handle) == FALSE) {
- int errorcode = GetLastError();
- return Status(ErrorCodes::FileStreamFailed, str::stream()
- << "Unable to truncate lock file (SetEndOfFile failed) "
- << errnoWithDescription(errorcode));
- }
-
- return Status::OK();
+const std::string kLockFileBasename = "mongod.lock";
+
+Status _truncateFile(HANDLE handle) {
+ invariant(handle != INVALID_HANDLE_VALUE);
+
+ LARGE_INTEGER largeint;
+ largeint.QuadPart = 0;
+ if (::SetFilePointerEx(handle, largeint, NULL, FILE_BEGIN) == FALSE) {
+ int errorcode = GetLastError();
+ return Status(ErrorCodes::FileStreamFailed,
+ str::stream() << "Unable to truncate lock file (SetFilePointerEx failed) "
+ << errnoWithDescription(errorcode));
}
-} // namespace
-
- class StorageEngineLockFile::LockFileHandle {
- public:
- LockFileHandle() : _handle(INVALID_HANDLE_VALUE) { }
- bool isValid() const { return _handle != INVALID_HANDLE_VALUE; }
- void clear() { _handle = INVALID_HANDLE_VALUE; }
- HANDLE _handle;
- };
-
- StorageEngineLockFile::StorageEngineLockFile(const std::string& dbpath)
- : _dbpath(dbpath),
- _filespec((boost::filesystem::path(_dbpath) / kLockFileBasename).string()),
- _uncleanShutdown(boost::filesystem::exists(_filespec) &&
- boost::filesystem::file_size(_filespec) > 0),
- _lockFileHandle(new LockFileHandle()) {
+ if (::SetEndOfFile(handle) == FALSE) {
+ int errorcode = GetLastError();
+ return Status(ErrorCodes::FileStreamFailed,
+ str::stream() << "Unable to truncate lock file (SetEndOfFile failed) "
+ << errnoWithDescription(errorcode));
}
- StorageEngineLockFile::~StorageEngineLockFile() { }
+ return Status::OK();
+}
- std::string StorageEngineLockFile::getFilespec() const {
- return _filespec;
- }
+} // namespace
- bool StorageEngineLockFile::createdByUncleanShutdown() const {
- return _uncleanShutdown;
+class StorageEngineLockFile::LockFileHandle {
+public:
+ LockFileHandle() : _handle(INVALID_HANDLE_VALUE) {}
+ bool isValid() const {
+ return _handle != INVALID_HANDLE_VALUE;
}
-
- Status StorageEngineLockFile::open() {
- try {
- if (!boost::filesystem::exists(_dbpath)) {
- return Status(ErrorCodes::NonExistentPath, str::stream()
- << "Data directory " << _dbpath << " not found.");
- }
- }
- catch (const std::exception& ex) {
- return Status(ErrorCodes::UnknownError, str::stream()
- << "Unable to check existence of data directory "
- << _dbpath << ": " << ex.what());
+ void clear() {
+ _handle = INVALID_HANDLE_VALUE;
+ }
+ HANDLE _handle;
+};
+
+StorageEngineLockFile::StorageEngineLockFile(const std::string& dbpath)
+ : _dbpath(dbpath),
+ _filespec((boost::filesystem::path(_dbpath) / kLockFileBasename).string()),
+ _uncleanShutdown(boost::filesystem::exists(_filespec) &&
+ boost::filesystem::file_size(_filespec) > 0),
+ _lockFileHandle(new LockFileHandle()) {}
+
+StorageEngineLockFile::~StorageEngineLockFile() {}
+
+std::string StorageEngineLockFile::getFilespec() const {
+ return _filespec;
+}
+
+bool StorageEngineLockFile::createdByUncleanShutdown() const {
+ return _uncleanShutdown;
+}
+
+Status StorageEngineLockFile::open() {
+ try {
+ if (!boost::filesystem::exists(_dbpath)) {
+ return Status(ErrorCodes::NonExistentPath,
+ str::stream() << "Data directory " << _dbpath << " not found.");
}
+ } catch (const std::exception& ex) {
+ return Status(ErrorCodes::UnknownError,
+ str::stream() << "Unable to check existence of data directory " << _dbpath
+ << ": " << ex.what());
+ }
- HANDLE lockFileHandle = CreateFileA(_filespec.c_str(), GENERIC_READ | GENERIC_WRITE,
- 0 /* do not allow anyone else access */, NULL,
- OPEN_ALWAYS /* success if fh can open */, 0, NULL);
-
- if (lockFileHandle == INVALID_HANDLE_VALUE) {
- int errorcode = GetLastError();
- return Status(ErrorCodes::DBPathInUse, str::stream()
- << "Unable to create/open lock file: " << _filespec << ' '
- << errnoWithDescription(errorcode)
- << ". Is a mongod instance already running?");
- }
- _lockFileHandle->_handle = lockFileHandle;
- return Status::OK();
+ HANDLE lockFileHandle = CreateFileA(_filespec.c_str(),
+ GENERIC_READ | GENERIC_WRITE,
+ 0 /* do not allow anyone else access */,
+ NULL,
+ OPEN_ALWAYS /* success if fh can open */,
+ 0,
+ NULL);
+
+ if (lockFileHandle == INVALID_HANDLE_VALUE) {
+ int errorcode = GetLastError();
+ return Status(ErrorCodes::DBPathInUse,
+ str::stream() << "Unable to create/open lock file: " << _filespec << ' '
+ << errnoWithDescription(errorcode)
+ << ". Is a mongod instance already running?");
}
+ _lockFileHandle->_handle = lockFileHandle;
+ return Status::OK();
+}
- void StorageEngineLockFile::close() {
- if (!_lockFileHandle->isValid()) {
- return;
- }
- CloseHandle(_lockFileHandle->_handle);
- _lockFileHandle->clear();
+void StorageEngineLockFile::close() {
+ if (!_lockFileHandle->isValid()) {
+ return;
+ }
+ CloseHandle(_lockFileHandle->_handle);
+ _lockFileHandle->clear();
+}
+
+Status StorageEngineLockFile::writePid() {
+ if (!_lockFileHandle->isValid()) {
+ return Status(ErrorCodes::FileNotOpen,
+ str::stream() << "Unable to write process ID to " << _filespec
+ << " because file has not been opened.");
}
- Status StorageEngineLockFile::writePid() {
- if (!_lockFileHandle->isValid()) {
- return Status(ErrorCodes::FileNotOpen, str::stream()
- << "Unable to write process ID to " << _filespec
- << " because file has not been opened.");
- }
+ Status status = _truncateFile(_lockFileHandle->_handle);
+ if (!status.isOK()) {
+ return status;
+ }
- Status status = _truncateFile(_lockFileHandle->_handle);
- if (!status.isOK()) {
- return status;
- }
+ ProcessId pid = ProcessId::getCurrent();
+ std::stringstream ss;
+ ss << pid << std::endl;
+ std::string pidStr = ss.str();
+ DWORD bytesWritten = 0;
+ if (::WriteFile(_lockFileHandle->_handle,
+ static_cast<LPCVOID>(pidStr.c_str()),
+ static_cast<DWORD>(pidStr.size()),
+ &bytesWritten,
+ NULL) == FALSE) {
+ int errorcode = GetLastError();
+ return Status(ErrorCodes::FileStreamFailed,
+ str::stream() << "Unable to write process id " << pid.toString()
+ << " to file: " << _filespec << ' '
+ << errnoWithDescription(errorcode));
+ } else if (bytesWritten == 0) {
+ return Status(ErrorCodes::FileStreamFailed,
+ str::stream() << "Unable to write process id " << pid.toString()
+ << " to file: " << _filespec << " no data written.");
+ }
- ProcessId pid = ProcessId::getCurrent();
- std::stringstream ss;
- ss << pid << std::endl;
- std::string pidStr = ss.str();
- DWORD bytesWritten = 0;
- if (::WriteFile(_lockFileHandle->_handle,
- static_cast<LPCVOID>(pidStr.c_str()),
- static_cast<DWORD>(pidStr.size()),
- &bytesWritten,
- NULL) == FALSE) {
- int errorcode = GetLastError();
- return Status(ErrorCodes::FileStreamFailed, str::stream()
- << "Unable to write process id " << pid.toString() << " to file: "
- << _filespec << ' ' << errnoWithDescription(errorcode));
- }
- else if (bytesWritten == 0) {
- return Status(ErrorCodes::FileStreamFailed, str::stream()
- << "Unable to write process id " << pid.toString() << " to file: "
- << _filespec << " no data written.");
- }
+ ::FlushFileBuffers(_lockFileHandle->_handle);
- ::FlushFileBuffers(_lockFileHandle->_handle);
+ return Status::OK();
+}
- return Status::OK();
+void StorageEngineLockFile::clearPidAndUnlock() {
+ if (!_lockFileHandle->isValid()) {
+ return;
}
-
- void StorageEngineLockFile::clearPidAndUnlock() {
- if (!_lockFileHandle->isValid()) {
- return;
- }
- log() << "shutdown: removing fs lock...";
- // This ought to be an unlink(), but Eliot says the last
- // time that was attempted, there was a race condition
- // with acquirePathLock().
- Status status = _truncateFile(_lockFileHandle->_handle);
- if (!status.isOK()) {
- log() << "couldn't remove fs lock " << status.toString();
- }
- CloseHandle(_lockFileHandle->_handle);
- _lockFileHandle->clear();
+ log() << "shutdown: removing fs lock...";
+ // This ought to be an unlink(), but Eliot says the last
+ // time that was attempted, there was a race condition
+ // with acquirePathLock().
+ Status status = _truncateFile(_lockFileHandle->_handle);
+ if (!status.isOK()) {
+ log() << "couldn't remove fs lock " << status.toString();
}
+ CloseHandle(_lockFileHandle->_handle);
+ _lockFileHandle->clear();
+}
} // namespace mongo
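
Note the design difference from the POSIX version: on Windows no explicit lock
call is made. Passing dwShareMode == 0 to CreateFileA makes the open itself
exclusive, so a second process fails at open time. A standalone sketch with a
placeholder file name:

    #include <windows.h>
    #include <cstdio>

    int main() {
        HANDLE h = CreateFileA("demo.lock",
                               GENERIC_READ | GENERIC_WRITE,
                               0,  // dwShareMode == 0: no other open may succeed
                               NULL,
                               OPEN_ALWAYS,
                               0,
                               NULL);
        if (h == INVALID_HANDLE_VALUE) {
            std::printf("already locked: error %lu\n", GetLastError());
            return 1;
        }
        // ... the handle itself is the lock; closing it releases exclusivity ...
        CloseHandle(h);
        return 0;
    }
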
diff --git a/src/mongo/db/storage/storage_engine_metadata.cpp b/src/mongo/db/storage/storage_engine_metadata.cpp
index 2881c41d689..0a6dc42582e 100644
--- a/src/mongo/db/storage/storage_engine_metadata.cpp
+++ b/src/mongo/db/storage/storage_engine_metadata.cpp
@@ -49,221 +49,218 @@ namespace mongo {
namespace {
- const std::string kMetadataBasename = "storage.bson";
-
- /**
- * Returns true if local.ns is found in 'directory' or 'directory'/local/.
- */
- bool containsMMapV1LocalNsFile(const std::string& directory) {
- boost::filesystem::path directoryPath(directory);
- return boost::filesystem::exists(directoryPath / "local.ns") ||
- boost::filesystem::exists((directoryPath / "local") / "local.ns");
- }
+const std::string kMetadataBasename = "storage.bson";
+
+/**
+ * Returns true if local.ns is found in 'directory' or 'directory'/local/.
+ */
+bool containsMMapV1LocalNsFile(const std::string& directory) {
+ boost::filesystem::path directoryPath(directory);
+ return boost::filesystem::exists(directoryPath / "local.ns") ||
+ boost::filesystem::exists((directoryPath / "local") / "local.ns");
+}
} // namespace
- // static
- std::unique_ptr<StorageEngineMetadata> StorageEngineMetadata::forPath(
- const std::string& dbpath) {
- std::unique_ptr<StorageEngineMetadata> metadata;
- if (boost::filesystem::exists(boost::filesystem::path(dbpath) / kMetadataBasename)) {
- metadata.reset(new StorageEngineMetadata(dbpath));
- Status status = metadata->read();
- if (!status.isOK()) {
- error() << "Unable to read the storage engine metadata file: " << status;
- fassertFailed(28661);
- }
+// static
+std::unique_ptr<StorageEngineMetadata> StorageEngineMetadata::forPath(const std::string& dbpath) {
+ std::unique_ptr<StorageEngineMetadata> metadata;
+ if (boost::filesystem::exists(boost::filesystem::path(dbpath) / kMetadataBasename)) {
+ metadata.reset(new StorageEngineMetadata(dbpath));
+ Status status = metadata->read();
+ if (!status.isOK()) {
+ error() << "Unable to read the storage engine metadata file: " << status;
+ fassertFailed(28661);
}
- return metadata;
}
-
- // static
- boost::optional<std::string> StorageEngineMetadata::getStorageEngineForPath(
- const std::string& dbpath) {
- if (auto metadata = StorageEngineMetadata::forPath(dbpath)) {
- return {metadata->getStorageEngine()};
- }
-
- // Fallback to checking for MMAPv1-specific files to handle upgrades from before the
- // storage.bson metadata file was introduced in 3.0.
- if (containsMMapV1LocalNsFile(dbpath)) {
- return {std::string("mmapv1")};
- }
- return {};
+ return metadata;
+}
+
+// static
+boost::optional<std::string> StorageEngineMetadata::getStorageEngineForPath(
+ const std::string& dbpath) {
+ if (auto metadata = StorageEngineMetadata::forPath(dbpath)) {
+ return {metadata->getStorageEngine()};
}
- StorageEngineMetadata::StorageEngineMetadata(const std::string& dbpath)
- : _dbpath(dbpath) {
- reset();
+    // Fall back to checking for MMAPv1-specific files to handle upgrades from before the
+ // storage.bson metadata file was introduced in 3.0.
+ if (containsMMapV1LocalNsFile(dbpath)) {
+ return {std::string("mmapv1")};
}
+ return {};
+}
- StorageEngineMetadata::~StorageEngineMetadata() { }
+StorageEngineMetadata::StorageEngineMetadata(const std::string& dbpath) : _dbpath(dbpath) {
+ reset();
+}
- void StorageEngineMetadata::reset() {
- _storageEngine.clear();
- _storageEngineOptions = BSONObj();
- }
+StorageEngineMetadata::~StorageEngineMetadata() {}
- const std::string& StorageEngineMetadata::getStorageEngine() const {
- return _storageEngine;
- }
+void StorageEngineMetadata::reset() {
+ _storageEngine.clear();
+ _storageEngineOptions = BSONObj();
+}
- const BSONObj& StorageEngineMetadata::getStorageEngineOptions() const {
- return _storageEngineOptions;
- }
+const std::string& StorageEngineMetadata::getStorageEngine() const {
+ return _storageEngine;
+}
- void StorageEngineMetadata::setStorageEngine(const std::string& storageEngine) {
- _storageEngine = storageEngine;
- }
+const BSONObj& StorageEngineMetadata::getStorageEngineOptions() const {
+ return _storageEngineOptions;
+}
- void StorageEngineMetadata::setStorageEngineOptions(const BSONObj& storageEngineOptions) {
- _storageEngineOptions = storageEngineOptions.getOwned();
- }
+void StorageEngineMetadata::setStorageEngine(const std::string& storageEngine) {
+ _storageEngine = storageEngine;
+}
- Status StorageEngineMetadata::read() {
- reset();
+void StorageEngineMetadata::setStorageEngineOptions(const BSONObj& storageEngineOptions) {
+ _storageEngineOptions = storageEngineOptions.getOwned();
+}
- boost::filesystem::path metadataPath =
- boost::filesystem::path(_dbpath) / kMetadataBasename;
+Status StorageEngineMetadata::read() {
+ reset();
- if (!boost::filesystem::exists(metadataPath)) {
- return Status(ErrorCodes::NonExistentPath, str::stream()
- << "Metadata file " << metadataPath.string() << " not found.");
- }
+ boost::filesystem::path metadataPath = boost::filesystem::path(_dbpath) / kMetadataBasename;
- boost::uintmax_t fileSize = boost::filesystem::file_size(metadataPath);
- if (fileSize == 0) {
- return Status(ErrorCodes::InvalidPath, str::stream()
- << "Metadata file " << metadataPath.string() << " cannot be empty.");
- }
- if (fileSize == static_cast<boost::uintmax_t>(-1)) {
- return Status(ErrorCodes::InvalidPath, str::stream()
- << "Unable to determine size of metadata file " << metadataPath.string());
- }
+ if (!boost::filesystem::exists(metadataPath)) {
+ return Status(ErrorCodes::NonExistentPath,
+ str::stream() << "Metadata file " << metadataPath.string() << " not found.");
+ }
+
+ boost::uintmax_t fileSize = boost::filesystem::file_size(metadataPath);
+ if (fileSize == 0) {
+ return Status(ErrorCodes::InvalidPath,
+ str::stream() << "Metadata file " << metadataPath.string()
+ << " cannot be empty.");
+ }
+ if (fileSize == static_cast<boost::uintmax_t>(-1)) {
+ return Status(ErrorCodes::InvalidPath,
+ str::stream() << "Unable to determine size of metadata file "
+ << metadataPath.string());
+ }
- std::vector<char> buffer(fileSize);
- std::string filename = metadataPath.string();
- try {
- std::ifstream ifs(filename.c_str(), std::ios_base::in | std::ios_base::binary);
- if (!ifs) {
- return Status(ErrorCodes::FileNotOpen, str::stream()
- << "Failed to read metadata from " << filename);
+ std::vector<char> buffer(fileSize);
+ std::string filename = metadataPath.string();
+ try {
+ std::ifstream ifs(filename.c_str(), std::ios_base::in | std::ios_base::binary);
+ if (!ifs) {
+ return Status(ErrorCodes::FileNotOpen,
+ str::stream() << "Failed to read metadata from " << filename);
}
// Read BSON from file
ifs.read(&buffer[0], buffer.size());
if (!ifs) {
- return Status(ErrorCodes::FileStreamFailed, str::stream()
- << "Unable to read BSON data from " << filename);
- }
- }
- catch (const std::exception& ex) {
- return Status(ErrorCodes::FileStreamFailed, str::stream()
- << "Unexpected error reading BSON data from " << filename
- << ": " << ex.what());
+ return Status(ErrorCodes::FileStreamFailed,
+ str::stream() << "Unable to read BSON data from " << filename);
}
+ } catch (const std::exception& ex) {
+ return Status(ErrorCodes::FileStreamFailed,
+ str::stream() << "Unexpected error reading BSON data from " << filename
+ << ": " << ex.what());
+ }
- BSONObj obj;
- try {
- obj = BSONObj(&buffer[0]);
- }
- catch (DBException& ex) {
- return Status(ErrorCodes::FailedToParse, str::stream()
- << "Failed to convert data in " << filename
- << " to BSON: " << ex.what());
- }
+ BSONObj obj;
+ try {
+ obj = BSONObj(&buffer[0]);
+ } catch (DBException& ex) {
+ return Status(ErrorCodes::FailedToParse,
+ str::stream() << "Failed to convert data in " << filename
+ << " to BSON: " << ex.what());
+ }
- // Validate 'storage.engine' field.
- BSONElement storageEngineElement = obj.getFieldDotted("storage.engine");
- if (storageEngineElement.type() != mongo::String) {
- return Status(ErrorCodes::FailedToParse, str::stream()
- << "The 'storage.engine' field in metadata must be a string: "
- << storageEngineElement.toString());
- }
+ // Validate 'storage.engine' field.
+ BSONElement storageEngineElement = obj.getFieldDotted("storage.engine");
+ if (storageEngineElement.type() != mongo::String) {
+ return Status(ErrorCodes::FailedToParse,
+ str::stream() << "The 'storage.engine' field in metadata must be a string: "
+ << storageEngineElement.toString());
+ }
- // Extract storage engine name from 'storage.engine' node.
- std::string storageEngine = storageEngineElement.String();
- if (storageEngine.empty()) {
- return Status(ErrorCodes::FailedToParse,
- "The 'storage.engine' field in metadata cannot be empty string.");
- }
- _storageEngine = storageEngine;
+ // Extract storage engine name from 'storage.engine' node.
+ std::string storageEngine = storageEngineElement.String();
+ if (storageEngine.empty()) {
+ return Status(ErrorCodes::FailedToParse,
+ "The 'storage.engine' field in metadata cannot be empty string.");
+ }
+ _storageEngine = storageEngine;
- // Read storage engine options generated by storage engine factory from startup options.
- BSONElement storageEngineOptionsElement = obj.getFieldDotted("storage.options");
- if (!storageEngineOptionsElement.eoo()) {
- if (!storageEngineOptionsElement.isABSONObj()) {
- return Status(ErrorCodes::FailedToParse, str::stream()
+ // Read storage engine options generated by storage engine factory from startup options.
+ BSONElement storageEngineOptionsElement = obj.getFieldDotted("storage.options");
+ if (!storageEngineOptionsElement.eoo()) {
+ if (!storageEngineOptionsElement.isABSONObj()) {
+ return Status(ErrorCodes::FailedToParse,
+ str::stream()
<< "The 'storage.options' field in metadata must be a string: "
<< storageEngineOptionsElement.toString());
- }
- setStorageEngineOptions(storageEngineOptionsElement.Obj());
}
+ setStorageEngineOptions(storageEngineOptionsElement.Obj());
+ }
- return Status::OK();
+ return Status::OK();
+}
+
+Status StorageEngineMetadata::write() const {
+ if (_storageEngine.empty()) {
+ return Status(ErrorCodes::BadValue,
+ "Cannot write empty storage engine name to metadata file.");
}
- Status StorageEngineMetadata::write() const {
- if (_storageEngine.empty()) {
- return Status(ErrorCodes::BadValue,
- "Cannot write empty storage engine name to metadata file.");
+ boost::filesystem::path metadataTempPath =
+ boost::filesystem::path(_dbpath) / (kMetadataBasename + ".tmp");
+ {
+ std::string filenameTemp = metadataTempPath.string();
+ std::ofstream ofs(filenameTemp.c_str(), std::ios_base::out | std::ios_base::binary);
+ if (!ofs) {
+ return Status(ErrorCodes::FileNotOpen,
+ str::stream() << "Failed to write metadata to " << filenameTemp);
}
- boost::filesystem::path metadataTempPath =
- boost::filesystem::path(_dbpath) / (kMetadataBasename + ".tmp");
- {
- std::string filenameTemp = metadataTempPath.string();
- std::ofstream ofs(filenameTemp.c_str(), std::ios_base::out | std::ios_base::binary);
- if (!ofs) {
- return Status(ErrorCodes::FileNotOpen, str::stream()
- << "Failed to write metadata to " << filenameTemp);
- }
-
- BSONObj obj = BSON("storage"
- << BSON("engine" << _storageEngine << "options" << _storageEngineOptions));
- ofs.write(obj.objdata(), obj.objsize());
- if (!ofs) {
- return Status(ErrorCodes::InternalError, str::stream()
- << "Failed to write BSON data to " << filenameTemp);
- }
+ BSONObj obj = BSON(
+ "storage" << BSON("engine" << _storageEngine << "options" << _storageEngineOptions));
+ ofs.write(obj.objdata(), obj.objsize());
+ if (!ofs) {
+ return Status(ErrorCodes::InternalError,
+ str::stream() << "Failed to write BSON data to " << filenameTemp);
}
+ }
- // Rename temporary file to actual metadata file.
- boost::filesystem::path metadataPath =
- boost::filesystem::path(_dbpath) / kMetadataBasename;
- try {
- boost::filesystem::rename(metadataTempPath, metadataPath);
- }
- catch (const std::exception& ex) {
- return Status(ErrorCodes::FileRenameFailed, str::stream()
- << "Unexpected error while renaming temporary metadata file "
- << metadataTempPath.string() << " to " << metadataPath.string()
- << ": " << ex.what());
- }
+ // Rename temporary file to actual metadata file.
+ boost::filesystem::path metadataPath = boost::filesystem::path(_dbpath) / kMetadataBasename;
+ try {
+ boost::filesystem::rename(metadataTempPath, metadataPath);
+ } catch (const std::exception& ex) {
+ return Status(ErrorCodes::FileRenameFailed,
+ str::stream() << "Unexpected error while renaming temporary metadata file "
+ << metadataTempPath.string() << " to " << metadataPath.string()
+ << ": " << ex.what());
+ }
+ return Status::OK();
+}
+
+template <>
+Status StorageEngineMetadata::validateStorageEngineOption<bool>(StringData fieldName,
+ bool expectedValue) const {
+ BSONElement element = _storageEngineOptions.getField(fieldName);
+ if (element.eoo()) {
return Status::OK();
}
-
- template <>
- Status StorageEngineMetadata::validateStorageEngineOption<bool>(StringData fieldName,
- bool expectedValue) const {
- BSONElement element = _storageEngineOptions.getField(fieldName);
- if (element.eoo()) {
- return Status::OK();
- }
- if (!element.isBoolean()) {
- return Status(ErrorCodes::FailedToParse, str::stream()
- << "Expected boolean field " << fieldName << " but got "
- << typeName(element.type()) << " instead: " << element);
- }
- if (element.boolean() == expectedValue) {
- return Status::OK();
- }
- return Status(ErrorCodes::InvalidOptions, str::stream()
- << "Requested option conflicts with current storage engine option for "
+ if (!element.isBoolean()) {
+ return Status(ErrorCodes::FailedToParse,
+ str::stream() << "Expected boolean field " << fieldName << " but got "
+ << typeName(element.type()) << " instead: " << element);
+ }
+ if (element.boolean() == expectedValue) {
+ return Status::OK();
+ }
+ return Status(
+ ErrorCodes::InvalidOptions,
+ str::stream() << "Requested option conflicts with current storage engine option for "
<< fieldName << "; you requested " << (expectedValue ? "true" : "false")
<< " but the current server storage is already set to "
<< (element.boolean() ? "true" : "false") << " and cannot be changed");
- }
+}
} // namespace mongo
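
A usage sketch of the metadata round trip implemented above; the engine name,
dbpath, and option shown are illustrative, not mandated by this change:

    StorageEngineMetadata metadata("/data/db");
    metadata.setStorageEngine("myEngine");  // hypothetical engine name
    metadata.setStorageEngineOptions(BSON("directoryPerDB" << true));
    Status status = metadata.write();  // writes storage.bson.tmp, renames into place

    // On a later startup:
    if (auto existing = StorageEngineMetadata::forPath("/data/db")) {
        // Returns InvalidOptions if the stored value conflicts with the request.
        Status check = existing->validateStorageEngineOption("directoryPerDB", true);
    }
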
diff --git a/src/mongo/db/storage/storage_engine_metadata.h b/src/mongo/db/storage/storage_engine_metadata.h
index a4dafdf9bfa..03873a851a9 100644
--- a/src/mongo/db/storage/storage_engine_metadata.h
+++ b/src/mongo/db/storage/storage_engine_metadata.h
@@ -38,83 +38,82 @@
namespace mongo {
+/**
+ * This reads and writes the storage engine metadata file 'storage.bson'
+ * in the data directory (See --dbpath).
+ * 'storage.engine' is the only mandatory field in the BSON metadata file.
+ * Fields other than 'storage.engine' are ignored.
+ */
+class StorageEngineMetadata {
+ MONGO_DISALLOW_COPYING(StorageEngineMetadata);
+
+public:
+ /**
+ * Returns a metadata object describing the storage engine that backs the data files
+     * contained in 'dbpath', or nullptr otherwise.
+ */
+ static std::unique_ptr<StorageEngineMetadata> forPath(const std::string& dbpath);
+
+ /**
+ * Returns the name of the storage engine that backs the data files contained in 'dbpath',
+     * or boost::none otherwise.
+ */
+ static boost::optional<std::string> getStorageEngineForPath(const std::string& dbpath);
+
+ /**
+ * Sets fields to defaults.
+     * Use read() to load metadata from the file.
+ */
+ StorageEngineMetadata(const std::string& dbpath);
+
+ virtual ~StorageEngineMetadata();
+
+ /**
+ * Returns name of storage engine in metadata.
+ */
+ const std::string& getStorageEngine() const;
+
+ /**
+ * Returns storage engine options in metadata.
+ */
+ const BSONObj& getStorageEngineOptions() const;
+
+ /**
+ * Sets name of storage engine in metadata.
+ */
+ void setStorageEngine(const std::string& storageEngine);
+
+ /**
+ * Sets storage engine options in metadata.
+ */
+ void setStorageEngineOptions(const BSONObj& storageEngineOptions);
+
+ /**
+ * Resets fields to default values.
+ */
+ void reset();
+
+ /**
+ * Reads metadata from 'storage.bson' in 'dbpath' directory.
+ */
+ Status read();
+
+ /**
+ * Writes metadata to file.
+ */
+ Status write() const;
+
/**
- * This reads and write the storage engine metadata file 'storage.bson'
- * in the data directory (See --dbpath).
- * 'storage.engine' is the only mandatory field in the BSON metadata file.
- * Fields other than 'storage.engine' are ignored.
+ * Validates a single field in the storage engine options.
+ * Currently, only boolean fields are supported.
*/
- class StorageEngineMetadata {
- MONGO_DISALLOW_COPYING(StorageEngineMetadata);
-
- public:
-
- /**
- * Returns a metadata object describing the storage engine that backs the data files
- * contained in 'dbpath', and nullptr otherwise.
- */
- static std::unique_ptr<StorageEngineMetadata> forPath(const std::string& dbpath);
-
- /**
- * Returns the name of the storage engine that backs the data files contained in 'dbpath',
- * and none otherwise.
- */
- static boost::optional<std::string> getStorageEngineForPath(const std::string& dbpath);
-
- /**
- * Sets fields to defaults.
- * Use read() load metadata from file.
- */
- StorageEngineMetadata(const std::string& dbpath);
-
- virtual ~StorageEngineMetadata();
-
- /**
- * Returns name of storage engine in metadata.
- */
- const std::string& getStorageEngine() const;
-
- /**
- * Returns storage engine options in metadata.
- */
- const BSONObj& getStorageEngineOptions() const;
-
- /**
- * Sets name of storage engine in metadata.
- */
- void setStorageEngine(const std::string& storageEngine);
-
- /**
- * Sets storage engine options in metadata.
- */
- void setStorageEngineOptions(const BSONObj& storageEngineOptions);
-
- /**
- * Resets fields to default values.
- */
- void reset();
-
- /**
- * Reads metadata from 'storage.bson' in 'dbpath' directory.
- */
- Status read();
-
- /**
- * Writes metadata to file.
- */
- Status write() const;
-
- /**
- * Validates a single field in the storage engine options.
- * Currently, only boolean fields are supported.
- */
- template <typename T>
- Status validateStorageEngineOption(StringData fieldName, T expectedValue) const;
-
- private:
- std::string _dbpath;
- std::string _storageEngine;
- BSONObj _storageEngineOptions;
- };
+ template <typename T>
+ Status validateStorageEngineOption(StringData fieldName, T expectedValue) const;
+
+private:
+ std::string _dbpath;
+ std::string _storageEngine;
+ BSONObj _storageEngineOptions;
+};
} // namespace mongo
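
[Editorial aside] Read together, the declarations above describe the intended startup flow: locate the on-disk metadata with forPath(), then check each requested option against what is already recorded. A usage sketch under those assumptions ('directoryPerDB' is an invented field name used only for illustration):

    #include "mongo/db/storage/storage_engine_metadata.h"

    #include <memory>
    #include <string>

    void checkStartupOptions(const std::string& dbpath) {
        std::unique_ptr<mongo::StorageEngineMetadata> metadata =
            mongo::StorageEngineMetadata::forPath(dbpath);
        if (!metadata) {
            return;  // no usable 'storage.bson'; nothing to validate against
        }
        // 'directoryPerDB' is a hypothetical boolean option, for illustration only.
        mongo::Status status = metadata->validateStorageEngineOption("directoryPerDB", true);
        if (!status.isOK()) {
            // InvalidOptions: conflicts with the on-disk value; FailedToParse: not a bool.
        }
    }
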
diff --git a/src/mongo/db/storage/storage_engine_metadata_test.cpp b/src/mongo/db/storage/storage_engine_metadata_test.cpp
index 27508dfe7a5..0f0326a2161 100644
--- a/src/mongo/db/storage/storage_engine_metadata_test.cpp
+++ b/src/mongo/db/storage/storage_engine_metadata_test.cpp
@@ -43,261 +43,261 @@
namespace {
- using std::string;
- using mongo::unittest::TempDir;
+using std::string;
+using mongo::unittest::TempDir;
- using namespace mongo;
+using namespace mongo;
- TEST(StorageEngineMetadataTest, ReadNonExistentMetadataFile) {
- StorageEngineMetadata metadata("no_such_directory");
- Status status = metadata.read();
- ASSERT_NOT_OK(status);
- ASSERT_EQUALS(ErrorCodes::NonExistentPath, status.code());
- }
+TEST(StorageEngineMetadataTest, ReadNonExistentMetadataFile) {
+ StorageEngineMetadata metadata("no_such_directory");
+ Status status = metadata.read();
+ ASSERT_NOT_OK(status);
+ ASSERT_EQUALS(ErrorCodes::NonExistentPath, status.code());
+}
- TEST(StorageEngineMetadataTest, WriteToNonexistentDirectory) {
- ASSERT_NOT_OK(StorageEngineMetadata("no_such_directory").write());
- }
+TEST(StorageEngineMetadataTest, WriteToNonexistentDirectory) {
+ ASSERT_NOT_OK(StorageEngineMetadata("no_such_directory").write());
+}
- TEST(StorageEngineMetadataTest, InvalidMetadataFileNotBSON) {
- TempDir tempDir("StorageEngineMetadataTest_InvalidMetadataFileNotBSON");
- {
- std::string filename(tempDir.path() + "/storage.bson");
- std::ofstream ofs(filename.c_str());
- // BSON document of size -1 and EOO as first element.
- BSONObj obj = fromjson("{x: 1}");
- ofs.write("\xff\xff\xff\xff", 4);
- ofs.write(obj.objdata()+4, obj.objsize()-4);
- ofs.flush();
- }
- {
- StorageEngineMetadata metadata(tempDir.path());
- ASSERT_NOT_OK(metadata.read());
- }
+TEST(StorageEngineMetadataTest, InvalidMetadataFileNotBSON) {
+ TempDir tempDir("StorageEngineMetadataTest_InvalidMetadataFileNotBSON");
+ {
+ std::string filename(tempDir.path() + "/storage.bson");
+ std::ofstream ofs(filename.c_str());
+ // BSON document of size -1 and EOO as first element.
+ BSONObj obj = fromjson("{x: 1}");
+ ofs.write("\xff\xff\xff\xff", 4);
+ ofs.write(obj.objdata() + 4, obj.objsize() - 4);
+ ofs.flush();
}
-
- TEST(StorageEngineMetadataTest, InvalidMetadataFileStorageFieldMissing) {
- TempDir tempDir("StorageEngineMetadataTest_InvalidMetadataFileStorageFieldMissing");
- {
- std::string filename(tempDir.path() + "/storage.bson");
- std::ofstream ofs(filename.c_str(), std::ios_base::out | std::ios_base::binary);
- BSONObj obj = fromjson("{missing_storage_field: 123}");
- ofs.write(obj.objdata(), obj.objsize());
- ofs.flush();
- }
- {
- StorageEngineMetadata metadata(tempDir.path());
- ASSERT_NOT_OK(metadata.read());
- }
+ {
+ StorageEngineMetadata metadata(tempDir.path());
+ ASSERT_NOT_OK(metadata.read());
}
+}
- TEST(StorageEngineMetadataTest, InvalidMetadataFileStorageNodeNotObject) {
- TempDir tempDir("StorageEngineMetadataTest_InvalidMetadataFileStorageNodeNotObject");
- {
- std::string filename(tempDir.path() + "/storage.bson");
- std::ofstream ofs(filename.c_str());
- BSONObj obj = fromjson("{storage: 123}");
- ofs.write(obj.objdata(), obj.objsize());
- ofs.flush();
- }
- {
- StorageEngineMetadata metadata(tempDir.path());
- ASSERT_NOT_OK(metadata.read());
- }
+TEST(StorageEngineMetadataTest, InvalidMetadataFileStorageFieldMissing) {
+ TempDir tempDir("StorageEngineMetadataTest_InvalidMetadataFileStorageFieldMissing");
+ {
+ std::string filename(tempDir.path() + "/storage.bson");
+ std::ofstream ofs(filename.c_str(), std::ios_base::out | std::ios_base::binary);
+ BSONObj obj = fromjson("{missing_storage_field: 123}");
+ ofs.write(obj.objdata(), obj.objsize());
+ ofs.flush();
}
+ {
+ StorageEngineMetadata metadata(tempDir.path());
+ ASSERT_NOT_OK(metadata.read());
+ }
+}
- TEST(StorageEngineMetadataTest, InvalidMetadataFileStorageEngineFieldMissing) {
- TempDir tempDir("StorageEngineMetadataTest_InvalidMetadataFileStorageEngineFieldMissing");
- {
- std::string filename(tempDir.path() + "/storage.bson");
- std::ofstream ofs(filename.c_str());
- BSONObj obj = fromjson("{storage: {}}");
- ofs.write(obj.objdata(), obj.objsize());
- ofs.flush();
- }
- {
- StorageEngineMetadata metadata(tempDir.path());
- ASSERT_NOT_OK(metadata.read());
- }
+TEST(StorageEngineMetadataTest, InvalidMetadataFileStorageNodeNotObject) {
+ TempDir tempDir("StorageEngineMetadataTest_InvalidMetadataFileStorageNodeNotObject");
+ {
+ std::string filename(tempDir.path() + "/storage.bson");
+ std::ofstream ofs(filename.c_str());
+ BSONObj obj = fromjson("{storage: 123}");
+ ofs.write(obj.objdata(), obj.objsize());
+ ofs.flush();
}
+ {
+ StorageEngineMetadata metadata(tempDir.path());
+ ASSERT_NOT_OK(metadata.read());
+ }
+}
- TEST(StorageEngineMetadataTest, InvalidMetadataFileStorageEngineFieldNotString) {
- TempDir tempDir("StorageEngineMetadataTest_InvalidMetadataFileStorageEngineFieldNotString");
- {
- std::string filename(tempDir.path() + "/storage.bson");
- std::ofstream ofs(filename.c_str());
- BSONObj obj = fromjson("{storage: {engine: 123}}");
- ofs.write(obj.objdata(), obj.objsize());
- ofs.flush();
- }
- {
- StorageEngineMetadata metadata(tempDir.path());
- ASSERT_NOT_OK(metadata.read());
- }
+TEST(StorageEngineMetadataTest, InvalidMetadataFileStorageEngineFieldMissing) {
+ TempDir tempDir("StorageEngineMetadataTest_InvalidMetadataFileStorageEngineFieldMissing");
+ {
+ std::string filename(tempDir.path() + "/storage.bson");
+ std::ofstream ofs(filename.c_str());
+ BSONObj obj = fromjson("{storage: {}}");
+ ofs.write(obj.objdata(), obj.objsize());
+ ofs.flush();
+ }
+ {
+ StorageEngineMetadata metadata(tempDir.path());
+ ASSERT_NOT_OK(metadata.read());
}
+}
- TEST(StorageEngineMetadataTest, InvalidMetadataFileStorageEngineOptionsFieldNotObject) {
- TempDir tempDir("StorageEngineMetadataTest_IgnoreUnknownField");
- {
- std::string filename(tempDir.path() + "/storage.bson");
- std::ofstream ofs(filename.c_str());
- BSONObj obj = fromjson("{storage: {engine: \"storageEngine1\", options: 123}}");
- ofs.write(obj.objdata(), obj.objsize());
- ofs.flush();
- }
- {
- StorageEngineMetadata metadata(tempDir.path());
- ASSERT_NOT_OK(metadata.read());
- }
+TEST(StorageEngineMetadataTest, InvalidMetadataFileStorageEngineFieldNotString) {
+ TempDir tempDir("StorageEngineMetadataTest_InvalidMetadataFileStorageEngineFieldNotString");
+ {
+ std::string filename(tempDir.path() + "/storage.bson");
+ std::ofstream ofs(filename.c_str());
+ BSONObj obj = fromjson("{storage: {engine: 123}}");
+ ofs.write(obj.objdata(), obj.objsize());
+ ofs.flush();
}
+ {
+ StorageEngineMetadata metadata(tempDir.path());
+ ASSERT_NOT_OK(metadata.read());
+ }
+}
- // Metadata parser should ignore unknown metadata fields.
- TEST(StorageEngineMetadataTest, IgnoreUnknownField) {
- TempDir tempDir("StorageEngineMetadataTest_IgnoreUnknownField");
- {
- std::string filename(tempDir.path() + "/storage.bson");
- std::ofstream ofs(filename.c_str());
- BSONObj obj = fromjson("{storage: {engine: \"storageEngine1\", unknown_field: 123}}");
- ofs.write(obj.objdata(), obj.objsize());
- ofs.flush();
- }
- {
- StorageEngineMetadata metadata(tempDir.path());
- ASSERT_OK(metadata.read());
- ASSERT_EQUALS("storageEngine1", metadata.getStorageEngine());
- ASSERT_TRUE(metadata.getStorageEngineOptions().isEmpty());
- }
+TEST(StorageEngineMetadataTest, InvalidMetadataFileStorageEngineOptionsFieldNotObject) {
+ TempDir tempDir("StorageEngineMetadataTest_IgnoreUnknownField");
+ {
+ std::string filename(tempDir.path() + "/storage.bson");
+ std::ofstream ofs(filename.c_str());
+ BSONObj obj = fromjson("{storage: {engine: \"storageEngine1\", options: 123}}");
+ ofs.write(obj.objdata(), obj.objsize());
+ ofs.flush();
}
+ {
+ StorageEngineMetadata metadata(tempDir.path());
+ ASSERT_NOT_OK(metadata.read());
+ }
+}
- TEST(StorageEngineMetadataTest, WriteEmptyStorageEngineName) {
- TempDir tempDir("StorageEngineMetadataTest_WriteEmptyStorageEngineName");
+// Metadata parser should ignore unknown metadata fields.
+TEST(StorageEngineMetadataTest, IgnoreUnknownField) {
+ TempDir tempDir("StorageEngineMetadataTest_IgnoreUnknownField");
+ {
+ std::string filename(tempDir.path() + "/storage.bson");
+ std::ofstream ofs(filename.c_str());
+ BSONObj obj = fromjson("{storage: {engine: \"storageEngine1\", unknown_field: 123}}");
+ ofs.write(obj.objdata(), obj.objsize());
+ ofs.flush();
+ }
+ {
StorageEngineMetadata metadata(tempDir.path());
- ASSERT_EQUALS("", metadata.getStorageEngine());
- // Write empty storage engine name to metadata file.
- ASSERT_NOT_OK(metadata.write());
+ ASSERT_OK(metadata.read());
+ ASSERT_EQUALS("storageEngine1", metadata.getStorageEngine());
+ ASSERT_TRUE(metadata.getStorageEngineOptions().isEmpty());
}
+}
- TEST(StorageEngineMetadataTest, Roundtrip) {
- TempDir tempDir("StorageEngineMetadataTest_Roundtrip");
- BSONObj options = fromjson("{x: 1}");
- {
- StorageEngineMetadata metadata(tempDir.path());
- metadata.setStorageEngine("storageEngine1");
- metadata.setStorageEngineOptions(options);
- ASSERT_OK(metadata.write());
- }
- // Read back storage engine name.
- {
- StorageEngineMetadata metadata(tempDir.path());
- ASSERT_OK(metadata.read());
- ASSERT_EQUALS("storageEngine1", metadata.getStorageEngine());
- ASSERT_EQUALS(options, metadata.getStorageEngineOptions());
+TEST(StorageEngineMetadataTest, WriteEmptyStorageEngineName) {
+ TempDir tempDir("StorageEngineMetadataTest_WriteEmptyStorageEngineName");
+ StorageEngineMetadata metadata(tempDir.path());
+ ASSERT_EQUALS("", metadata.getStorageEngine());
+ // Write empty storage engine name to metadata file.
+ ASSERT_NOT_OK(metadata.write());
+}
- metadata.reset();
- ASSERT_TRUE(metadata.getStorageEngine().empty());
- ASSERT_TRUE(metadata.getStorageEngineOptions().isEmpty());
- }
+TEST(StorageEngineMetadataTest, Roundtrip) {
+ TempDir tempDir("StorageEngineMetadataTest_Roundtrip");
+ BSONObj options = fromjson("{x: 1}");
+ {
+ StorageEngineMetadata metadata(tempDir.path());
+ metadata.setStorageEngine("storageEngine1");
+ metadata.setStorageEngineOptions(options);
+ ASSERT_OK(metadata.write());
}
+ // Read back storage engine name.
+ {
+ StorageEngineMetadata metadata(tempDir.path());
+ ASSERT_OK(metadata.read());
+ ASSERT_EQUALS("storageEngine1", metadata.getStorageEngine());
+ ASSERT_EQUALS(options, metadata.getStorageEngineOptions());
- TEST(StorageEngineMetadataTest, ValidateStorageEngineOption) {
- // It is fine to provide an invalid data directory as long as we do not
- // call read() or write().
- StorageEngineMetadata metadata("no_such_directory");
- BSONObj options = fromjson("{x: true, y: false, z: 123}");
- metadata.setStorageEngineOptions(options);
+ metadata.reset();
+ ASSERT_TRUE(metadata.getStorageEngine().empty());
+ ASSERT_TRUE(metadata.getStorageEngineOptions().isEmpty());
+ }
+}
- // Non-existent field.
- ASSERT_OK(metadata.validateStorageEngineOption("w", true));
- ASSERT_OK(metadata.validateStorageEngineOption("w", false));
+TEST(StorageEngineMetadataTest, ValidateStorageEngineOption) {
+ // It is fine to provide an invalid data directory as long as we do not
+ // call read() or write().
+ StorageEngineMetadata metadata("no_such_directory");
+ BSONObj options = fromjson("{x: true, y: false, z: 123}");
+ metadata.setStorageEngineOptions(options);
- // Non-boolean field.
- Status status = metadata.validateStorageEngineOption("z", true);
- ASSERT_NOT_OK(status);
- ASSERT_EQUALS(ErrorCodes::FailedToParse, status.code());
- status = metadata.validateStorageEngineOption("z", false);
- ASSERT_NOT_OK(status);
- ASSERT_EQUALS(ErrorCodes::FailedToParse, status.code());
+ // Non-existent field.
+ ASSERT_OK(metadata.validateStorageEngineOption("w", true));
+ ASSERT_OK(metadata.validateStorageEngineOption("w", false));
- // Boolean fields.
- ASSERT_OK(metadata.validateStorageEngineOption("x", true));
- status = metadata.validateStorageEngineOption("x", false);
- ASSERT_NOT_OK(status);
- ASSERT_EQUALS(ErrorCodes::InvalidOptions, status.code());
+ // Non-boolean field.
+ Status status = metadata.validateStorageEngineOption("z", true);
+ ASSERT_NOT_OK(status);
+ ASSERT_EQUALS(ErrorCodes::FailedToParse, status.code());
+ status = metadata.validateStorageEngineOption("z", false);
+ ASSERT_NOT_OK(status);
+ ASSERT_EQUALS(ErrorCodes::FailedToParse, status.code());
- ASSERT_OK(metadata.validateStorageEngineOption("y", false));
- status = metadata.validateStorageEngineOption("y", true);
- ASSERT_NOT_OK(status);
- ASSERT_EQUALS(ErrorCodes::InvalidOptions, status.code());
- }
+ // Boolean fields.
+ ASSERT_OK(metadata.validateStorageEngineOption("x", true));
+ status = metadata.validateStorageEngineOption("x", false);
+ ASSERT_NOT_OK(status);
+ ASSERT_EQUALS(ErrorCodes::InvalidOptions, status.code());
- // Do not override the active storage engine when the data directory is empty.
- TEST(StorageEngineMetadataTest, StorageEngineForPath_EmptyDirectory) {
- TempDir tempDir("StorageEngineMetadataTest_StorageEngineForPath_EmptyDirectory");
- auto storageEngine = StorageEngineMetadata::getStorageEngineForPath(tempDir.path());
- ASSERT_FALSE(storageEngine);
- }
+ ASSERT_OK(metadata.validateStorageEngineOption("y", false));
+ status = metadata.validateStorageEngineOption("y", true);
+ ASSERT_NOT_OK(status);
+ ASSERT_EQUALS(ErrorCodes::InvalidOptions, status.code());
+}
- // Override the active storage engine with "mmapv1" when the data directory contains local.ns.
- TEST(StorageEngineMetadataTest, StorageEngineForPath_DataFilesExist) {
- TempDir tempDir("StorageEngineMetadataTest_StorageEngineForPath_DataFilesExist");
- {
- std::string filename(tempDir.path() + "/local.ns");
- std::ofstream ofs(filename.c_str());
- ofs << "unused data" << std::endl;
- }
- ASSERT_EQUALS(std::string("mmapv1"),
- StorageEngineMetadata::getStorageEngineForPath(tempDir.path()));
+// Do not override the active storage engine when the data directory is empty.
+TEST(StorageEngineMetadataTest, StorageEngineForPath_EmptyDirectory) {
+ TempDir tempDir("StorageEngineMetadataTest_StorageEngineForPath_EmptyDirectory");
+ auto storageEngine = StorageEngineMetadata::getStorageEngineForPath(tempDir.path());
+ ASSERT_FALSE(storageEngine);
+}
+
+// Override the active storage engine with "mmapv1" when the data directory contains local.ns.
+TEST(StorageEngineMetadataTest, StorageEngineForPath_DataFilesExist) {
+ TempDir tempDir("StorageEngineMetadataTest_StorageEngineForPath_DataFilesExist");
+ {
+ std::string filename(tempDir.path() + "/local.ns");
+ std::ofstream ofs(filename.c_str());
+ ofs << "unused data" << std::endl;
}
+ ASSERT_EQUALS(std::string("mmapv1"),
+ StorageEngineMetadata::getStorageEngineForPath(tempDir.path()));
+}
- // Override the active storage engine with "mmapv1" when the data directory contains
- // local/local.ns.
- TEST(StorageEngineMetadataTest, StorageEngineForPath_DataFilesExist_DirPerDB) {
- TempDir tempDir("StorageEngineMetadataTest_StorageEngineForPath_DataFilesExist_DirPerDB");
- {
- boost::filesystem::create_directory(tempDir.path() + "/local");
- std::string filename(tempDir.path() + "/local/local.ns");
- std::ofstream ofs(filename.c_str());
- ofs << "unused data" << std::endl;
- }
- ASSERT_EQUALS(std::string("mmapv1"),
- StorageEngineMetadata::getStorageEngineForPath(tempDir.path()));
+// Override the active storage engine with "mmapv1" when the data directory contains
+// local/local.ns.
+TEST(StorageEngineMetadataTest, StorageEngineForPath_DataFilesExist_DirPerDB) {
+ TempDir tempDir("StorageEngineMetadataTest_StorageEngineForPath_DataFilesExist_DirPerDB");
+ {
+ boost::filesystem::create_directory(tempDir.path() + "/local");
+ std::string filename(tempDir.path() + "/local/local.ns");
+ std::ofstream ofs(filename.c_str());
+ ofs << "unused data" << std::endl;
}
+ ASSERT_EQUALS(std::string("mmapv1"),
+ StorageEngineMetadata::getStorageEngineForPath(tempDir.path()));
+}
- // Do not override the active storage engine when the data directory is nonempty, but does not
- // contain either local.ns or local/local.ns.
- TEST(StorageEngineMetadataTest, StorageEngineForPath_NoDataFilesExist) {
- TempDir tempDir("StorageEngineMetadataTest_StorageEngineForPath_NoDataFilesExist");
- {
- std::string filename(tempDir.path() + "/user_data.txt");
- std::ofstream ofs(filename.c_str());
- ofs << "unused data" << std::endl;
- }
- auto storageEngine = StorageEngineMetadata::getStorageEngineForPath(tempDir.path());
- ASSERT_FALSE(storageEngine);
+// Do not override the active storage engine when the data directory is nonempty, but does not
+// contain either local.ns or local/local.ns.
+TEST(StorageEngineMetadataTest, StorageEngineForPath_NoDataFilesExist) {
+ TempDir tempDir("StorageEngineMetadataTest_StorageEngineForPath_NoDataFilesExist");
+ {
+ std::string filename(tempDir.path() + "/user_data.txt");
+ std::ofstream ofs(filename.c_str());
+ ofs << "unused data" << std::endl;
}
+ auto storageEngine = StorageEngineMetadata::getStorageEngineForPath(tempDir.path());
+ ASSERT_FALSE(storageEngine);
+}
- // Override the active storage engine with "mmapv1" when the metadata file specifies "mmapv1".
- TEST(StorageEngineMetadataTest, StorageEngineForPath_MetadataFile_mmapv1) {
- TempDir tempDir("StorageEngineMetadataTest_StorageEngineForPath_MetadataFile_mmapv1");
- {
- StorageEngineMetadata metadata(tempDir.path());
- metadata.setStorageEngine("mmapv1");
- ASSERT_OK(metadata.write());
- }
- ASSERT_EQUALS(std::string("mmapv1"),
- StorageEngineMetadata::getStorageEngineForPath(tempDir.path()));
+// Override the active storage engine with "mmapv1" when the metadata file specifies "mmapv1".
+TEST(StorageEngineMetadataTest, StorageEngineForPath_MetadataFile_mmapv1) {
+ TempDir tempDir("StorageEngineMetadataTest_StorageEngineForPath_MetadataFile_mmapv1");
+ {
+ StorageEngineMetadata metadata(tempDir.path());
+ metadata.setStorageEngine("mmapv1");
+ ASSERT_OK(metadata.write());
}
+ ASSERT_EQUALS(std::string("mmapv1"),
+ StorageEngineMetadata::getStorageEngineForPath(tempDir.path()));
+}
- // Override the active storage engine whatever the metadata file specifies.
- TEST(StorageEngineMetadataTest, StorageEngineForPath_MetadataFile_someEngine) {
- TempDir tempDir("StorageEngineMetadataTest_StorageEngineForPath_MetadataFile_someEngine");
- {
- StorageEngineMetadata metadata(tempDir.path());
- metadata.setStorageEngine("someEngine");
- ASSERT_OK(metadata.write());
- }
- ASSERT_EQUALS(std::string("someEngine"),
- StorageEngineMetadata::getStorageEngineForPath(tempDir.path()));
+// Override the active storage engine with whatever the metadata file specifies.
+TEST(StorageEngineMetadataTest, StorageEngineForPath_MetadataFile_someEngine) {
+ TempDir tempDir("StorageEngineMetadataTest_StorageEngineForPath_MetadataFile_someEngine");
+ {
+ StorageEngineMetadata metadata(tempDir.path());
+ metadata.setStorageEngine("someEngine");
+ ASSERT_OK(metadata.write());
}
+ ASSERT_EQUALS(std::string("someEngine"),
+ StorageEngineMetadata::getStorageEngineForPath(tempDir.path()));
+}
} // namespace
diff --git a/src/mongo/db/storage/storage_init.cpp b/src/mongo/db/storage/storage_init.cpp
index 4e46afd5a84..a17f5e494d5 100644
--- a/src/mongo/db/storage/storage_init.cpp
+++ b/src/mongo/db/storage/storage_init.cpp
@@ -36,22 +36,21 @@ namespace mongo {
// TODO: Does this belong here?
namespace {
- class StorageSSS : public ServerStatusSection {
- public:
- StorageSSS() : ServerStatusSection( "storageEngine" ) {
- }
+class StorageSSS : public ServerStatusSection {
+public:
+ StorageSSS() : ServerStatusSection("storageEngine") {}
- virtual ~StorageSSS() {}
+ virtual ~StorageSSS() {}
- virtual bool includeByDefault() const { return true; }
+ virtual bool includeByDefault() const {
+ return true;
+ }
- virtual BSONObj generateSection(OperationContext* txn,
- const BSONElement& configElement) const {
+ virtual BSONObj generateSection(OperationContext* txn, const BSONElement& configElement) const {
+ return BSON("name" << storageGlobalParams.engine);
+ }
- return BSON( "name" << storageGlobalParams.engine );
- }
-
- } storageSSS;
+} storageSSS;
} // namespace
} // namespace mongo
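
[Editorial aside] The trailing '} storageSSS;' above is the entire registration mechanism: defining a namespace-scope instance runs the ServerStatusSection base constructor at static-initialization time, which is what hooks the section into serverStatus. A generic sketch of that self-registration idiom (illustrative toy types, not the server's real registry):

    #include <string>
    #include <utility>
    #include <vector>

    // Toy registry standing in for the server's section list.
    std::vector<std::string>& registry() {
        static std::vector<std::string> names;
        return names;
    }

    class Section {
    public:
        explicit Section(std::string name) {
            registry().push_back(std::move(name));  // registration is a ctor side effect
        }
        virtual ~Section() = default;
    };

    namespace {
    class StorageSection : public Section {
    public:
        StorageSection() : Section("storageEngine") {}
    } storageSection;  // defining the global instance is what registers it
    }  // namespace
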
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_customization_hooks.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_customization_hooks.cpp
index a6f685418e6..12388fbfaa6 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_customization_hooks.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_customization_hooks.cpp
@@ -38,39 +38,37 @@
namespace mongo {
- /* Make a WiredTigerCustomizationHooks pointer a decoration on the global ServiceContext */
- MONGO_INITIALIZER_WITH_PREREQUISITES(SetWiredTigerCustomizationHooks,
- ("SetGlobalEnvironment"))
- (InitializerContext* context) {
- auto customizationHooks = stdx::make_unique<EmptyWiredTigerCustomizationHooks>();
- WiredTigerCustomizationHooks::set(getGlobalServiceContext(), std::move(customizationHooks));
+/* Make a WiredTigerCustomizationHooks pointer a decoration on the global ServiceContext */
+MONGO_INITIALIZER_WITH_PREREQUISITES(SetWiredTigerCustomizationHooks, ("SetGlobalEnvironment"))
+(InitializerContext* context) {
+ auto customizationHooks = stdx::make_unique<EmptyWiredTigerCustomizationHooks>();
+ WiredTigerCustomizationHooks::set(getGlobalServiceContext(), std::move(customizationHooks));
- return Status::OK();
- }
+ return Status::OK();
+}
- namespace {
- const auto getCustomizationHooks =
- ServiceContext::declareDecoration<std::unique_ptr<WiredTigerCustomizationHooks>>();
- } // namespace
+namespace {
+const auto getCustomizationHooks =
+ ServiceContext::declareDecoration<std::unique_ptr<WiredTigerCustomizationHooks>>();
+} // namespace
- void WiredTigerCustomizationHooks::set(
- ServiceContext* service,
- std::unique_ptr<WiredTigerCustomizationHooks> custHooks) {
- auto& hooks = getCustomizationHooks(service);
- invariant(custHooks);
- hooks = std::move(custHooks);
- }
+void WiredTigerCustomizationHooks::set(ServiceContext* service,
+ std::unique_ptr<WiredTigerCustomizationHooks> custHooks) {
+ auto& hooks = getCustomizationHooks(service);
+ invariant(custHooks);
+ hooks = std::move(custHooks);
+}
- WiredTigerCustomizationHooks* WiredTigerCustomizationHooks::get(ServiceContext* service) {
- return getCustomizationHooks(service).get();
- }
+WiredTigerCustomizationHooks* WiredTigerCustomizationHooks::get(ServiceContext* service) {
+ return getCustomizationHooks(service).get();
+}
- EmptyWiredTigerCustomizationHooks::~EmptyWiredTigerCustomizationHooks() {}
+EmptyWiredTigerCustomizationHooks::~EmptyWiredTigerCustomizationHooks() {}
- void EmptyWiredTigerCustomizationHooks::appendUID(BSONObjBuilder* builder) {}
+void EmptyWiredTigerCustomizationHooks::appendUID(BSONObjBuilder* builder) {}
- std::string EmptyWiredTigerCustomizationHooks::getOpenConfig(StringData tableName) {
- return "";
- }
+std::string EmptyWiredTigerCustomizationHooks::getOpenConfig(StringData tableName) {
+ return "";
+}
-} // namespace mongo
+} // namespace mongo
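
[Editorial aside] Taken together, this file is the ServiceContext decoration pattern: declareDecoration() carves out an owning slot on every ServiceContext, the MONGO_INITIALIZER installs a default (empty) implementation, and set()/get() are the only public access points. A hypothetical sketch of a downstream engine swapping in its own hooks, using only the set()/get() API shown above ('CompressingHooks' and its config fragment are invented):

    #include "mongo/db/storage/wiredtiger/wiredtiger_customization_hooks.h"
    #include "mongo/stdx/memory.h"

    class CompressingHooks final : public mongo::WiredTigerCustomizationHooks {
    public:
        void appendUID(mongo::BSONObjBuilder* builder) override {}
        std::string getOpenConfig(mongo::StringData tableName) override {
            return "block_compressor=zlib,";  // illustrative WT config fragment
        }
    };

    void installCompressingHooks(mongo::ServiceContext* service) {
        // Replaces the EmptyWiredTigerCustomizationHooks installed at startup.
        mongo::WiredTigerCustomizationHooks::set(service,
                                                 mongo::stdx::make_unique<CompressingHooks>());
        // Later, wherever a table is opened:
        std::string config =
            mongo::WiredTigerCustomizationHooks::get(service)->getOpenConfig("table:example");
    }
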
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_customization_hooks.h b/src/mongo/db/storage/wiredtiger/wiredtiger_customization_hooks.h
index e826a8971b8..8eeed66b49a 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_customization_hooks.h
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_customization_hooks.h
@@ -36,38 +36,37 @@
#include "mongo/db/jsobj.h"
namespace mongo {
- class StringData;
- class ServiceContext;
+class StringData;
+class ServiceContext;
- class WiredTigerCustomizationHooks {
- public:
- static void set(ServiceContext* service,
- std::unique_ptr<WiredTigerCustomizationHooks> custHooks);
+class WiredTigerCustomizationHooks {
+public:
+ static void set(ServiceContext* service,
+ std::unique_ptr<WiredTigerCustomizationHooks> custHooks);
- static WiredTigerCustomizationHooks* get(ServiceContext* service);
+ static WiredTigerCustomizationHooks* get(ServiceContext* service);
- virtual ~WiredTigerCustomizationHooks() = default;
+ virtual ~WiredTigerCustomizationHooks() = default;
- /**
- * Appends additional configuration sub object(s) to the BSONObjbuilder builder.
- */
- virtual void appendUID(BSONObjBuilder* builder) = 0;
+ /**
+     * Appends additional configuration sub-object(s) to the BSONObjBuilder 'builder'.
+ */
+ virtual void appendUID(BSONObjBuilder* builder) = 0;
- /**
- * Gets the WiredTiger encryption configuration string for the
- * provided table name
- */
- virtual std::string getOpenConfig(StringData tableName) = 0;
- };
+ /**
+ * Gets the WiredTiger encryption configuration string for the
+     * provided table name.
+ */
+ virtual std::string getOpenConfig(StringData tableName) = 0;
+};
- // Empty default implementation of the abstract class WiredTigerCustomizationHooks
- class EmptyWiredTigerCustomizationHooks : public WiredTigerCustomizationHooks {
+// Empty default implementation of the abstract class WiredTigerCustomizationHooks
+class EmptyWiredTigerCustomizationHooks : public WiredTigerCustomizationHooks {
+public:
+ ~EmptyWiredTigerCustomizationHooks() override;
- public:
- ~EmptyWiredTigerCustomizationHooks() override;
+ void appendUID(BSONObjBuilder* builder) override;
- void appendUID(BSONObjBuilder* builder) override;
-
- std::string getOpenConfig(StringData tableName) override;
- };
-} // namespace mongo
+ std::string getOpenConfig(StringData tableName) override;
+};
+} // namespace mongo
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_global_options.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_global_options.cpp
index 24cd3aa82d6..01aa0d3bbc1 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_global_options.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_global_options.cpp
@@ -38,126 +38,122 @@
namespace mongo {
- WiredTigerGlobalOptions wiredTigerGlobalOptions;
-
- Status WiredTigerGlobalOptions::add(moe::OptionSection* options) {
- moe::OptionSection wiredTigerOptions("WiredTiger options");
-
- // WiredTiger storage engine options
- wiredTigerOptions.addOptionChaining("storage.wiredTiger.engineConfig.cacheSizeGB",
- "wiredTigerCacheSizeGB",
- moe::Int,
- "maximum amount of memory to allocate for cache; "
- "defaults to 1/2 of physical RAM")
- .validRange(1,10000);
- wiredTigerOptions.addOptionChaining(
- "storage.wiredTiger.engineConfig.statisticsLogDelaySecs",
- "wiredTigerStatisticsLogDelaySecs",
- moe::Int,
- "seconds to wait between each write to a statistics file in the dbpath; "
- "0 means do not log statistics")
- .validRange(0, 100000)
- .setDefault(moe::Value(0));
- wiredTigerOptions.addOptionChaining("storage.wiredTiger.engineConfig.journalCompressor",
- "wiredTigerJournalCompressor",
- moe::String,
- "use a compressor for log records [none|snappy|zlib]")
- .format("(:?none)|(:?snappy)|(:?zlib)", "(none/snappy/zlib)")
- .setDefault(moe::Value(std::string("snappy")));
- wiredTigerOptions.addOptionChaining("storage.wiredTiger.engineConfig.directoryForIndexes",
- "wiredTigerDirectoryForIndexes",
- moe::Switch,
- "Put indexes and data in different directories");
- wiredTigerOptions.addOptionChaining("storage.wiredTiger.engineConfig.configString",
- "wiredTigerEngineConfigString",
- moe::String,
- "WiredTiger storage engine custom "
- "configuration settings")
- .hidden();
-
- // WiredTiger collection options
- wiredTigerOptions.addOptionChaining("storage.wiredTiger.collectionConfig.blockCompressor",
- "wiredTigerCollectionBlockCompressor",
- moe::String,
- "block compression algorithm for collection data "
- "[none|snappy|zlib]")
- .format("(:?none)|(:?snappy)|(:?zlib)", "(none/snappy/zlib)")
- .setDefault(moe::Value(std::string("snappy")));
- wiredTigerOptions.addOptionChaining("storage.wiredTiger.collectionConfig.configString",
- "wiredTigerCollectionConfigString",
- moe::String,
- "WiredTiger custom collection configuration settings")
- .hidden();
-
-
- // WiredTiger index options
- wiredTigerOptions.addOptionChaining("storage.wiredTiger.indexConfig.prefixCompression",
- "wiredTigerIndexPrefixCompression",
- moe::Bool,
- "use prefix compression on row-store leaf pages")
- .setDefault(moe::Value(true));
- wiredTigerOptions.addOptionChaining("storage.wiredTiger.indexConfig.configString",
- "wiredTigerIndexConfigString",
- moe::String,
- "WiredTiger custom index configuration settings")
- .hidden();
-
- return options->addSection(wiredTigerOptions);
+WiredTigerGlobalOptions wiredTigerGlobalOptions;
+
+Status WiredTigerGlobalOptions::add(moe::OptionSection* options) {
+ moe::OptionSection wiredTigerOptions("WiredTiger options");
+
+ // WiredTiger storage engine options
+ wiredTigerOptions.addOptionChaining("storage.wiredTiger.engineConfig.cacheSizeGB",
+ "wiredTigerCacheSizeGB",
+ moe::Int,
+ "maximum amount of memory to allocate for cache; "
+ "defaults to 1/2 of physical RAM").validRange(1, 10000);
+ wiredTigerOptions.addOptionChaining(
+ "storage.wiredTiger.engineConfig.statisticsLogDelaySecs",
+ "wiredTigerStatisticsLogDelaySecs",
+ moe::Int,
+ "seconds to wait between each write to a statistics file in the dbpath; "
+ "0 means do not log statistics")
+ .validRange(0, 100000)
+ .setDefault(moe::Value(0));
+ wiredTigerOptions.addOptionChaining("storage.wiredTiger.engineConfig.journalCompressor",
+ "wiredTigerJournalCompressor",
+ moe::String,
+ "use a compressor for log records [none|snappy|zlib]")
+ .format("(:?none)|(:?snappy)|(:?zlib)", "(none/snappy/zlib)")
+ .setDefault(moe::Value(std::string("snappy")));
+ wiredTigerOptions.addOptionChaining("storage.wiredTiger.engineConfig.directoryForIndexes",
+ "wiredTigerDirectoryForIndexes",
+ moe::Switch,
+ "Put indexes and data in different directories");
+ wiredTigerOptions.addOptionChaining("storage.wiredTiger.engineConfig.configString",
+ "wiredTigerEngineConfigString",
+ moe::String,
+ "WiredTiger storage engine custom "
+ "configuration settings").hidden();
+
+ // WiredTiger collection options
+ wiredTigerOptions.addOptionChaining("storage.wiredTiger.collectionConfig.blockCompressor",
+ "wiredTigerCollectionBlockCompressor",
+ moe::String,
+ "block compression algorithm for collection data "
+ "[none|snappy|zlib]")
+ .format("(:?none)|(:?snappy)|(:?zlib)", "(none/snappy/zlib)")
+ .setDefault(moe::Value(std::string("snappy")));
+ wiredTigerOptions.addOptionChaining("storage.wiredTiger.collectionConfig.configString",
+ "wiredTigerCollectionConfigString",
+ moe::String,
+ "WiredTiger custom collection configuration settings")
+ .hidden();
+
+
+ // WiredTiger index options
+ wiredTigerOptions.addOptionChaining("storage.wiredTiger.indexConfig.prefixCompression",
+ "wiredTigerIndexPrefixCompression",
+ moe::Bool,
+ "use prefix compression on row-store leaf pages")
+ .setDefault(moe::Value(true));
+ wiredTigerOptions.addOptionChaining("storage.wiredTiger.indexConfig.configString",
+ "wiredTigerIndexConfigString",
+ moe::String,
+ "WiredTiger custom index configuration settings").hidden();
+
+ return options->addSection(wiredTigerOptions);
+}
+
+Status WiredTigerGlobalOptions::store(const moe::Environment& params,
+ const std::vector<std::string>& args) {
+ // WiredTiger storage engine options
+ if (params.count("storage.wiredTiger.engineConfig.cacheSizeGB")) {
+ wiredTigerGlobalOptions.cacheSizeGB =
+ params["storage.wiredTiger.engineConfig.cacheSizeGB"].as<int>();
+ }
+ if (params.count("storage.syncPeriodSecs")) {
+ wiredTigerGlobalOptions.checkpointDelaySecs =
+ static_cast<size_t>(params["storage.syncPeriodSecs"].as<double>());
+ }
+ if (params.count("storage.wiredTiger.engineConfig.statisticsLogDelaySecs")) {
+ wiredTigerGlobalOptions.statisticsLogDelaySecs =
+ params["storage.wiredTiger.engineConfig.statisticsLogDelaySecs"].as<int>();
+ }
+ if (params.count("storage.wiredTiger.engineConfig.journalCompressor")) {
+ wiredTigerGlobalOptions.journalCompressor =
+ params["storage.wiredTiger.engineConfig.journalCompressor"].as<std::string>();
+ }
+ if (params.count("storage.wiredTiger.engineConfig.directoryForIndexes")) {
+ wiredTigerGlobalOptions.directoryForIndexes =
+ params["storage.wiredTiger.engineConfig.directoryForIndexes"].as<bool>();
+ }
+ if (params.count("storage.wiredTiger.engineConfig.configString")) {
+ wiredTigerGlobalOptions.engineConfig =
+ params["storage.wiredTiger.engineConfig.configString"].as<std::string>();
+ log() << "Engine custom option: " << wiredTigerGlobalOptions.engineConfig;
}
- Status WiredTigerGlobalOptions::store(const moe::Environment& params,
- const std::vector<std::string>& args) {
-
- // WiredTiger storage engine options
- if (params.count("storage.wiredTiger.engineConfig.cacheSizeGB")) {
- wiredTigerGlobalOptions.cacheSizeGB =
- params["storage.wiredTiger.engineConfig.cacheSizeGB"].as<int>();
- }
- if (params.count("storage.syncPeriodSecs")) {
- wiredTigerGlobalOptions.checkpointDelaySecs =
- static_cast<size_t>(params["storage.syncPeriodSecs"].as<double>());
- }
- if (params.count("storage.wiredTiger.engineConfig.statisticsLogDelaySecs")) {
- wiredTigerGlobalOptions.statisticsLogDelaySecs =
- params["storage.wiredTiger.engineConfig.statisticsLogDelaySecs"].as<int>();
- }
- if (params.count("storage.wiredTiger.engineConfig.journalCompressor")) {
- wiredTigerGlobalOptions.journalCompressor =
- params["storage.wiredTiger.engineConfig.journalCompressor"].as<std::string>();
- }
- if (params.count("storage.wiredTiger.engineConfig.directoryForIndexes")) {
- wiredTigerGlobalOptions.directoryForIndexes =
- params["storage.wiredTiger.engineConfig.directoryForIndexes"].as<bool>();
- }
- if (params.count("storage.wiredTiger.engineConfig.configString")) {
- wiredTigerGlobalOptions.engineConfig =
- params["storage.wiredTiger.engineConfig.configString"].as<std::string>();
- log() << "Engine custom option: " << wiredTigerGlobalOptions.engineConfig;
- }
-
- // WiredTiger collection options
- if (params.count("storage.wiredTiger.collectionConfig.blockCompressor")) {
- wiredTigerGlobalOptions.collectionBlockCompressor =
- params["storage.wiredTiger.collectionConfig.blockCompressor"].as<std::string>();
- }
- if (params.count("storage.wiredTiger.collectionConfig.configString")) {
- wiredTigerGlobalOptions.collectionConfig =
- params["storage.wiredTiger.collectionConfig.configString"].as<std::string>();
- log() << "Collection custom option: " << wiredTigerGlobalOptions.collectionConfig;
- }
-
- // WiredTiger index options
- if (params.count("storage.wiredTiger.indexConfig.prefixCompression")) {
- wiredTigerGlobalOptions.useIndexPrefixCompression =
- params["storage.wiredTiger.indexConfig.prefixCompression"].as<bool>();
- }
- if (params.count("storage.wiredTiger.indexConfig.configString")) {
- wiredTigerGlobalOptions.indexConfig =
- params["storage.wiredTiger.indexConfig.configString"].as<std::string>();
- log() << "Index custom option: " << wiredTigerGlobalOptions.indexConfig;
- }
-
- return Status::OK();
+ // WiredTiger collection options
+ if (params.count("storage.wiredTiger.collectionConfig.blockCompressor")) {
+ wiredTigerGlobalOptions.collectionBlockCompressor =
+ params["storage.wiredTiger.collectionConfig.blockCompressor"].as<std::string>();
}
+ if (params.count("storage.wiredTiger.collectionConfig.configString")) {
+ wiredTigerGlobalOptions.collectionConfig =
+ params["storage.wiredTiger.collectionConfig.configString"].as<std::string>();
+ log() << "Collection custom option: " << wiredTigerGlobalOptions.collectionConfig;
+ }
+
+ // WiredTiger index options
+ if (params.count("storage.wiredTiger.indexConfig.prefixCompression")) {
+ wiredTigerGlobalOptions.useIndexPrefixCompression =
+ params["storage.wiredTiger.indexConfig.prefixCompression"].as<bool>();
+ }
+ if (params.count("storage.wiredTiger.indexConfig.configString")) {
+ wiredTigerGlobalOptions.indexConfig =
+ params["storage.wiredTiger.indexConfig.configString"].as<std::string>();
+ log() << "Index custom option: " << wiredTigerGlobalOptions.indexConfig;
+ }
+
+ return Status::OK();
+}
} // namespace mongo
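
[Editorial aside] The two functions above split option handling into phases: add() declares each option once under both its dotted config name and its command-line alias, and store() copies whatever the parser collected into the wiredTigerGlobalOptions globals. A stripped-down sketch of the same two-phase flow with one invented option ('storage.myEngine.cacheMB'); it uses only moe calls already visible above, and the header paths are assumptions:

    #include "mongo/util/options_parser/environment.h"
    #include "mongo/util/options_parser/option_section.h"

    namespace moe = mongo::optionenvironment;

    int gCacheMB = 0;  // stand-in for a global options struct field

    mongo::Status addMyOptions(moe::OptionSection* options) {
        moe::OptionSection section("My engine options");
        section.addOptionChaining("storage.myEngine.cacheMB",  // dotted config name
                                  "myEngineCacheMB",           // command-line name
                                  moe::Int,
                                  "cache size in MB")
            .validRange(1, 1024)
            .setDefault(moe::Value(64));
        return options->addSection(section);
    }

    mongo::Status storeMyOptions(const moe::Environment& params) {
        if (params.count("storage.myEngine.cacheMB")) {
            gCacheMB = params["storage.myEngine.cacheMB"].as<int>();
        }
        return mongo::Status::OK();
    }
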
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_global_options.h b/src/mongo/db/storage/wiredtiger/wiredtiger_global_options.h
index 622d3d61b03..9e2307ff0fa 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_global_options.h
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_global_options.h
@@ -35,37 +35,35 @@
namespace mongo {
- namespace moe = mongo::optionenvironment;
+namespace moe = mongo::optionenvironment;
- class WiredTigerGlobalOptions {
- public:
- WiredTigerGlobalOptions() : cacheSizeGB(0),
- checkpointDelaySecs(0),
- statisticsLogDelaySecs(0),
- directoryForIndexes(false),
- useCollectionPrefixCompression(false),
- useIndexPrefixCompression(false)
- {};
+class WiredTigerGlobalOptions {
+public:
+ WiredTigerGlobalOptions()
+ : cacheSizeGB(0),
+ checkpointDelaySecs(0),
+ statisticsLogDelaySecs(0),
+ directoryForIndexes(false),
+ useCollectionPrefixCompression(false),
+ useIndexPrefixCompression(false){};
- Status add(moe::OptionSection* options);
- Status store(const moe::Environment& params, const std::vector<std::string>& args);
+ Status add(moe::OptionSection* options);
+ Status store(const moe::Environment& params, const std::vector<std::string>& args);
- size_t cacheSizeGB;
- size_t checkpointDelaySecs;
- size_t statisticsLogDelaySecs;
- std::string journalCompressor;
- bool directoryForIndexes;
- std::string engineConfig;
+ size_t cacheSizeGB;
+ size_t checkpointDelaySecs;
+ size_t statisticsLogDelaySecs;
+ std::string journalCompressor;
+ bool directoryForIndexes;
+ std::string engineConfig;
- std::string collectionBlockCompressor;
- std::string indexBlockCompressor;
- bool useCollectionPrefixCompression;
- bool useIndexPrefixCompression;
- std::string collectionConfig;
- std::string indexConfig;
-
- };
-
- extern WiredTigerGlobalOptions wiredTigerGlobalOptions;
+ std::string collectionBlockCompressor;
+ std::string indexBlockCompressor;
+ bool useCollectionPrefixCompression;
+ bool useIndexPrefixCompression;
+ std::string collectionConfig;
+ std::string indexConfig;
+};
+extern WiredTigerGlobalOptions wiredTigerGlobalOptions;
}
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp
index 402dc13a0d1..a5abbf61137 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp
@@ -59,1119 +59,1110 @@
#if TRACING_ENABLED
#define TRACE_CURSOR log() << "WT index (" << (const void*)&_idx << ") "
-#define TRACE_INDEX log() << "WT index (" << (const void*)this << ") "
+#define TRACE_INDEX log() << "WT index (" << (const void*)this << ") "
#else
-#define TRACE_CURSOR if ( 0 ) log()
-#define TRACE_INDEX if ( 0 ) log()
+#define TRACE_CURSOR \
+ if (0) \
+ log()
+#define TRACE_INDEX \
+ if (0) \
+ log()
#endif
namespace mongo {
namespace {
- using std::string;
- using std::vector;
+using std::string;
+using std::vector;
- static const int TempKeyMaxSize = 1024; // this goes away with SERVER-3372
+static const int TempKeyMaxSize = 1024; // this goes away with SERVER-3372
- static const WiredTigerItem emptyItem(NULL, 0);
+static const WiredTigerItem emptyItem(NULL, 0);
- static const int kMinimumIndexVersion = 6;
- static const int kCurrentIndexVersion = 6; // New indexes use this by default.
- static const int kMaximumIndexVersion = 6;
- BOOST_STATIC_ASSERT(kCurrentIndexVersion >= kMinimumIndexVersion);
- BOOST_STATIC_ASSERT(kCurrentIndexVersion <= kMaximumIndexVersion);
+static const int kMinimumIndexVersion = 6;
+static const int kCurrentIndexVersion = 6; // New indexes use this by default.
+static const int kMaximumIndexVersion = 6;
+BOOST_STATIC_ASSERT(kCurrentIndexVersion >= kMinimumIndexVersion);
+BOOST_STATIC_ASSERT(kCurrentIndexVersion <= kMaximumIndexVersion);
- bool hasFieldNames(const BSONObj& obj) {
- BSONForEach(e, obj) {
- if (e.fieldName()[0])
- return true;
- }
- return false;
+bool hasFieldNames(const BSONObj& obj) {
+ BSONForEach(e, obj) {
+ if (e.fieldName()[0])
+ return true;
}
+ return false;
+}
- BSONObj stripFieldNames(const BSONObj& query) {
- if (!hasFieldNames(query))
- return query;
+BSONObj stripFieldNames(const BSONObj& query) {
+ if (!hasFieldNames(query))
+ return query;
- BSONObjBuilder bb;
- BSONForEach(e, query) {
- bb.appendAs(e, StringData());
- }
- return bb.obj();
+ BSONObjBuilder bb;
+ BSONForEach(e, query) {
+ bb.appendAs(e, StringData());
}
-
- Status checkKeySize(const BSONObj& key) {
- if ( key.objsize() >= TempKeyMaxSize ) {
- string msg = mongoutils::str::stream()
- << "WiredTigerIndex::insert: key too large to index, failing "
- << ' ' << key.objsize() << ' ' << key;
- return Status(ErrorCodes::KeyTooLong, msg);
- }
- return Status::OK();
+ return bb.obj();
+}
+
+Status checkKeySize(const BSONObj& key) {
+ if (key.objsize() >= TempKeyMaxSize) {
+ string msg = mongoutils::str::stream()
+ << "WiredTigerIndex::insert: key too large to index, failing " << ' ' << key.objsize()
+ << ' ' << key;
+ return Status(ErrorCodes::KeyTooLong, msg);
}
-
-} // namespace
-
- Status WiredTigerIndex::dupKeyError(const BSONObj& key) {
- StringBuilder sb;
- sb << "E11000 duplicate key error";
- sb << " collection: " << _collectionNamespace;
- sb << " index: " << _indexName;
- sb << " dup key: " << key;
- return Status(ErrorCodes::DuplicateKey, sb.str());
- }
-
- // static
- StatusWith<std::string> WiredTigerIndex::parseIndexOptions(const BSONObj& options) {
- StringBuilder ss;
- BSONForEach(elem, options) {
- if (elem.fieldNameStringData() == "configString") {
- if (elem.type() != String) {
- return StatusWith<std::string>(ErrorCodes::TypeMismatch, str::stream()
- << "configString must be a string. "
- << "Not adding 'configString' value "
- << elem << " to index configuration");
- }
- ss << elem.valueStringData() << ',';
- }
- else {
- // Return error on first unrecognized field.
- return StatusWith<std::string>(ErrorCodes::InvalidOptions, str::stream()
- << '\'' << elem.fieldNameStringData() << '\''
- << " is not a supported option.");
+ return Status::OK();
+}
+
+} // namespace
+
+Status WiredTigerIndex::dupKeyError(const BSONObj& key) {
+ StringBuilder sb;
+ sb << "E11000 duplicate key error";
+ sb << " collection: " << _collectionNamespace;
+ sb << " index: " << _indexName;
+ sb << " dup key: " << key;
+ return Status(ErrorCodes::DuplicateKey, sb.str());
+}
+
+// static
+StatusWith<std::string> WiredTigerIndex::parseIndexOptions(const BSONObj& options) {
+ StringBuilder ss;
+ BSONForEach(elem, options) {
+ if (elem.fieldNameStringData() == "configString") {
+ if (elem.type() != String) {
+ return StatusWith<std::string>(ErrorCodes::TypeMismatch,
+ str::stream() << "configString must be a string. "
+ << "Not adding 'configString' value "
+ << elem << " to index configuration");
}
+ ss << elem.valueStringData() << ',';
+ } else {
+ // Return error on first unrecognized field.
+ return StatusWith<std::string>(ErrorCodes::InvalidOptions,
+ str::stream() << '\'' << elem.fieldNameStringData()
+ << '\'' << " is not a supported option.");
}
- return StatusWith<std::string>(ss.str());
}
-
- // static
- StatusWith<std::string> WiredTigerIndex::generateCreateString(const std::string& extraConfig,
- const IndexDescriptor& desc) {
- str::stream ss;
-
- // Separate out a prefix and suffix in the default string. User configuration will override
- // values in the prefix, but not values in the suffix. Page sizes are chosen so that index
- // keys (up to 1024 bytes) will not overflow.
- ss << "type=file,internal_page_max=16k,leaf_page_max=16k,";
- ss << "checksum=on,";
- if (wiredTigerGlobalOptions.useIndexPrefixCompression) {
- ss << "prefix_compression=true,";
- }
-
- ss << "block_compressor=" << wiredTigerGlobalOptions.indexBlockCompressor << ",";
- ss << WiredTigerCustomizationHooks::get(
- getGlobalServiceContext())->getOpenConfig(desc.parentNS());
- ss << extraConfig;
-
- // Validate configuration object.
- // Raise an error about unrecognized fields that may be introduced in newer versions of
- // this storage engine.
- // Ensure that 'configString' field is a string. Raise an error if this is not the case.
- BSONElement storageEngineElement = desc.getInfoElement("storageEngine");
- if (storageEngineElement.isABSONObj()) {
- BSONObj storageEngine = storageEngineElement.Obj();
- StatusWith<std::string> parseStatus =
- parseIndexOptions(storageEngine.getObjectField(kWiredTigerEngineName));
- if (!parseStatus.isOK()) {
- return parseStatus;
- }
- if (!parseStatus.getValue().empty()) {
- ss << "," << parseStatus.getValue();
- }
- }
-
- // WARNING: No user-specified config can appear below this line. These options are required
- // for correct behavior of the server.
-
- // Indexes need to store the metadata for collation to work as expected.
- ss << ",key_format=u,value_format=u";
-
- // Index metadata
- ss << ",app_metadata=("
- << "formatVersion=" << kCurrentIndexVersion << ','
- << "infoObj=" << desc.infoObj().jsonString()
- << "),";
-
- LOG(3) << "index create string: " << ss.ss.str();
- return StatusWith<std::string>(ss);
- }
-
- int WiredTigerIndex::Create(OperationContext* txn,
- const std::string& uri,
- const std::string& config) {
- WT_SESSION* s = WiredTigerRecoveryUnit::get( txn )->getSession(txn)->getSession();
- LOG(1) << "create uri: " << uri << " config: " << config;
- return s->create(s, uri.c_str(), config.c_str());
+ return StatusWith<std::string>(ss.str());
+}
+
+// static
+StatusWith<std::string> WiredTigerIndex::generateCreateString(const std::string& extraConfig,
+ const IndexDescriptor& desc) {
+ str::stream ss;
+
+ // Separate out a prefix and suffix in the default string. User configuration will override
+ // values in the prefix, but not values in the suffix. Page sizes are chosen so that index
+ // keys (up to 1024 bytes) will not overflow.
+ ss << "type=file,internal_page_max=16k,leaf_page_max=16k,";
+ ss << "checksum=on,";
+ if (wiredTigerGlobalOptions.useIndexPrefixCompression) {
+ ss << "prefix_compression=true,";
}
- WiredTigerIndex::WiredTigerIndex(OperationContext* ctx,
- const std::string& uri,
- const IndexDescriptor* desc)
- : _ordering(Ordering::make(desc->keyPattern())),
- _uri( uri ),
- _instanceId( WiredTigerSession::genCursorId() ),
- _collectionNamespace( desc->parentNS() ),
- _indexName( desc->indexName() ){
-
- Status versionStatus =
- WiredTigerUtil::checkApplicationMetadataFormatVersion(ctx,
- uri,
- kMinimumIndexVersion,
- kMaximumIndexVersion);
- if (!versionStatus.isOK()) {
- fassertFailedWithStatusNoTrace(28579, versionStatus);
+ ss << "block_compressor=" << wiredTigerGlobalOptions.indexBlockCompressor << ",";
+ ss << WiredTigerCustomizationHooks::get(getGlobalServiceContext())
+ ->getOpenConfig(desc.parentNS());
+ ss << extraConfig;
+
+ // Validate configuration object.
+ // Raise an error about unrecognized fields that may be introduced in newer versions of
+ // this storage engine.
+ // Ensure that 'configString' field is a string. Raise an error if this is not the case.
+ BSONElement storageEngineElement = desc.getInfoElement("storageEngine");
+ if (storageEngineElement.isABSONObj()) {
+ BSONObj storageEngine = storageEngineElement.Obj();
+ StatusWith<std::string> parseStatus =
+ parseIndexOptions(storageEngine.getObjectField(kWiredTigerEngineName));
+ if (!parseStatus.isOK()) {
+ return parseStatus;
+ }
+ if (!parseStatus.getValue().empty()) {
+ ss << "," << parseStatus.getValue();
}
}
- Status WiredTigerIndex::insert(OperationContext* txn,
- const BSONObj& key,
- const RecordId& loc,
- bool dupsAllowed) {
- invariant(loc.isNormal());
- dassert(!hasFieldNames(key));
-
- Status s = checkKeySize(key);
- if (!s.isOK())
- return s;
-
- WiredTigerCursor curwrap(_uri, _instanceId, false, txn);
- curwrap.assertInActiveTxn();
- WT_CURSOR *c = curwrap.get();
-
- return _insert( c, key, loc, dupsAllowed );
- }
-
- void WiredTigerIndex::unindex(OperationContext* txn,
- const BSONObj& key,
- const RecordId& loc,
- bool dupsAllowed ) {
- invariant(loc.isNormal());
- dassert(!hasFieldNames(key));
-
- WiredTigerCursor curwrap(_uri, _instanceId, false, txn);
- curwrap.assertInActiveTxn();
- WT_CURSOR *c = curwrap.get();
- invariant( c );
-
- _unindex( c, key, loc, dupsAllowed );
+ // WARNING: No user-specified config can appear below this line. These options are required
+ // for correct behavior of the server.
+
+ // Indexes need to store the metadata for collation to work as expected.
+ ss << ",key_format=u,value_format=u";
+
+ // Index metadata
+ ss << ",app_metadata=("
+ << "formatVersion=" << kCurrentIndexVersion << ','
+ << "infoObj=" << desc.infoObj().jsonString() << "),";
+
+ LOG(3) << "index create string: " << ss.ss.str();
+ return StatusWith<std::string>(ss);
+}
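
[Editorial aside] For orientation, with the defaults above (prefix compression enabled, an index block compressor of "snappy", no user-supplied config) the generated creation string comes out roughly as follows; the infoObj payload is illustrative, and the double comma after the compressor is expected, since the required suffix is appended with a leading comma:

    type=file,internal_page_max=16k,leaf_page_max=16k,checksum=on,prefix_compression=true,block_compressor=snappy,,key_format=u,value_format=u,app_metadata=(formatVersion=6,infoObj={ "v" : 1, "key" : { "a" : 1 }, "name" : "a_1", "ns" : "test.coll" }),
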
+
+int WiredTigerIndex::Create(OperationContext* txn,
+ const std::string& uri,
+ const std::string& config) {
+ WT_SESSION* s = WiredTigerRecoveryUnit::get(txn)->getSession(txn)->getSession();
+ LOG(1) << "create uri: " << uri << " config: " << config;
+ return s->create(s, uri.c_str(), config.c_str());
+}
+
+WiredTigerIndex::WiredTigerIndex(OperationContext* ctx,
+ const std::string& uri,
+ const IndexDescriptor* desc)
+ : _ordering(Ordering::make(desc->keyPattern())),
+ _uri(uri),
+ _instanceId(WiredTigerSession::genCursorId()),
+ _collectionNamespace(desc->parentNS()),
+ _indexName(desc->indexName()) {
+ Status versionStatus = WiredTigerUtil::checkApplicationMetadataFormatVersion(
+ ctx, uri, kMinimumIndexVersion, kMaximumIndexVersion);
+ if (!versionStatus.isOK()) {
+ fassertFailedWithStatusNoTrace(28579, versionStatus);
}
-
- void WiredTigerIndex::fullValidate(OperationContext* txn, bool full, long long *numKeysOut,
- BSONObjBuilder* output) const {
- {
- std::vector<std::string> errors;
- int err = WiredTigerUtil::verifyTable(txn, _uri, output ? &errors : NULL);
- if (err == EBUSY) {
- const char* msg = "verify() returned EBUSY. Not treating as invalid.";
- warning() << msg;
- if (output) {
- if (!errors.empty()) {
- *output << "errors" << errors;
- }
- *output << "warning" << msg;
- }
- }
- else if (err) {
- std::string msg = str::stream()
- << "verify() returned " << wiredtiger_strerror(err) << ". "
- << "This indicates structural damage. "
- << "Not examining individual index entries.";
- error() << msg;
- if (output) {
- errors.push_back(msg);
+}
+
+Status WiredTigerIndex::insert(OperationContext* txn,
+ const BSONObj& key,
+ const RecordId& loc,
+ bool dupsAllowed) {
+ invariant(loc.isNormal());
+ dassert(!hasFieldNames(key));
+
+ Status s = checkKeySize(key);
+ if (!s.isOK())
+ return s;
+
+ WiredTigerCursor curwrap(_uri, _instanceId, false, txn);
+ curwrap.assertInActiveTxn();
+ WT_CURSOR* c = curwrap.get();
+
+ return _insert(c, key, loc, dupsAllowed);
+}
+
+void WiredTigerIndex::unindex(OperationContext* txn,
+ const BSONObj& key,
+ const RecordId& loc,
+ bool dupsAllowed) {
+ invariant(loc.isNormal());
+ dassert(!hasFieldNames(key));
+
+ WiredTigerCursor curwrap(_uri, _instanceId, false, txn);
+ curwrap.assertInActiveTxn();
+ WT_CURSOR* c = curwrap.get();
+ invariant(c);
+
+ _unindex(c, key, loc, dupsAllowed);
+}
+
+void WiredTigerIndex::fullValidate(OperationContext* txn,
+ bool full,
+ long long* numKeysOut,
+ BSONObjBuilder* output) const {
+ {
+ std::vector<std::string> errors;
+ int err = WiredTigerUtil::verifyTable(txn, _uri, output ? &errors : NULL);
+ if (err == EBUSY) {
+ const char* msg = "verify() returned EBUSY. Not treating as invalid.";
+ warning() << msg;
+ if (output) {
+ if (!errors.empty()) {
*output << "errors" << errors;
- *output << "valid" << false;
}
- return;
+ *output << "warning" << msg;
+ }
+ } else if (err) {
+ std::string msg = str::stream() << "verify() returned " << wiredtiger_strerror(err)
+ << ". "
+ << "This indicates structural damage. "
+ << "Not examining individual index entries.";
+ error() << msg;
+ if (output) {
+ errors.push_back(msg);
+ *output << "errors" << errors;
+ *output << "valid" << false;
}
+ return;
}
+ }
- if (output) *output << "valid" << true;
-
- auto cursor = newCursor(txn);
- long long count = 0;
- TRACE_INDEX << " fullValidate";
-
- const auto requestedInfo = TRACING_ENABLED ? Cursor::kKeyAndLoc : Cursor::kJustExistance;
- for (auto kv = cursor->seek(BSONObj(), true, requestedInfo); kv; kv = cursor->next()) {
- TRACE_INDEX << "\t" << kv->key << ' ' << kv->loc;
- count++;
- }
+ if (output)
+ *output << "valid" << true;
- if ( numKeysOut ) {
- *numKeysOut = count;
- }
+ auto cursor = newCursor(txn);
+ long long count = 0;
+ TRACE_INDEX << " fullValidate";
- // Nothing further to do if 'full' validation is not requested.
- if (!full) {
- return;
- }
+ const auto requestedInfo = TRACING_ENABLED ? Cursor::kKeyAndLoc : Cursor::kJustExistance;
+ for (auto kv = cursor->seek(BSONObj(), true, requestedInfo); kv; kv = cursor->next()) {
+ TRACE_INDEX << "\t" << kv->key << ' ' << kv->loc;
+ count++;
+ }
- invariant(output);
+ if (numKeysOut) {
+ *numKeysOut = count;
}
- bool WiredTigerIndex::appendCustomStats(OperationContext* txn,
- BSONObjBuilder* output,
- double scale) const {
+ // Nothing further to do if 'full' validation is not requested.
+ if (!full) {
+ return;
+ }
- {
- BSONObjBuilder metadata(output->subobjStart("metadata"));
- Status status = WiredTigerUtil::getApplicationMetadata(txn, uri(), &metadata);
- if (!status.isOK()) {
- metadata.append("error", "unable to retrieve metadata");
- metadata.append("code", static_cast<int>(status.code()));
- metadata.append("reason", status.reason());
- }
- }
- std::string type, sourceURI;
- WiredTigerUtil::fetchTypeAndSourceURI(txn, _uri, &type, &sourceURI);
- StatusWith<std::string> metadataResult = WiredTigerUtil::getMetadata(txn, sourceURI);
- StringData creationStringName("creationString");
- if (!metadataResult.isOK()) {
- BSONObjBuilder creationString(output->subobjStart(creationStringName));
- creationString.append("error", "unable to retrieve creation config");
- creationString.append("code", static_cast<int>(metadataResult.getStatus().code()));
- creationString.append("reason", metadataResult.getStatus().reason());
- }
- else {
- output->append(creationStringName, metadataResult.getValue());
- // Type can be "lsm" or "file"
- output->append("type", type);
- }
+ invariant(output);
+}
- WiredTigerSession* session = WiredTigerRecoveryUnit::get(txn)->getSession(txn);
- WT_SESSION* s = session->getSession();
- Status status = WiredTigerUtil::exportTableToBSON(s, "statistics:" + uri(),
- "statistics=(fast)", output);
+bool WiredTigerIndex::appendCustomStats(OperationContext* txn,
+ BSONObjBuilder* output,
+ double scale) const {
+ {
+ BSONObjBuilder metadata(output->subobjStart("metadata"));
+ Status status = WiredTigerUtil::getApplicationMetadata(txn, uri(), &metadata);
if (!status.isOK()) {
- output->append("error", "unable to retrieve statistics");
- output->append("code", static_cast<int>(status.code()));
- output->append("reason", status.reason());
+ metadata.append("error", "unable to retrieve metadata");
+ metadata.append("code", static_cast<int>(status.code()));
+ metadata.append("reason", status.reason());
}
- return true;
}
-
- Status WiredTigerIndex::dupKeyCheck( OperationContext* txn,
- const BSONObj& key,
- const RecordId& loc) {
- invariant(!hasFieldNames(key));
- invariant(unique());
-
- WiredTigerCursor curwrap(_uri, _instanceId, false, txn);
- WT_CURSOR *c = curwrap.get();
-
- if ( isDup(c, key, loc) )
- return dupKeyError(key);
- return Status::OK();
+ std::string type, sourceURI;
+ WiredTigerUtil::fetchTypeAndSourceURI(txn, _uri, &type, &sourceURI);
+ StatusWith<std::string> metadataResult = WiredTigerUtil::getMetadata(txn, sourceURI);
+ StringData creationStringName("creationString");
+ if (!metadataResult.isOK()) {
+ BSONObjBuilder creationString(output->subobjStart(creationStringName));
+ creationString.append("error", "unable to retrieve creation config");
+ creationString.append("code", static_cast<int>(metadataResult.getStatus().code()));
+ creationString.append("reason", metadataResult.getStatus().reason());
+ } else {
+ output->append(creationStringName, metadataResult.getValue());
+ // Type can be "lsm" or "file"
+ output->append("type", type);
}
- bool WiredTigerIndex::isEmpty(OperationContext* txn) {
- WiredTigerCursor curwrap(_uri, _instanceId, false, txn);
- WT_CURSOR *c = curwrap.get();
- if (!c)
- return true;
- int ret = WT_OP_CHECK(c->next(c));
- if (ret == WT_NOTFOUND)
- return true;
- invariantWTOK(ret);
+ WiredTigerSession* session = WiredTigerRecoveryUnit::get(txn)->getSession(txn);
+ WT_SESSION* s = session->getSession();
+ Status status =
+ WiredTigerUtil::exportTableToBSON(s, "statistics:" + uri(), "statistics=(fast)", output);
+ if (!status.isOK()) {
+ output->append("error", "unable to retrieve statistics");
+ output->append("code", static_cast<int>(status.code()));
+ output->append("reason", status.reason());
+ }
+ return true;
+}
+
+Status WiredTigerIndex::dupKeyCheck(OperationContext* txn,
+ const BSONObj& key,
+ const RecordId& loc) {
+ invariant(!hasFieldNames(key));
+ invariant(unique());
+
+ WiredTigerCursor curwrap(_uri, _instanceId, false, txn);
+ WT_CURSOR* c = curwrap.get();
+
+ if (isDup(c, key, loc))
+ return dupKeyError(key);
+ return Status::OK();
+}
+
+bool WiredTigerIndex::isEmpty(OperationContext* txn) {
+ WiredTigerCursor curwrap(_uri, _instanceId, false, txn);
+ WT_CURSOR* c = curwrap.get();
+ if (!c)
+ return true;
+ int ret = WT_OP_CHECK(c->next(c));
+ if (ret == WT_NOTFOUND)
+ return true;
+ invariantWTOK(ret);
+ return false;
+}
+
+long long WiredTigerIndex::getSpaceUsedBytes(OperationContext* txn) const {
+ WiredTigerSession* session = WiredTigerRecoveryUnit::get(txn)->getSession(txn);
+ return static_cast<long long>(WiredTigerUtil::getIdentSize(session->getSession(), _uri));
+}
+
+bool WiredTigerIndex::isDup(WT_CURSOR* c, const BSONObj& key, const RecordId& loc) {
+ invariant(unique());
+ // First check whether the key exists.
+ KeyString data(key, _ordering);
+ WiredTigerItem item(data.getBuffer(), data.getSize());
+ c->set_key(c, item.Get());
+ int ret = WT_OP_CHECK(c->search(c));
+ if (ret == WT_NOTFOUND) {
return false;
}
+ invariantWTOK(ret);
+
+ // If the key exists, check if we already have this loc at this key. If so, we don't
+ // consider that to be a dup.
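+    // A unique-index value is one or more (RecordId, TypeBits) pairs concatenated together, so
+    // decode each RecordId in turn and compare it against 'loc'.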
+ WT_ITEM value;
+ invariantWTOK(c->get_value(c, &value));
+ BufReader br(value.data, value.size);
+ while (br.remaining()) {
+ if (KeyString::decodeRecordId(&br) == loc)
+ return false;
- long long WiredTigerIndex::getSpaceUsedBytes( OperationContext* txn ) const {
- WiredTigerSession* session = WiredTigerRecoveryUnit::get(txn)->getSession(txn);
- return static_cast<long long>( WiredTigerUtil::getIdentSize( session->getSession(),
- _uri ) );
+ KeyString::TypeBits::fromBuffer(&br); // Just calling this to advance reader.
}
+ return true;
+}
- bool WiredTigerIndex::isDup(WT_CURSOR *c, const BSONObj& key, const RecordId& loc ) {
- invariant( unique() );
- // First check whether the key exists.
- KeyString data( key, _ordering );
- WiredTigerItem item( data.getBuffer(), data.getSize() );
- c->set_key( c, item.Get() );
- int ret = WT_OP_CHECK(c->search(c));
- if (ret == WT_NOTFOUND) {
- return false;
- }
- invariantWTOK( ret );
-
- // If the key exists, check if we already have this loc at this key. If so, we don't
- // consider that to be a dup.
- WT_ITEM value;
- invariantWTOK( c->get_value(c,&value) );
- BufReader br(value.data, value.size);
- while (br.remaining()) {
- if (KeyString::decodeRecordId(&br) == loc)
- return false;
-
- KeyString::TypeBits::fromBuffer(&br); // Just calling this to advance reader.
- }
- return true;
- }
+Status WiredTigerIndex::initAsEmpty(OperationContext* txn) {
+ // No-op
+ return Status::OK();
+}
- Status WiredTigerIndex::initAsEmpty(OperationContext* txn) {
- // No-op
- return Status::OK();
+/**
+ * Base class for WiredTigerIndex bulk builders.
+ *
+ * Manages the bulk cursor used by bulk builders.
+ */
+class WiredTigerIndex::BulkBuilder : public SortedDataBuilderInterface {
+public:
+ BulkBuilder(WiredTigerIndex* idx, OperationContext* txn)
+ : _ordering(idx->_ordering),
+ _txn(txn),
+ _session(WiredTigerRecoveryUnit::get(_txn)->getSessionCache()->getSession()),
+ _cursor(openBulkCursor(idx)) {}
+
+ ~BulkBuilder() {
+ _cursor->close(_cursor);
+ WiredTigerRecoveryUnit::get(_txn)->getSessionCache()->releaseSession(_session);
}
- /**
- * Base class for WiredTigerIndex bulk builders.
- *
- * Manages the bulk cursor used by bulk builders.
- */
- class WiredTigerIndex::BulkBuilder : public SortedDataBuilderInterface {
- public:
- BulkBuilder(WiredTigerIndex* idx, OperationContext* txn)
- : _ordering(idx->_ordering)
- , _txn(txn)
- , _session(WiredTigerRecoveryUnit::get(_txn)->getSessionCache()->getSession())
- , _cursor(openBulkCursor(idx))
- {}
-
- ~BulkBuilder() {
- _cursor->close(_cursor);
- WiredTigerRecoveryUnit::get(_txn)->getSessionCache()->releaseSession(_session);
- }
-
- protected:
- WT_CURSOR* openBulkCursor(WiredTigerIndex* idx) {
- // Open cursors can cause bulk open_cursor to fail with EBUSY.
- // TODO any other cases that could cause EBUSY?
- WiredTigerSession* outerSession = WiredTigerRecoveryUnit::get(_txn)->getSession(_txn);
- outerSession->closeAllCursors();
-
- // Not using cursor cache since we need to set "bulk".
- WT_CURSOR* cursor;
- // We use our own session to ensure we aren't in a transaction.
- WT_SESSION* session = _session->getSession();
- int err = session->open_cursor(session, idx->uri().c_str(), NULL, "bulk", &cursor);
- if (!err)
- return cursor;
-
- warning() << "failed to create WiredTiger bulk cursor: " << wiredtiger_strerror(err);
- warning() << "falling back to non-bulk cursor for index " << idx->uri();
-
- invariantWTOK(session->open_cursor(session, idx->uri().c_str(), NULL, NULL, &cursor));
+protected:
+ WT_CURSOR* openBulkCursor(WiredTigerIndex* idx) {
+ // Open cursors can cause bulk open_cursor to fail with EBUSY.
+ // TODO any other cases that could cause EBUSY?
+ WiredTigerSession* outerSession = WiredTigerRecoveryUnit::get(_txn)->getSession(_txn);
+ outerSession->closeAllCursors();
+
+ // Not using cursor cache since we need to set "bulk".
+ WT_CURSOR* cursor;
+ // We use our own session to ensure we aren't in a transaction.
+ WT_SESSION* session = _session->getSession();
+ int err = session->open_cursor(session, idx->uri().c_str(), NULL, "bulk", &cursor);
+ if (!err)
return cursor;
- }
- const Ordering _ordering;
- OperationContext* const _txn;
- WiredTigerSession* const _session;
- WT_CURSOR* const _cursor;
- };
+ warning() << "failed to create WiredTiger bulk cursor: " << wiredtiger_strerror(err);
+ warning() << "falling back to non-bulk cursor for index " << idx->uri();
- /**
- * Bulk builds a non-unique index.
- */
- class WiredTigerIndex::StandardBulkBuilder : public BulkBuilder {
- public:
- StandardBulkBuilder(WiredTigerIndex* idx, OperationContext* txn)
- : BulkBuilder(idx, txn), _idx(idx) {
- }
-
- Status addKey(const BSONObj& key, const RecordId& loc) {
- {
- const Status s = checkKeySize(key);
- if (!s.isOK())
- return s;
- }
-
- KeyString data( key, _idx->_ordering, loc );
+ invariantWTOK(session->open_cursor(session, idx->uri().c_str(), NULL, NULL, &cursor));
+ return cursor;
+ }
- // Can't use WiredTigerCursor since we aren't using the cache.
- WiredTigerItem item(data.getBuffer(), data.getSize());
- _cursor->set_key(_cursor, item.Get() );
+ const Ordering _ordering;
+ OperationContext* const _txn;
+ WiredTigerSession* const _session;
+ WT_CURSOR* const _cursor;
+};
- WiredTigerItem valueItem =
- data.getTypeBits().isAllZeros() ? emptyItem
- : WiredTigerItem(data.getTypeBits().getBuffer(),
- data.getTypeBits().getSize());
+/**
+ * Bulk builds a non-unique index.
+ */
+class WiredTigerIndex::StandardBulkBuilder : public BulkBuilder {
+public:
+ StandardBulkBuilder(WiredTigerIndex* idx, OperationContext* txn)
+ : BulkBuilder(idx, txn), _idx(idx) {}
- _cursor->set_value(_cursor, valueItem.Get());
+ Status addKey(const BSONObj& key, const RecordId& loc) {
+ {
+ const Status s = checkKeySize(key);
+ if (!s.isOK())
+ return s;
+ }
- invariantWTOK(_cursor->insert(_cursor));
+ KeyString data(key, _idx->_ordering, loc);
- return Status::OK();
- }
+ // Can't use WiredTigerCursor since we aren't using the cache.
+ WiredTigerItem item(data.getBuffer(), data.getSize());
+ _cursor->set_key(_cursor, item.Get());
- void commit(bool mayInterrupt) {
- // TODO do we still need this?
- // this is bizarre, but required as part of the contract
- WriteUnitOfWork uow( _txn );
- uow.commit();
- }
+ WiredTigerItem valueItem = data.getTypeBits().isAllZeros()
+ ? emptyItem
+ : WiredTigerItem(data.getTypeBits().getBuffer(), data.getTypeBits().getSize());
- private:
- WiredTigerIndex* _idx;
- };
+ _cursor->set_value(_cursor, valueItem.Get());
- /**
- * Bulk builds a unique index.
- *
-     * In order to support unique indexes in dupsAllowed mode, this class defers the actual
-     * insert until it sees a key greater than the one currently being buffered. This allows us
-     * to gather up all duplicate locs and insert them together, which is necessary since bulk
-     * cursors can only append data.
- */
- class WiredTigerIndex::UniqueBulkBuilder : public BulkBuilder {
- public:
- UniqueBulkBuilder(WiredTigerIndex* idx, OperationContext* txn, bool dupsAllowed)
- : BulkBuilder(idx, txn), _idx(idx), _dupsAllowed(dupsAllowed) {
- }
+ invariantWTOK(_cursor->insert(_cursor));
- Status addKey(const BSONObj& newKey, const RecordId& loc) {
- {
- const Status s = checkKeySize(newKey);
- if (!s.isOK())
- return s;
- }
+ return Status::OK();
+ }
- const int cmp = newKey.woCompare(_key, _ordering);
- if (cmp != 0) {
- if (!_key.isEmpty()) { // _key.isEmpty() is only true on the first call to addKey().
- invariant(cmp > 0); // newKey must be > the last key
- // We are done with dups of the last key so we can insert it now.
- doInsert();
- }
- invariant(_records.empty());
- }
- else {
- // Dup found!
- if (!_dupsAllowed) {
- return _idx->dupKeyError(newKey);
- }
+ void commit(bool mayInterrupt) {
+ // TODO do we still need this?
+ // this is bizarre, but required as part of the contract
+ WriteUnitOfWork uow(_txn);
+ uow.commit();
+ }
- // If we get here, we are in the weird mode where dups are allowed on a unique
- // index, so add ourselves to the list of duplicate locs. This also replaces the
- // _key which is correct since any dups seen later are likely to be newer.
- }
+private:
+ WiredTigerIndex* _idx;
+};
- _key = newKey.getOwned();
- _keyString.resetToKey(_key, _idx->ordering());
- _records.push_back(std::make_pair(loc, _keyString.getTypeBits()));
+/**
+ * Bulk builds a unique index.
+ *
+ * In order to support unique indexes in dupsAllowed mode, this class defers the actual
+ * insert until it sees a key greater than the one currently being buffered. This allows us
+ * to gather up all duplicate locs and insert them together, which is necessary since bulk
+ * cursors can only append data.
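+ *
+ * For example, given (keyA, loc1), (keyA, loc2), (keyB, loc3), the entry for keyA is only
+ * written once keyB arrives, with loc1 and loc2 packed into a single value.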
+ */
+class WiredTigerIndex::UniqueBulkBuilder : public BulkBuilder {
+public:
+ UniqueBulkBuilder(WiredTigerIndex* idx, OperationContext* txn, bool dupsAllowed)
+ : BulkBuilder(idx, txn), _idx(idx), _dupsAllowed(dupsAllowed) {}
- return Status::OK();
+ Status addKey(const BSONObj& newKey, const RecordId& loc) {
+ {
+ const Status s = checkKeySize(newKey);
+ if (!s.isOK())
+ return s;
}
- void commit(bool mayInterrupt) {
- WriteUnitOfWork uow( _txn );
- if (!_records.empty()) {
- // This handles inserting the last unique key.
+ const int cmp = newKey.woCompare(_key, _ordering);
+ if (cmp != 0) {
+ if (!_key.isEmpty()) { // _key.isEmpty() is only true on the first call to addKey().
+ invariant(cmp > 0); // newKey must be > the last key
+ // We are done with dups of the last key so we can insert it now.
doInsert();
}
- uow.commit();
- }
-
- private:
- void doInsert() {
- invariant(!_records.empty());
-
- KeyString value;
- for (size_t i = 0; i < _records.size(); i++) {
- value.appendRecordId(_records[i].first);
- // When there is only one record, we can omit AllZeros TypeBits. Otherwise they need
- // to be included.
- if (!(_records[i].second.isAllZeros() && _records.size() == 1)) {
- value.appendTypeBits(_records[i].second);
- }
+ invariant(_records.empty());
+ } else {
+ // Dup found!
+ if (!_dupsAllowed) {
+ return _idx->dupKeyError(newKey);
}
-
- WiredTigerItem keyItem( _keyString.getBuffer(), _keyString.getSize() );
- WiredTigerItem valueItem(value.getBuffer(), value.getSize());
-
- _cursor->set_key(_cursor, keyItem.Get());
- _cursor->set_value(_cursor, valueItem.Get());
- invariantWTOK(_cursor->insert(_cursor));
-
- _records.clear();
+ // If we get here, we are in the weird mode where dups are allowed on a unique
+ // index, so add ourselves to the list of duplicate locs. This also replaces the
+ // _key which is correct since any dups seen later are likely to be newer.
}
- WiredTigerIndex* _idx;
- const bool _dupsAllowed;
- BSONObj _key;
- KeyString _keyString;
- std::vector<std::pair<RecordId, KeyString::TypeBits> > _records;
- };
+ _key = newKey.getOwned();
+ _keyString.resetToKey(_key, _idx->ordering());
+ _records.push_back(std::make_pair(loc, _keyString.getTypeBits()));
-namespace {
+ return Status::OK();
+ }
- /**
- * Implements the basic WT_CURSOR functionality used by both unique and standard indexes.
- */
- class WiredTigerIndexCursorBase : public SortedDataInterface::Cursor {
- public:
- WiredTigerIndexCursorBase(const WiredTigerIndex& idx, OperationContext *txn, bool forward)
- : _txn(txn),
- _cursor(idx.uri(), idx.instanceId(), false, txn),
- _idx(idx),
- _forward(forward) {
+ void commit(bool mayInterrupt) {
+ WriteUnitOfWork uow(_txn);
+ if (!_records.empty()) {
+ // This handles inserting the last unique key.
+ doInsert();
}
+ uow.commit();
+ }
- boost::optional<IndexKeyEntry> next(RequestedInfo parts) override {
-            // Advancing a cursor that is already at the end is a no-op
- if (_eof) return {};
-
- if (!_lastMoveWasRestore) advanceWTCursor();
- updatePosition();
- return curr(parts);
- }
+private:
+ void doInsert() {
+ invariant(!_records.empty());
- void setEndPosition(const BSONObj& key, bool inclusive) override {
- TRACE_CURSOR << "setEndPosition inclusive: " << inclusive << ' ' << key;
- if (key.isEmpty()) {
- // This means scan to end of index.
- _endPosition.reset();
- return;
+ KeyString value;
+ for (size_t i = 0; i < _records.size(); i++) {
+ value.appendRecordId(_records[i].first);
+ // When there is only one record, we can omit AllZeros TypeBits. Otherwise they need
+ // to be included.
+ if (!(_records[i].second.isAllZeros() && _records.size() == 1)) {
+ value.appendTypeBits(_records[i].second);
}
-
-            // NOTE: this uses the opposite rules from a normal seek because a forward scan should
- // end after the key if inclusive and before if exclusive.
- const auto discriminator = _forward == inclusive ? KeyString::kExclusiveAfter
- : KeyString::kExclusiveBefore;
- _endPosition = stdx::make_unique<KeyString>();
- _endPosition->resetToKey(stripFieldNames(key), _idx.ordering(), discriminator);
}
- boost::optional<IndexKeyEntry> seek(const BSONObj& key, bool inclusive,
- RequestedInfo parts) override {
- const BSONObj finalKey = stripFieldNames(key);
- const auto discriminator = _forward == inclusive ? KeyString::kExclusiveBefore
- : KeyString::kExclusiveAfter;
-
- // By using a discriminator other than kInclusive, there is no need to distinguish
- // unique vs non-unique key formats since both start with the key.
- _query.resetToKey(finalKey, _idx.ordering(), discriminator);
- seekWTCursor(_query);
- updatePosition();
- return curr(parts);
- }
+ WiredTigerItem keyItem(_keyString.getBuffer(), _keyString.getSize());
+ WiredTigerItem valueItem(value.getBuffer(), value.getSize());
- boost::optional<IndexKeyEntry> seek(const IndexSeekPoint& seekPoint,
- RequestedInfo parts) override {
- // TODO: don't go to a bson obj then to a KeyString, go straight
- BSONObj key = IndexEntryComparison::makeQueryObject(seekPoint, _forward);
-
- // makeQueryObject handles the discriminator in the real exclusive cases.
- const auto discriminator = _forward ? KeyString::kExclusiveBefore
- : KeyString::kExclusiveAfter;
- _query.resetToKey(key, _idx.ordering(), discriminator);
- seekWTCursor(_query);
- updatePosition();
- return curr(parts);
- }
+ _cursor->set_key(_cursor, keyItem.Get());
+ _cursor->set_value(_cursor, valueItem.Get());
- void savePositioned() override {
- if (!_txn) return; // still saved
+ invariantWTOK(_cursor->insert(_cursor));
- _savedForCheck = _txn->recoveryUnit();
+ _records.clear();
+ }
- if (!wt_keeptxnopen()) {
- try {
- _cursor.reset();
- }
- catch (const WriteConflictException& wce) {
- // Ignore since this is only called when we are about to kill our transaction
- // anyway.
- }
+ WiredTigerIndex* _idx;
+ const bool _dupsAllowed;
+ BSONObj _key;
+ KeyString _keyString;
+ std::vector<std::pair<RecordId, KeyString::TypeBits>> _records;
+};
- // Our saved position is wherever we were when we last called updatePosition().
-            // Any partially completed repositions should not affect our saved position.
- }
+namespace {
- _txn = NULL;
- }
+/**
+ * Implements the basic WT_CURSOR functionality used by both unique and standard indexes.
+ */
+class WiredTigerIndexCursorBase : public SortedDataInterface::Cursor {
+public:
+ WiredTigerIndexCursorBase(const WiredTigerIndex& idx, OperationContext* txn, bool forward)
+ : _txn(txn),
+ _cursor(idx.uri(), idx.instanceId(), false, txn),
+ _idx(idx),
+ _forward(forward) {}
+
+ boost::optional<IndexKeyEntry> next(RequestedInfo parts) override {
+        // Advancing a cursor that is already at the end is a no-op
+ if (_eof)
+ return {};
+
+ if (!_lastMoveWasRestore)
+ advanceWTCursor();
+ updatePosition();
+ return curr(parts);
+ }
- void saveUnpositioned() override {
- savePositioned();
- _eof = true;
+ void setEndPosition(const BSONObj& key, bool inclusive) override {
+ TRACE_CURSOR << "setEndPosition inclusive: " << inclusive << ' ' << key;
+ if (key.isEmpty()) {
+ // This means scan to end of index.
+ _endPosition.reset();
+ return;
}
- void restore(OperationContext *txn) override {
- // Update the session handle with our new operation context.
- invariant( _savedForCheck == txn->recoveryUnit() );
- _txn = txn;
-
- if (!wt_keeptxnopen()) {
- if (!_eof) {
- // Ensure an active session exists, so any restored cursors will bind to it
- WiredTigerRecoveryUnit::get(txn)->getSession(txn);
- _lastMoveWasRestore = !seekWTCursor(_key);
- TRACE_CURSOR << "restore _lastMoveWasRestore:" << _lastMoveWasRestore;
- }
- }
- }
+        // NOTE: this uses the opposite rules from a normal seek because a forward scan should
+ // end after the key if inclusive and before if exclusive.
+ const auto discriminator =
+ _forward == inclusive ? KeyString::kExclusiveAfter : KeyString::kExclusiveBefore;
+ _endPosition = stdx::make_unique<KeyString>();
+ _endPosition->resetToKey(stripFieldNames(key), _idx.ordering(), discriminator);
+ }
- protected:
- // Called after _key has been filled in. Must not throw WriteConflictException.
- virtual void updateLocAndTypeBits() = 0;
+ boost::optional<IndexKeyEntry> seek(const BSONObj& key,
+ bool inclusive,
+ RequestedInfo parts) override {
+ const BSONObj finalKey = stripFieldNames(key);
+ const auto discriminator =
+ _forward == inclusive ? KeyString::kExclusiveBefore : KeyString::kExclusiveAfter;
+
+ // By using a discriminator other than kInclusive, there is no need to distinguish
+ // unique vs non-unique key formats since both start with the key.
+ _query.resetToKey(finalKey, _idx.ordering(), discriminator);
+ seekWTCursor(_query);
+ updatePosition();
+ return curr(parts);
+ }
- boost::optional<IndexKeyEntry> curr(RequestedInfo parts) const {
- if (_eof) return {};
+ boost::optional<IndexKeyEntry> seek(const IndexSeekPoint& seekPoint,
+ RequestedInfo parts) override {
+ // TODO: don't go to a bson obj then to a KeyString, go straight
+ BSONObj key = IndexEntryComparison::makeQueryObject(seekPoint, _forward);
+
+ // makeQueryObject handles the discriminator in the real exclusive cases.
+ const auto discriminator =
+ _forward ? KeyString::kExclusiveBefore : KeyString::kExclusiveAfter;
+ _query.resetToKey(key, _idx.ordering(), discriminator);
+ seekWTCursor(_query);
+ updatePosition();
+ return curr(parts);
+ }
- dassert(!atOrPastEndPointAfterSeeking());
- dassert(!_loc.isNull());
+ void savePositioned() override {
+ if (!_txn)
+ return; // still saved
- BSONObj bson;
- if (TRACING_ENABLED || (parts & kWantKey)) {
- bson = KeyString::toBson(_key.getBuffer(), _key.getSize(), _idx.ordering(),
- _typeBits);
+ _savedForCheck = _txn->recoveryUnit();
- TRACE_CURSOR << " returning " << bson << ' ' << _loc;
+ if (!wt_keeptxnopen()) {
+ try {
+ _cursor.reset();
+ } catch (const WriteConflictException& wce) {
+ // Ignore since this is only called when we are about to kill our transaction
+ // anyway.
}
- return {{std::move(bson), _loc}};
+ // Our saved position is wherever we were when we last called updatePosition().
+            // Any partially completed repositions should not affect our saved position.
}
- bool atOrPastEndPointAfterSeeking() const {
- if (_eof) return true;
- if (!_endPosition) return false;
-
- const int cmp = _key.compare(*_endPosition);
-
- // We set up _endPosition to be in between the last in-range value and the first
- // out-of-range value. In particular, it is constructed to never equal any legal index
- // key.
- dassert(cmp != 0);
+ _txn = NULL;
+ }
- if (_forward) {
- // We may have landed after the end point.
- return cmp > 0;
- }
- else {
- // We may have landed before the end point.
- return cmp < 0;
- }
- }
+ void saveUnpositioned() override {
+ savePositioned();
+ _eof = true;
+ }
- void advanceWTCursor() {
- WT_CURSOR *c = _cursor.get();
- int ret = WT_OP_CHECK(_forward ? c->next(c) : c->prev(c));
- if ( ret == WT_NOTFOUND ) {
- _cursorAtEof = true;
- return;
+ void restore(OperationContext* txn) override {
+ // Update the session handle with our new operation context.
+ invariant(_savedForCheck == txn->recoveryUnit());
+ _txn = txn;
+
+ if (!wt_keeptxnopen()) {
+ if (!_eof) {
+ // Ensure an active session exists, so any restored cursors will bind to it
+ WiredTigerRecoveryUnit::get(txn)->getSession(txn);
+ _lastMoveWasRestore = !seekWTCursor(_key);
+ TRACE_CURSOR << "restore _lastMoveWasRestore:" << _lastMoveWasRestore;
}
- invariantWTOK(ret);
- _cursorAtEof = false;
}
+ }
- // Seeks to query. Returns true on exact match.
- bool seekWTCursor(const KeyString& query) {
- WT_CURSOR *c = _cursor.get();
-
- int cmp = -1;
- const WiredTigerItem keyItem(query.getBuffer(), query.getSize());
- c->set_key(c, keyItem.Get());
-
- int ret = WT_OP_CHECK(c->search_near(c, &cmp));
- if ( ret == WT_NOTFOUND ) {
- _cursorAtEof = true;
- TRACE_CURSOR << "\t not found";
- return false;
- }
- invariantWTOK( ret );
- _cursorAtEof = false;
+protected:
+ // Called after _key has been filled in. Must not throw WriteConflictException.
+ virtual void updateLocAndTypeBits() = 0;
- TRACE_CURSOR << "\t cmp: " << cmp;
+ boost::optional<IndexKeyEntry> curr(RequestedInfo parts) const {
+ if (_eof)
+ return {};
- if (cmp == 0) {
- // Found it!
- return true;
- }
+ dassert(!atOrPastEndPointAfterSeeking());
+ dassert(!_loc.isNull());
- // Make sure we land on a matching key (after/before for forward/reverse).
- if (_forward ? cmp < 0 : cmp > 0) {
- advanceWTCursor();
- }
+ BSONObj bson;
+ if (TRACING_ENABLED || (parts & kWantKey)) {
+ bson = KeyString::toBson(_key.getBuffer(), _key.getSize(), _idx.ordering(), _typeBits);
- return false;
+ TRACE_CURSOR << " returning " << bson << ' ' << _loc;
}
- /**
- * This must be called after moving the cursor to update our cached position. It should not
- * be called after a restore that did not restore to original state since that does not
- * logically move the cursor until the following call to next().
- */
- void updatePosition() {
- _lastMoveWasRestore = false;
- if (_cursorAtEof) {
- _eof = true;
- _loc = RecordId();
- return;
- }
+ return {{std::move(bson), _loc}};
+ }
- _eof = false;
+ bool atOrPastEndPointAfterSeeking() const {
+ if (_eof)
+ return true;
+ if (!_endPosition)
+ return false;
- WT_CURSOR *c = _cursor.get();
- WT_ITEM item;
- invariantWTOK(c->get_key(c, &item));
- _key.resetFromBuffer(item.data, item.size);
+ const int cmp = _key.compare(*_endPosition);
- if (atOrPastEndPointAfterSeeking()) {
- _eof = true;
- return;
- }
+ // We set up _endPosition to be in between the last in-range value and the first
+ // out-of-range value. In particular, it is constructed to never equal any legal index
+ // key.
+ dassert(cmp != 0);
- updateLocAndTypeBits();
+ if (_forward) {
+ // We may have landed after the end point.
+ return cmp > 0;
+ } else {
+ // We may have landed before the end point.
+ return cmp < 0;
}
+ }
- OperationContext *_txn;
- WiredTigerCursor _cursor;
- const WiredTigerIndex& _idx; // not owned
- const bool _forward;
-
- // Ensures we have the same RU at restore time.
- RecoveryUnit* _savedForCheck;
-
- // These are where this cursor instance is. They are not changed in the face of a failing
- // next().
- KeyString _key;
- KeyString::TypeBits _typeBits;
- RecordId _loc;
- bool _eof = false;
-
- // This differs from _eof in that it always reflects the result of the most recent call to
- // reposition _cursor.
- bool _cursorAtEof = false;
-
-        // Used by next() to decide whether to return the current position rather than moving. Should be reset to
- // false by any operation that moves the cursor, other than subsequent save/restore pairs.
- bool _lastMoveWasRestore = false;
+ void advanceWTCursor() {
+ WT_CURSOR* c = _cursor.get();
+ int ret = WT_OP_CHECK(_forward ? c->next(c) : c->prev(c));
+ if (ret == WT_NOTFOUND) {
+ _cursorAtEof = true;
+ return;
+ }
+ invariantWTOK(ret);
+ _cursorAtEof = false;
+ }
- KeyString _query;
+ // Seeks to query. Returns true on exact match.
+ bool seekWTCursor(const KeyString& query) {
+ WT_CURSOR* c = _cursor.get();
- std::unique_ptr<KeyString> _endPosition;
- };
+ int cmp = -1;
+ const WiredTigerItem keyItem(query.getBuffer(), query.getSize());
+ c->set_key(c, keyItem.Get());
- class WiredTigerIndexStandardCursor final : public WiredTigerIndexCursorBase {
- public:
- WiredTigerIndexStandardCursor(const WiredTigerIndex& idx, OperationContext *txn,
- bool forward)
- : WiredTigerIndexCursorBase(idx, txn, forward) {
+ int ret = WT_OP_CHECK(c->search_near(c, &cmp));
+ if (ret == WT_NOTFOUND) {
+ _cursorAtEof = true;
+ TRACE_CURSOR << "\t not found";
+ return false;
}
+ invariantWTOK(ret);
+ _cursorAtEof = false;
- void updateLocAndTypeBits() override {
- _loc = KeyString::decodeRecordIdAtEnd(_key.getBuffer(), _key.getSize());
+ TRACE_CURSOR << "\t cmp: " << cmp;
- WT_CURSOR *c = _cursor.get();
- WT_ITEM item;
- invariantWTOK( c->get_value(c, &item ) );
- BufReader br(item.data, item.size);
- _typeBits.resetFromBuffer(&br);
+ if (cmp == 0) {
+ // Found it!
+ return true;
}
- };
- class WiredTigerIndexUniqueCursor final : public WiredTigerIndexCursorBase {
- public:
- WiredTigerIndexUniqueCursor(const WiredTigerIndex& idx, OperationContext *txn, bool forward)
- : WiredTigerIndexCursorBase(idx, txn, forward) {
+ // Make sure we land on a matching key (after/before for forward/reverse).
+ if (_forward ? cmp < 0 : cmp > 0) {
+ advanceWTCursor();
}
- void restore(OperationContext *txn) override {
- WiredTigerIndexCursorBase::restore(txn);
+ return false;
+ }
-            // In addition to seeking to the correct key, we also need to make sure that the cursor
-            // is positioned on the correct side of _loc.
- if (_lastMoveWasRestore) return; // We are on a different key so no need to check loc.
- if (_eof) return;
+ /**
+ * This must be called after moving the cursor to update our cached position. It should not
+ * be called after a restore that did not restore to original state since that does not
+ * logically move the cursor until the following call to next().
+ */
+ void updatePosition() {
+ _lastMoveWasRestore = false;
+ if (_cursorAtEof) {
+ _eof = true;
+ _loc = RecordId();
+ return;
+ }
- // If we get here we need to look at the actual RecordId for this key and make sure we
- // are supposed to see it.
- WT_CURSOR *c = _cursor.get();
- WT_ITEM item;
- invariantWTOK( c->get_value(c, &item ) );
+ _eof = false;
- BufReader br(item.data, item.size);
- RecordId locInIndex = KeyString::decodeRecordId(&br);
+ WT_CURSOR* c = _cursor.get();
+ WT_ITEM item;
+ invariantWTOK(c->get_key(c, &item));
+ _key.resetFromBuffer(item.data, item.size);
- TRACE_CURSOR << "restore"
- << " _loc:" << _loc
- << " locInIndex:" << locInIndex;
+ if (atOrPastEndPointAfterSeeking()) {
+ _eof = true;
+ return;
+ }
- if (locInIndex == _loc) return;
+ updateLocAndTypeBits();
+ }
- _lastMoveWasRestore = true;
- if ( _forward && (locInIndex < _loc)) advanceWTCursor();
- if (!_forward && (locInIndex > _loc)) advanceWTCursor();
- }
+ OperationContext* _txn;
+ WiredTigerCursor _cursor;
+ const WiredTigerIndex& _idx; // not owned
+ const bool _forward;
- void updateLocAndTypeBits() override {
- // We assume that cursors can only ever see unique indexes in their "pristine" state,
- // where no duplicates are possible. The cases where dups are allowed should hold
- // sufficient locks to ensure that no cursor ever sees them.
- WT_CURSOR *c = _cursor.get();
- WT_ITEM item;
- invariantWTOK( c->get_value(c, &item ) );
-
- BufReader br(item.data, item.size);
- _loc = KeyString::decodeRecordId(&br);
- _typeBits.resetFromBuffer(&br);
-
- if (!br.atEof()) {
- severe() << "Unique index cursor seeing multiple records for key "
- << curr(kWantKey)->key;
- fassertFailed(28608);
- }
- }
+ // Ensures we have the same RU at restore time.
+ RecoveryUnit* _savedForCheck;
- boost::optional<IndexKeyEntry> seekExact(const BSONObj& key, RequestedInfo parts) override {
- _query.resetToKey(stripFieldNames(key), _idx.ordering());
- const WiredTigerItem keyItem(_query.getBuffer(), _query.getSize());
+ // These are where this cursor instance is. They are not changed in the face of a failing
+ // next().
+ KeyString _key;
+ KeyString::TypeBits _typeBits;
+ RecordId _loc;
+ bool _eof = false;
- WT_CURSOR* c = _cursor.get();
- c->set_key(c, keyItem.Get());
+ // This differs from _eof in that it always reflects the result of the most recent call to
+ // reposition _cursor.
+ bool _cursorAtEof = false;
-            // Using search rather than search_near, since only an exact match is acceptable here.
- int ret = WT_OP_CHECK(c->search(c));
- if (ret != WT_NOTFOUND) invariantWTOK(ret);
- _cursorAtEof = ret == WT_NOTFOUND;
- updatePosition();
- dassert(_eof || _key.compare(_query) == 0);
- return curr(parts);
- }
- };
+    // Used by next() to decide whether to return the current position rather than moving. Should be reset to
+ // false by any operation that moves the cursor, other than subsequent save/restore pairs.
+ bool _lastMoveWasRestore = false;
-} // namespace
+ KeyString _query;
- WiredTigerIndexUnique::WiredTigerIndexUnique( OperationContext* ctx,
- const std::string& uri,
- const IndexDescriptor* desc )
- : WiredTigerIndex( ctx, uri, desc ) {
- }
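+    // When set, iteration stops (_eof is set) once _key moves past this bound; see
+    // setEndPosition() and atOrPastEndPointAfterSeeking().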
+ std::unique_ptr<KeyString> _endPosition;
+};
- std::unique_ptr<SortedDataInterface::Cursor> WiredTigerIndexUnique::newCursor(
- OperationContext* txn,
- bool forward) const {
- return stdx::make_unique<WiredTigerIndexUniqueCursor>(*this, txn, forward);
- }
+class WiredTigerIndexStandardCursor final : public WiredTigerIndexCursorBase {
+public:
+ WiredTigerIndexStandardCursor(const WiredTigerIndex& idx, OperationContext* txn, bool forward)
+ : WiredTigerIndexCursorBase(idx, txn, forward) {}
+
+ void updateLocAndTypeBits() override {
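+        // In a standard index the RecordId is encoded at the end of the key itself; the value
+        // holds only the TypeBits needed to reconstruct the original BSON key.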
+ _loc = KeyString::decodeRecordIdAtEnd(_key.getBuffer(), _key.getSize());
- SortedDataBuilderInterface* WiredTigerIndexUnique::getBulkBuilder(OperationContext* txn,
- bool dupsAllowed) {
- return new UniqueBulkBuilder(this, txn, dupsAllowed);
+ WT_CURSOR* c = _cursor.get();
+ WT_ITEM item;
+ invariantWTOK(c->get_value(c, &item));
+ BufReader br(item.data, item.size);
+ _typeBits.resetFromBuffer(&br);
}
+};
- Status WiredTigerIndexUnique::_insert( WT_CURSOR* c,
- const BSONObj& key,
- const RecordId& loc,
- bool dupsAllowed ) {
+class WiredTigerIndexUniqueCursor final : public WiredTigerIndexCursorBase {
+public:
+ WiredTigerIndexUniqueCursor(const WiredTigerIndex& idx, OperationContext* txn, bool forward)
+ : WiredTigerIndexCursorBase(idx, txn, forward) {}
- const KeyString data( key, _ordering );
- WiredTigerItem keyItem( data.getBuffer(), data.getSize() );
+ void restore(OperationContext* txn) override {
+ WiredTigerIndexCursorBase::restore(txn);
- KeyString value(loc);
- if (!data.getTypeBits().isAllZeros())
- value.appendTypeBits(data.getTypeBits());
+        // In addition to seeking to the correct key, we also need to make sure that the cursor
+        // is positioned on the correct side of _loc.
+ if (_lastMoveWasRestore)
+ return; // We are on a different key so no need to check loc.
+ if (_eof)
+ return;
- WiredTigerItem valueItem(value.getBuffer(), value.getSize());
- c->set_key( c, keyItem.Get() );
- c->set_value( c, valueItem.Get() );
- int ret = WT_OP_CHECK(c->insert(c));
+ // If we get here we need to look at the actual RecordId for this key and make sure we
+ // are supposed to see it.
+ WT_CURSOR* c = _cursor.get();
+ WT_ITEM item;
+ invariantWTOK(c->get_value(c, &item));
- if ( ret != WT_DUPLICATE_KEY ) {
- return wtRCToStatus( ret );
- }
+ BufReader br(item.data, item.size);
+ RecordId locInIndex = KeyString::decodeRecordId(&br);
-        // The key already exists: we may be in the weird mode where dups are allowed on a unique
-        // index, so all RecordIds for this key are kept together in a single value ("the list").
-        // Note that we can't omit AllZeros TypeBits when there are multiple locs for a value. When
-        // we remove down to a single value, it will be cleaned up.
- ret = WT_OP_CHECK(c->search(c));
- invariantWTOK( ret );
-
- WT_ITEM old;
- invariantWTOK( c->get_value(c, &old ) );
-
- bool insertedLoc = false;
-
- value.resetToEmpty();
- BufReader br(old.data, old.size);
- while (br.remaining()) {
- RecordId locInIndex = KeyString::decodeRecordId(&br);
- if (loc == locInIndex)
- return Status::OK(); // already in index
-
- if (!insertedLoc && loc < locInIndex) {
- value.appendRecordId(loc);
- value.appendTypeBits(data.getTypeBits());
- insertedLoc = true;
- }
+ TRACE_CURSOR << "restore"
+ << " _loc:" << _loc << " locInIndex:" << locInIndex;
- // Copy from old to new value
- value.appendRecordId(locInIndex);
- value.appendTypeBits(KeyString::TypeBits::fromBuffer(&br));
- }
+ if (locInIndex == _loc)
+ return;
- if (!dupsAllowed)
- return dupKeyError(key);
+ _lastMoveWasRestore = true;
+ if (_forward && (locInIndex < _loc))
+ advanceWTCursor();
+ if (!_forward && (locInIndex > _loc))
+ advanceWTCursor();
+ }
- if (!insertedLoc) {
- // This loc is higher than all currently in the index for this key
- value.appendRecordId(loc);
- value.appendTypeBits(data.getTypeBits());
+ void updateLocAndTypeBits() override {
+ // We assume that cursors can only ever see unique indexes in their "pristine" state,
+ // where no duplicates are possible. The cases where dups are allowed should hold
+ // sufficient locks to ensure that no cursor ever sees them.
+ WT_CURSOR* c = _cursor.get();
+ WT_ITEM item;
+ invariantWTOK(c->get_value(c, &item));
+
+ BufReader br(item.data, item.size);
+ _loc = KeyString::decodeRecordId(&br);
+ _typeBits.resetFromBuffer(&br);
+
+ if (!br.atEof()) {
+ severe() << "Unique index cursor seeing multiple records for key "
+ << curr(kWantKey)->key;
+ fassertFailed(28608);
}
-
- valueItem = WiredTigerItem(value.getBuffer(), value.getSize());
- c->set_value( c, valueItem.Get() );
- return wtRCToStatus( c->update( c ) );
}
- void WiredTigerIndexUnique::_unindex( WT_CURSOR* c,
- const BSONObj& key,
- const RecordId& loc,
- bool dupsAllowed ) {
- KeyString data( key, _ordering );
- WiredTigerItem keyItem( data.getBuffer(), data.getSize() );
- c->set_key( c, keyItem.Get() );
-
- if ( !dupsAllowed ) {
-            // No dups allowed, so there is a single entry for this key: remove it outright.
- int ret = WT_OP_CHECK(c->remove(c));
- if (ret == WT_NOTFOUND) {
- return;
- }
- invariantWTOK(ret);
- return;
- }
+ boost::optional<IndexKeyEntry> seekExact(const BSONObj& key, RequestedInfo parts) override {
+ _query.resetToKey(stripFieldNames(key), _idx.ordering());
+ const WiredTigerItem keyItem(_query.getBuffer(), _query.getSize());
- // dups are allowed, so we have to deal with a vector of RecordIds.
+ WT_CURSOR* c = _cursor.get();
+ c->set_key(c, keyItem.Get());
+        // Using search rather than search_near, since only an exact match is acceptable here.
int ret = WT_OP_CHECK(c->search(c));
- if ( ret == WT_NOTFOUND )
- return;
- invariantWTOK( ret );
-
- WT_ITEM old;
- invariantWTOK( c->get_value(c, &old ) );
-
- bool foundLoc = false;
- std::vector<std::pair<RecordId, KeyString::TypeBits> > records;
-
- BufReader br(old.data, old.size);
- while (br.remaining()) {
- RecordId locInIndex = KeyString::decodeRecordId(&br);
- KeyString::TypeBits typeBits = KeyString::TypeBits::fromBuffer(&br);
+ if (ret != WT_NOTFOUND)
+ invariantWTOK(ret);
+ _cursorAtEof = ret == WT_NOTFOUND;
+ updatePosition();
+ dassert(_eof || _key.compare(_query) == 0);
+ return curr(parts);
+ }
+};
+
+} // namespace
+
+WiredTigerIndexUnique::WiredTigerIndexUnique(OperationContext* ctx,
+ const std::string& uri,
+ const IndexDescriptor* desc)
+ : WiredTigerIndex(ctx, uri, desc) {}
+
+std::unique_ptr<SortedDataInterface::Cursor> WiredTigerIndexUnique::newCursor(OperationContext* txn,
+ bool forward) const {
+ return stdx::make_unique<WiredTigerIndexUniqueCursor>(*this, txn, forward);
+}
+
+SortedDataBuilderInterface* WiredTigerIndexUnique::getBulkBuilder(OperationContext* txn,
+ bool dupsAllowed) {
+ return new UniqueBulkBuilder(this, txn, dupsAllowed);
+}
+
+Status WiredTigerIndexUnique::_insert(WT_CURSOR* c,
+ const BSONObj& key,
+ const RecordId& loc,
+ bool dupsAllowed) {
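+    // Optimistically attempt a plain insert first; WT_DUPLICATE_KEY means the key already
+    // exists, in which case we fall through and merge this loc into the existing value.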
+ const KeyString data(key, _ordering);
+ WiredTigerItem keyItem(data.getBuffer(), data.getSize());
+
+ KeyString value(loc);
+ if (!data.getTypeBits().isAllZeros())
+ value.appendTypeBits(data.getTypeBits());
+
+ WiredTigerItem valueItem(value.getBuffer(), value.getSize());
+ c->set_key(c, keyItem.Get());
+ c->set_value(c, valueItem.Get());
+ int ret = WT_OP_CHECK(c->insert(c));
+
+ if (ret != WT_DUPLICATE_KEY) {
+ return wtRCToStatus(ret);
+ }
- if (loc == locInIndex) {
- if (records.empty() && !br.remaining()) {
- // This is the common case: we are removing the only loc for this key.
- // Remove the whole entry.
- invariantWTOK(WT_OP_CHECK(c->remove(c)));
- return;
- }
+    // The key already exists: we may be in the weird mode where dups are allowed on a unique
+    // index, so all RecordIds for this key are kept together in a single value ("the list").
+    // Note that we can't omit AllZeros TypeBits when there are multiple locs for a value. When we
+    // remove down to a single value, it will be cleaned up.
+ ret = WT_OP_CHECK(c->search(c));
+ invariantWTOK(ret);
- foundLoc = true;
- continue;
- }
+ WT_ITEM old;
+ invariantWTOK(c->get_value(c, &old));
- records.push_back(std::make_pair(locInIndex, typeBits));
- }
+ bool insertedLoc = false;
- if (!foundLoc) {
- warning().stream() << loc << " not found in the index for key " << key;
- return; // nothing to do
- }
+ value.resetToEmpty();
+ BufReader br(old.data, old.size);
+ while (br.remaining()) {
+ RecordId locInIndex = KeyString::decodeRecordId(&br);
+ if (loc == locInIndex)
+ return Status::OK(); // already in index
- // Put other locs for this key back in the index.
- KeyString newValue;
- invariant(!records.empty());
- for (size_t i = 0; i < records.size(); i++) {
- newValue.appendRecordId(records[i].first);
- // When there is only one record, we can omit AllZeros TypeBits. Otherwise they need
- // to be included.
- if (!(records[i].second.isAllZeros() && records.size() == 1)) {
- newValue.appendTypeBits(records[i].second);
- }
+ if (!insertedLoc && loc < locInIndex) {
+ value.appendRecordId(loc);
+ value.appendTypeBits(data.getTypeBits());
+ insertedLoc = true;
}
- WiredTigerItem valueItem = WiredTigerItem(newValue.getBuffer(), newValue.getSize());
- c->set_value( c, valueItem.Get() );
- invariantWTOK( c->update( c ) );
+ // Copy from old to new value
+ value.appendRecordId(locInIndex);
+ value.appendTypeBits(KeyString::TypeBits::fromBuffer(&br));
}
- // ------------------------------
+ if (!dupsAllowed)
+ return dupKeyError(key);
- WiredTigerIndexStandard::WiredTigerIndexStandard( OperationContext* ctx,
- const std::string& uri,
- const IndexDescriptor* desc )
- : WiredTigerIndex( ctx, uri, desc ) {
+ if (!insertedLoc) {
+ // This loc is higher than all currently in the index for this key
+ value.appendRecordId(loc);
+ value.appendTypeBits(data.getTypeBits());
}
- std::unique_ptr<SortedDataInterface::Cursor> WiredTigerIndexStandard::newCursor(
- OperationContext* txn,
- bool forward) const {
- return stdx::make_unique<WiredTigerIndexStandardCursor>(*this, txn, forward);
+ valueItem = WiredTigerItem(value.getBuffer(), value.getSize());
+ c->set_value(c, valueItem.Get());
+ return wtRCToStatus(c->update(c));
+}
+
+void WiredTigerIndexUnique::_unindex(WT_CURSOR* c,
+ const BSONObj& key,
+ const RecordId& loc,
+ bool dupsAllowed) {
+ KeyString data(key, _ordering);
+ WiredTigerItem keyItem(data.getBuffer(), data.getSize());
+ c->set_key(c, keyItem.Get());
+
+ if (!dupsAllowed) {
+        // No dups allowed, so there is a single entry for this key: remove it outright.
+ int ret = WT_OP_CHECK(c->remove(c));
+ if (ret == WT_NOTFOUND) {
+ return;
+ }
+ invariantWTOK(ret);
+ return;
}
- SortedDataBuilderInterface* WiredTigerIndexStandard::getBulkBuilder(OperationContext* txn,
- bool dupsAllowed) {
- // We aren't unique so dups better be allowed.
- invariant(dupsAllowed);
- return new StandardBulkBuilder(this, txn);
- }
+ // dups are allowed, so we have to deal with a vector of RecordIds.
- Status WiredTigerIndexStandard::_insert( WT_CURSOR* c,
- const BSONObj& keyBson,
- const RecordId& loc,
- bool dupsAllowed ) {
- invariant( dupsAllowed );
+ int ret = WT_OP_CHECK(c->search(c));
+ if (ret == WT_NOTFOUND)
+ return;
+ invariantWTOK(ret);
- TRACE_INDEX << " key: " << keyBson << " loc: " << loc;
+ WT_ITEM old;
+ invariantWTOK(c->get_value(c, &old));
- KeyString key( keyBson, _ordering, loc );
- WiredTigerItem keyItem( key.getBuffer(), key.getSize() );
+ bool foundLoc = false;
+ std::vector<std::pair<RecordId, KeyString::TypeBits>> records;
- WiredTigerItem valueItem =
- key.getTypeBits().isAllZeros() ? emptyItem
- : WiredTigerItem(key.getTypeBits().getBuffer(),
- key.getTypeBits().getSize());
+ BufReader br(old.data, old.size);
+ while (br.remaining()) {
+ RecordId locInIndex = KeyString::decodeRecordId(&br);
+ KeyString::TypeBits typeBits = KeyString::TypeBits::fromBuffer(&br);
- c->set_key(c, keyItem.Get());
- c->set_value(c, valueItem.Get());
- int ret = WT_OP_CHECK(c->insert(c));
-
- if ( ret != WT_DUPLICATE_KEY )
- return wtRCToStatus( ret );
- // If the record was already in the index, we just return OK.
- // This can happen, for example, when building a background index while documents are being
- // written and reindexed.
- return Status::OK();
- }
+ if (loc == locInIndex) {
+ if (records.empty() && !br.remaining()) {
+ // This is the common case: we are removing the only loc for this key.
+ // Remove the whole entry.
+ invariantWTOK(WT_OP_CHECK(c->remove(c)));
+ return;
+ }
- void WiredTigerIndexStandard::_unindex( WT_CURSOR* c,
- const BSONObj& key,
- const RecordId& loc,
- bool dupsAllowed ) {
- invariant( dupsAllowed );
- KeyString data( key, _ordering, loc );
- WiredTigerItem item( data.getBuffer(), data.getSize() );
- c->set_key(c, item.Get() );
- int ret = WT_OP_CHECK(c->remove(c));
- if (ret != WT_NOTFOUND) {
- invariantWTOK(ret);
+ foundLoc = true;
+ continue;
}
+
+ records.push_back(std::make_pair(locInIndex, typeBits));
+ }
+
+ if (!foundLoc) {
+ warning().stream() << loc << " not found in the index for key " << key;
+ return; // nothing to do
}
-    // ---------------- for compatibility with rc4 and previous ------
+ // Put other locs for this key back in the index.
+ KeyString newValue;
+ invariant(!records.empty());
+ for (size_t i = 0; i < records.size(); i++) {
+ newValue.appendRecordId(records[i].first);
+ // When there is only one record, we can omit AllZeros TypeBits. Otherwise they need
+ // to be included.
+ if (!(records[i].second.isAllZeros() && records.size() == 1)) {
+ newValue.appendTypeBits(records[i].second);
+ }
+ }
- int index_collator_customize(WT_COLLATOR *coll,
- WT_SESSION *s,
- const char *uri,
- WT_CONFIG_ITEM *metadata,
- WT_COLLATOR **collp) {
- fassertFailedWithStatusNoTrace(28580,
- Status(ErrorCodes::UnsupportedFormat, str::stream()
+ WiredTigerItem valueItem = WiredTigerItem(newValue.getBuffer(), newValue.getSize());
+ c->set_value(c, valueItem.Get());
+ invariantWTOK(c->update(c));
+}
+
+// ------------------------------
+
+WiredTigerIndexStandard::WiredTigerIndexStandard(OperationContext* ctx,
+ const std::string& uri,
+ const IndexDescriptor* desc)
+ : WiredTigerIndex(ctx, uri, desc) {}
+
+std::unique_ptr<SortedDataInterface::Cursor> WiredTigerIndexStandard::newCursor(
+ OperationContext* txn, bool forward) const {
+ return stdx::make_unique<WiredTigerIndexStandardCursor>(*this, txn, forward);
+}
+
+SortedDataBuilderInterface* WiredTigerIndexStandard::getBulkBuilder(OperationContext* txn,
+ bool dupsAllowed) {
+ // We aren't unique so dups better be allowed.
+ invariant(dupsAllowed);
+ return new StandardBulkBuilder(this, txn);
+}
+
+Status WiredTigerIndexStandard::_insert(WT_CURSOR* c,
+ const BSONObj& keyBson,
+ const RecordId& loc,
+ bool dupsAllowed) {
+ invariant(dupsAllowed);
+
+ TRACE_INDEX << " key: " << keyBson << " loc: " << loc;
+
+ KeyString key(keyBson, _ordering, loc);
+ WiredTigerItem keyItem(key.getBuffer(), key.getSize());
+
+ WiredTigerItem valueItem = key.getTypeBits().isAllZeros()
+ ? emptyItem
+ : WiredTigerItem(key.getTypeBits().getBuffer(), key.getTypeBits().getSize());
+
+ c->set_key(c, keyItem.Get());
+ c->set_value(c, valueItem.Get());
+ int ret = WT_OP_CHECK(c->insert(c));
+
+ if (ret != WT_DUPLICATE_KEY)
+ return wtRCToStatus(ret);
+ // If the record was already in the index, we just return OK.
+ // This can happen, for example, when building a background index while documents are being
+ // written and reindexed.
+ return Status::OK();
+}
+
+void WiredTigerIndexStandard::_unindex(WT_CURSOR* c,
+ const BSONObj& key,
+ const RecordId& loc,
+ bool dupsAllowed) {
+ invariant(dupsAllowed);
+ KeyString data(key, _ordering, loc);
+ WiredTigerItem item(data.getBuffer(), data.getSize());
+ c->set_key(c, item.Get());
+ int ret = WT_OP_CHECK(c->remove(c));
+ if (ret != WT_NOTFOUND) {
+ invariantWTOK(ret);
+ }
+}
+
+// ---------------- for compatibility with rc4 and previous ------
+
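+// WiredTiger invokes this callback when it opens a table configured with the "mongo_index"
+// collator, i.e. an index created by a pre-release (RC) format; we fail fast with a clear
+// error rather than attempt to interpret the old format.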
+int index_collator_customize(WT_COLLATOR* coll,
+ WT_SESSION* s,
+ const char* uri,
+ WT_CONFIG_ITEM* metadata,
+ WT_COLLATOR** collp) {
+ fassertFailedWithStatusNoTrace(28580,
+ Status(ErrorCodes::UnsupportedFormat,
+ str::stream()
<< "Found an index from an unsupported RC version."
<< " Please restart with --repair to fix."));
- }
+}
- extern "C" MONGO_COMPILER_API_EXPORT int index_collator_extension(WT_CONNECTION *conn,
- WT_CONFIG_ARG *cfg) {
- static WT_COLLATOR idx_static;
+extern "C" MONGO_COMPILER_API_EXPORT int index_collator_extension(WT_CONNECTION* conn,
+ WT_CONFIG_ARG* cfg) {
+ static WT_COLLATOR idx_static;
- idx_static.customize = index_collator_customize;
- return conn->add_collator(conn, "mongo_index", &idx_static, NULL);
-
- }
+ idx_static.customize = index_collator_customize;
+ return conn->add_collator(conn, "mongo_index", &idx_static, NULL);
+}
} // namespace mongo
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_index.h b/src/mongo/db/storage/wiredtiger/wiredtiger_index.h
index c2b3d42538f..1d61b2b3c9e 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_index.h
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_index.h
@@ -37,156 +37,154 @@
namespace mongo {
- class IndexCatalogEntry;
- class IndexDescriptor;
- struct WiredTigerItem;
-
- class WiredTigerIndex : public SortedDataInterface {
- public:
-
- /**
-         * Parses index options into a WiredTiger configuration string suitable for table creation.
- * The document 'options' is typically obtained from the 'storageEngine.wiredTiger' field
- * of an IndexDescriptor's info object.
- */
- static StatusWith<std::string> parseIndexOptions(const BSONObj& options);
-
- /**
- * Creates a configuration string suitable for 'config' parameter in WT_SESSION::create().
- * Configuration string is constructed from:
- * built-in defaults
- * 'extraConfig'
- * storageEngine.wiredTiger.configString in index descriptor's info object.
- * Performs simple validation on the supplied parameters.
- * Returns error status if validation fails.
-         * Note that even if this function returns an OK status, WT_SESSION::create() may still
- * fail with the constructed configuration string.
- */
- static StatusWith<std::string> generateCreateString(const std::string& extraConfig,
- const IndexDescriptor& desc);
-
- /**
- * Creates a WiredTiger table suitable for implementing a MongoDB index.
- * 'config' should be created with generateCreateString().
- */
- static int Create(OperationContext* txn,
+class IndexCatalogEntry;
+class IndexDescriptor;
+struct WiredTigerItem;
+
+class WiredTigerIndex : public SortedDataInterface {
+public:
+ /**
+     * Parses index options into a WiredTiger configuration string suitable for table creation.
+ * The document 'options' is typically obtained from the 'storageEngine.wiredTiger' field
+ * of an IndexDescriptor's info object.
+ */
+ static StatusWith<std::string> parseIndexOptions(const BSONObj& options);
+
+ /**
+ * Creates a configuration string suitable for 'config' parameter in WT_SESSION::create().
+ * Configuration string is constructed from:
+ * built-in defaults
+ * 'extraConfig'
+ * storageEngine.wiredTiger.configString in index descriptor's info object.
+ * Performs simple validation on the supplied parameters.
+ * Returns error status if validation fails.
+     * Note that even if this function returns an OK status, WT_SESSION::create() may still
+ * fail with the constructed configuration string.
+ */
+ static StatusWith<std::string> generateCreateString(const std::string& extraConfig,
+ const IndexDescriptor& desc);
+
+ /**
+ * Creates a WiredTiger table suitable for implementing a MongoDB index.
+ * 'config' should be created with generateCreateString().
+ */
+ static int Create(OperationContext* txn, const std::string& uri, const std::string& config);
+
+ /**
+ * @param unique - If this is a unique index.
+     * Note: even if unique, it may be allowed to be non-unique at times.
+ */
+ WiredTigerIndex(OperationContext* ctx, const std::string& uri, const IndexDescriptor* desc);
+
+ virtual Status insert(OperationContext* txn,
+ const BSONObj& key,
+ const RecordId& loc,
+ bool dupsAllowed);
+
+ virtual void unindex(OperationContext* txn,
+ const BSONObj& key,
+ const RecordId& loc,
+ bool dupsAllowed);
+
+ virtual void fullValidate(OperationContext* txn,
+ bool full,
+ long long* numKeysOut,
+ BSONObjBuilder* output) const;
+ virtual bool appendCustomStats(OperationContext* txn,
+ BSONObjBuilder* output,
+ double scale) const;
+ virtual Status dupKeyCheck(OperationContext* txn, const BSONObj& key, const RecordId& loc);
+
+ virtual bool isEmpty(OperationContext* txn);
+
+ virtual long long getSpaceUsedBytes(OperationContext* txn) const;
+
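+    /**
+     * Returns true if 'key' already exists in the index with a RecordId other than 'loc'.
+     * Must only be called on a unique index.
+     */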
+ bool isDup(WT_CURSOR* c, const BSONObj& key, const RecordId& loc);
+
+ virtual Status initAsEmpty(OperationContext* txn);
+
+ const std::string& uri() const {
+ return _uri;
+ }
+
+ uint64_t instanceId() const {
+ return _instanceId;
+ }
+ Ordering ordering() const {
+ return _ordering;
+ }
+
+ virtual bool unique() const = 0;
+
+ Status dupKeyError(const BSONObj& key);
+
+protected:
+ virtual Status _insert(WT_CURSOR* c,
+ const BSONObj& key,
+ const RecordId& loc,
+ bool dupsAllowed) = 0;
+
+ virtual void _unindex(WT_CURSOR* c,
+ const BSONObj& key,
+ const RecordId& loc,
+ bool dupsAllowed) = 0;
+
+ class BulkBuilder;
+ class StandardBulkBuilder;
+ class UniqueBulkBuilder;
+
+ const Ordering _ordering;
+ std::string _uri;
+ uint64_t _instanceId;
+ std::string _collectionNamespace;
+ std::string _indexName;
+};
+
+
+class WiredTigerIndexUnique : public WiredTigerIndex {
+public:
+ WiredTigerIndexUnique(OperationContext* ctx,
const std::string& uri,
- const std::string& config);
+ const IndexDescriptor* desc);
- /**
- * @param unique - If this is a unique index.
-     * Note: even if unique, it may be allowed to be non-unique at times.
- */
- WiredTigerIndex(OperationContext* ctx,
- const std::string& uri,
- const IndexDescriptor* desc);
+ std::unique_ptr<SortedDataInterface::Cursor> newCursor(OperationContext* txn,
+ bool forward) const override;
- virtual Status insert(OperationContext* txn,
- const BSONObj& key,
- const RecordId& loc,
- bool dupsAllowed);
+ SortedDataBuilderInterface* getBulkBuilder(OperationContext* txn, bool dupsAllowed) override;
- virtual void unindex(OperationContext* txn,
- const BSONObj& key,
- const RecordId& loc,
- bool dupsAllowed);
+ bool unique() const override {
+ return true;
+ }
- virtual void fullValidate(OperationContext* txn, bool full, long long *numKeysOut,
- BSONObjBuilder* output) const;
- virtual bool appendCustomStats(OperationContext* txn, BSONObjBuilder* output, double scale)
- const;
- virtual Status dupKeyCheck(OperationContext* txn, const BSONObj& key, const RecordId& loc);
+ Status _insert(WT_CURSOR* c,
+ const BSONObj& key,
+ const RecordId& loc,
+ bool dupsAllowed) override;
- virtual bool isEmpty(OperationContext* txn);
+ void _unindex(WT_CURSOR* c, const BSONObj& key, const RecordId& loc, bool dupsAllowed) override;
+};
- virtual long long getSpaceUsedBytes( OperationContext* txn ) const;
+class WiredTigerIndexStandard : public WiredTigerIndex {
+public:
+ WiredTigerIndexStandard(OperationContext* ctx,
+ const std::string& uri,
+ const IndexDescriptor* desc);
- bool isDup(WT_CURSOR *c, const BSONObj& key, const RecordId& loc );
+ std::unique_ptr<SortedDataInterface::Cursor> newCursor(OperationContext* txn,
+ bool forward) const override;
- virtual Status initAsEmpty(OperationContext* txn);
+ SortedDataBuilderInterface* getBulkBuilder(OperationContext* txn, bool dupsAllowed) override;
- const std::string& uri() const { return _uri; }
+ bool unique() const override {
+ return false;
+ }
- uint64_t instanceId() const { return _instanceId; }
- Ordering ordering() const { return _ordering; }
+ Status _insert(WT_CURSOR* c,
+ const BSONObj& key,
+ const RecordId& loc,
+ bool dupsAllowed) override;
- virtual bool unique() const = 0;
+ void _unindex(WT_CURSOR* c, const BSONObj& key, const RecordId& loc, bool dupsAllowed) override;
+};
- Status dupKeyError(const BSONObj& key);
-
- protected:
-
- virtual Status _insert( WT_CURSOR* c,
- const BSONObj& key,
- const RecordId& loc,
- bool dupsAllowed ) = 0;
-
- virtual void _unindex( WT_CURSOR* c,
- const BSONObj& key,
- const RecordId& loc,
- bool dupsAllowed ) = 0;
-
- class BulkBuilder;
- class StandardBulkBuilder;
- class UniqueBulkBuilder;
-
- const Ordering _ordering;
- std::string _uri;
- uint64_t _instanceId;
- std::string _collectionNamespace;
- std::string _indexName;
- };
-
-
- class WiredTigerIndexUnique : public WiredTigerIndex {
- public:
- WiredTigerIndexUnique( OperationContext* ctx,
- const std::string& uri,
- const IndexDescriptor* desc );
-
- std::unique_ptr<SortedDataInterface::Cursor> newCursor(OperationContext* txn,
- bool forward) const override;
-
- SortedDataBuilderInterface* getBulkBuilder(OperationContext* txn,
- bool dupsAllowed) override;
-
- bool unique() const override { return true; }
-
- Status _insert(WT_CURSOR* c,
- const BSONObj& key,
- const RecordId& loc,
- bool dupsAllowed) override;
-
- void _unindex(WT_CURSOR* c,
- const BSONObj& key,
- const RecordId& loc,
- bool dupsAllowed) override;
- };
-
- class WiredTigerIndexStandard : public WiredTigerIndex {
- public:
- WiredTigerIndexStandard( OperationContext* ctx,
- const std::string& uri,
- const IndexDescriptor* desc );
-
- std::unique_ptr<SortedDataInterface::Cursor> newCursor(OperationContext* txn,
- bool forward) const override;
-
- SortedDataBuilderInterface* getBulkBuilder(OperationContext* txn,
- bool dupsAllowed) override;
-
- bool unique() const override { return false; }
-
- Status _insert(WT_CURSOR* c,
- const BSONObj& key,
- const RecordId& loc,
- bool dupsAllowed) override;
-
- void _unindex(WT_CURSOR* c,
- const BSONObj& key,
- const RecordId& loc,
- bool dupsAllowed) override;
-
- };
-
-} // namespace
+} // namespace
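
The header above keeps its template-method split through the reformat: WiredTigerIndex owns the public insert()/unindex() entry points and shared state, while WiredTigerIndexUnique and WiredTigerIndexStandard supply the protected _insert()/_unindex() hooks and report unique(). A minimal standalone sketch of that pattern, using hypothetical names rather than the real MongoDB/WiredTiger types:

    #include <iostream>
    #include <set>
    #include <string>

    class IndexBase {
    public:
        virtual ~IndexBase() = default;
        // Public entry point; concrete behavior comes from the protected hook.
        bool insert(const std::string& key) {
            return _insert(key);
        }
        virtual bool unique() const = 0;

    protected:
        virtual bool _insert(const std::string& key) = 0;
    };

    class UniqueIndex : public IndexBase {
    public:
        bool unique() const override {
            return true;
        }

    protected:
        bool _insert(const std::string& key) override {
            return _keys.insert(key).second;  // std::set rejects duplicates
        }

    private:
        std::set<std::string> _keys;
    };

    class StandardIndex : public IndexBase {
    public:
        bool unique() const override {
            return false;
        }

    protected:
        bool _insert(const std::string& key) override {
            _keys.insert(key);  // std::multiset allows duplicates
            return true;
        }

    private:
        std::multiset<std::string> _keys;
    };

    int main() {
        UniqueIndex u;
        std::cout << u.insert("a");          // 1: first insert succeeds
        std::cout << u.insert("a") << '\n';  // 0: duplicate rejected
    }
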
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_index_test.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_index_test.cpp
index f7e308e6cf2..ea468aa5d76 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_index_test.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_index_test.cpp
@@ -47,96 +47,95 @@
namespace mongo {
- using std::string;
+using std::string;
- class MyHarnessHelper final : public HarnessHelper {
- public:
- MyHarnessHelper() : _dbpath( "wt_test" ), _conn( NULL ) {
+class MyHarnessHelper final : public HarnessHelper {
+public:
+ MyHarnessHelper() : _dbpath("wt_test"), _conn(NULL) {
+ const char* config = "create,cache_size=1G,";
+ int ret = wiredtiger_open(_dbpath.path().c_str(), NULL, config, &_conn);
+ invariantWTOK(ret);
- const char* config = "create,cache_size=1G,";
- int ret = wiredtiger_open( _dbpath.path().c_str(), NULL, config, &_conn);
- invariantWTOK( ret );
-
- _sessionCache = new WiredTigerSessionCache( _conn );
- }
-
- ~MyHarnessHelper() final {
- delete _sessionCache;
- _conn->close(_conn, NULL);
- }
-
- std::unique_ptr<SortedDataInterface> newSortedDataInterface( bool unique ) final {
- std::string ns = "test.wt";
- OperationContextNoop txn( newRecoveryUnit().release() );
-
- BSONObj spec = BSON( "key" << BSON( "a" << 1 ) <<
- "name" << "testIndex" <<
- "ns" << ns );
-
- IndexDescriptor desc( NULL, "", spec );
-
- StatusWith<std::string> result = WiredTigerIndex::generateCreateString("", desc);
- ASSERT_OK(result.getStatus());
-
- string uri = "table:" + ns;
- invariantWTOK( WiredTigerIndex::Create(&txn, uri, result.getValue()));
+ _sessionCache = new WiredTigerSessionCache(_conn);
+ }
- if ( unique )
- return stdx::make_unique<WiredTigerIndexUnique>( &txn, uri, &desc );
- return stdx::make_unique<WiredTigerIndexStandard>( &txn, uri, &desc );
- }
+ ~MyHarnessHelper() final {
+ delete _sessionCache;
+ _conn->close(_conn, NULL);
+ }
- std::unique_ptr<RecoveryUnit> newRecoveryUnit() final {
- return stdx::make_unique<WiredTigerRecoveryUnit>( _sessionCache );
- }
+ std::unique_ptr<SortedDataInterface> newSortedDataInterface(bool unique) final {
+ std::string ns = "test.wt";
+ OperationContextNoop txn(newRecoveryUnit().release());
- private:
- unittest::TempDir _dbpath;
- WT_CONNECTION* _conn;
- WiredTigerSessionCache* _sessionCache;
- };
+ BSONObj spec = BSON("key" << BSON("a" << 1) << "name"
+ << "testIndex"
+ << "ns" << ns);
- std::unique_ptr<HarnessHelper> newHarnessHelper() {
- return stdx::make_unique<MyHarnessHelper>();
- }
+ IndexDescriptor desc(NULL, "", spec);
- TEST(WiredTigerIndexTest, GenerateCreateStringEmptyDocument) {
- BSONObj spec = fromjson("{}");
- StatusWith<std::string> result = WiredTigerIndex::parseIndexOptions(spec);
+ StatusWith<std::string> result = WiredTigerIndex::generateCreateString("", desc);
ASSERT_OK(result.getStatus());
- ASSERT_EQ(result.getValue(), ""); // "," would also be valid.
- }
- TEST(WiredTigerIndexTest, GenerateCreateStringUnknownField) {
- BSONObj spec = fromjson("{unknownField: 1}");
- StatusWith<std::string> result = WiredTigerIndex::parseIndexOptions(spec);
- const Status& status = result.getStatus();
- ASSERT_NOT_OK(status);
- ASSERT_EQUALS(ErrorCodes::InvalidOptions, status);
- }
+ string uri = "table:" + ns;
+ invariantWTOK(WiredTigerIndex::Create(&txn, uri, result.getValue()));
- TEST(WiredTigerIndexTest, GenerateCreateStringNonStringConfig) {
- BSONObj spec = fromjson("{configString: 12345}");
- StatusWith<std::string> result = WiredTigerIndex::parseIndexOptions(spec);
- const Status& status = result.getStatus();
- ASSERT_NOT_OK(status);
- ASSERT_EQUALS(ErrorCodes::TypeMismatch, status);
+ if (unique)
+ return stdx::make_unique<WiredTigerIndexUnique>(&txn, uri, &desc);
+ return stdx::make_unique<WiredTigerIndexStandard>(&txn, uri, &desc);
}
- TEST(WiredTigerIndexTest, GenerateCreateStringEmptyConfigString) {
- BSONObj spec = fromjson("{configString: ''}");
- StatusWith<std::string> result = WiredTigerIndex::parseIndexOptions(spec);
- ASSERT_OK(result.getStatus());
- ASSERT_EQ(result.getValue(), ","); // "" would also be valid.
+ std::unique_ptr<RecoveryUnit> newRecoveryUnit() final {
+ return stdx::make_unique<WiredTigerRecoveryUnit>(_sessionCache);
}
- TEST(WiredTigerIndexTest, GenerateCreateStringValidConfigFormat) {
- // TODO eventually this should fail since "abc" is not a valid WT option.
- BSONObj spec = fromjson("{configString: 'abc=def'}");
- StatusWith<std::string> result = WiredTigerIndex::parseIndexOptions(spec);
- const Status& status = result.getStatus();
- ASSERT_OK(status);
- ASSERT_EQ(result.getValue(), "abc=def,");
- }
+private:
+ unittest::TempDir _dbpath;
+ WT_CONNECTION* _conn;
+ WiredTigerSessionCache* _sessionCache;
+};
+
+std::unique_ptr<HarnessHelper> newHarnessHelper() {
+ return stdx::make_unique<MyHarnessHelper>();
+}
+
+TEST(WiredTigerIndexTest, GenerateCreateStringEmptyDocument) {
+ BSONObj spec = fromjson("{}");
+ StatusWith<std::string> result = WiredTigerIndex::parseIndexOptions(spec);
+ ASSERT_OK(result.getStatus());
+ ASSERT_EQ(result.getValue(), ""); // "," would also be valid.
+}
+
+TEST(WiredTigerIndexTest, GenerateCreateStringUnknownField) {
+ BSONObj spec = fromjson("{unknownField: 1}");
+ StatusWith<std::string> result = WiredTigerIndex::parseIndexOptions(spec);
+ const Status& status = result.getStatus();
+ ASSERT_NOT_OK(status);
+ ASSERT_EQUALS(ErrorCodes::InvalidOptions, status);
+}
+
+TEST(WiredTigerIndexTest, GenerateCreateStringNonStringConfig) {
+ BSONObj spec = fromjson("{configString: 12345}");
+ StatusWith<std::string> result = WiredTigerIndex::parseIndexOptions(spec);
+ const Status& status = result.getStatus();
+ ASSERT_NOT_OK(status);
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch, status);
+}
+
+TEST(WiredTigerIndexTest, GenerateCreateStringEmptyConfigString) {
+ BSONObj spec = fromjson("{configString: ''}");
+ StatusWith<std::string> result = WiredTigerIndex::parseIndexOptions(spec);
+ ASSERT_OK(result.getStatus());
+ ASSERT_EQ(result.getValue(), ","); // "" would also be valid.
+}
+
+TEST(WiredTigerIndexTest, GenerateCreateStringValidConfigFormat) {
+ // TODO eventually this should fail since "abc" is not a valid WT option.
+ BSONObj spec = fromjson("{configString: 'abc=def'}");
+ StatusWith<std::string> result = WiredTigerIndex::parseIndexOptions(spec);
+ const Status& status = result.getStatus();
+ ASSERT_OK(status);
+ ASSERT_EQ(result.getValue(), "abc=def,");
+}
} // namespace mongo
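
The tests above pin down the contract of parseIndexOptions: an empty spec yields an empty (or, equivalently, ","-only) config, an unknown field is InvalidOptions, a non-string configString is TypeMismatch, and a valid configString is passed through with a trailing comma. A hedged standalone sketch of those rules, using exceptions and a plain map where the real code takes a BSONObj and returns StatusWith<std::string>:

    #include <map>
    #include <stdexcept>
    #include <string>

    enum class FieldType { String, Number };
    struct Field {
        FieldType type;
        std::string str;  // only meaningful when type == String
    };

    std::string parseIndexOptionsSketch(const std::map<std::string, Field>& spec) {
        std::string config;
        for (const auto& kv : spec) {
            if (kv.first != "configString")
                throw std::invalid_argument("unknown field: " + kv.first);  // ~InvalidOptions
            if (kv.second.type != FieldType::String)
                throw std::invalid_argument("configString must be a string");  // ~TypeMismatch
            config += kv.second.str + ",";  // normalize with a trailing comma
        }
        return config;  // "" for {}, "abc=def," for {configString: 'abc=def'}
    }

    int main() {
        std::map<std::string, Field> spec;
        spec["configString"] = Field{FieldType::String, "abc=def"};
        return parseIndexOptionsSketch(spec) == "abc=def," ? 0 : 1;
    }
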
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_init.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_init.cpp
index 23d69d934c2..45888019c49 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_init.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_init.cpp
@@ -52,81 +52,74 @@
namespace mongo {
- namespace {
- class WiredTigerFactory : public StorageEngine::Factory {
- public:
- virtual ~WiredTigerFactory(){}
- virtual StorageEngine* create(const StorageGlobalParams& params,
- const StorageEngineLockFile& lockFile) const {
- if (lockFile.createdByUncleanShutdown()) {
- warning() << "Recovering data from the last clean checkpoint.";
- }
- WiredTigerKVEngine* kv = new WiredTigerKVEngine( params.dbpath,
- wiredTigerGlobalOptions.engineConfig,
- params.dur,
- params.repair );
- kv->setRecordStoreExtraOptions( wiredTigerGlobalOptions.collectionConfig );
- kv->setSortedDataInterfaceExtraOptions( wiredTigerGlobalOptions.indexConfig );
- // Intentionally leaked.
- new WiredTigerServerStatusSection(kv);
- new WiredTigerEngineRuntimeConfigParameter(kv);
-
- KVStorageEngineOptions options;
- options.directoryPerDB = params.directoryperdb;
- options.directoryForIndexes = wiredTigerGlobalOptions.directoryForIndexes;
- options.forRepair = params.repair;
- return new KVStorageEngine( kv, options );
- }
-
- virtual StringData getCanonicalName() const {
- return kWiredTigerEngineName;
- }
-
- virtual Status validateCollectionStorageOptions(const BSONObj& options) const {
- return WiredTigerRecordStore::parseOptionsField(options).getStatus();
- }
-
- virtual Status validateIndexStorageOptions(const BSONObj& options) const {
- return WiredTigerIndex::parseIndexOptions(options).getStatus();
- }
-
- virtual Status validateMetadata(const StorageEngineMetadata& metadata,
- const StorageGlobalParams& params) const {
- Status status = metadata.validateStorageEngineOption(
- "directoryPerDB", params.directoryperdb);
- if (!status.isOK()) {
- return status;
- }
-
- status = metadata.validateStorageEngineOption(
- "directoryForIndexes", wiredTigerGlobalOptions.directoryForIndexes);
- if (!status.isOK()) {
- return status;
- }
+namespace {
+class WiredTigerFactory : public StorageEngine::Factory {
+public:
+ virtual ~WiredTigerFactory() {}
+ virtual StorageEngine* create(const StorageGlobalParams& params,
+ const StorageEngineLockFile& lockFile) const {
+ if (lockFile.createdByUncleanShutdown()) {
+ warning() << "Recovering data from the last clean checkpoint.";
+ }
+ WiredTigerKVEngine* kv = new WiredTigerKVEngine(
+ params.dbpath, wiredTigerGlobalOptions.engineConfig, params.dur, params.repair);
+ kv->setRecordStoreExtraOptions(wiredTigerGlobalOptions.collectionConfig);
+ kv->setSortedDataInterfaceExtraOptions(wiredTigerGlobalOptions.indexConfig);
+ // Intentionally leaked.
+ new WiredTigerServerStatusSection(kv);
+ new WiredTigerEngineRuntimeConfigParameter(kv);
+
+ KVStorageEngineOptions options;
+ options.directoryPerDB = params.directoryperdb;
+ options.directoryForIndexes = wiredTigerGlobalOptions.directoryForIndexes;
+ options.forRepair = params.repair;
+ return new KVStorageEngine(kv, options);
+ }
- return Status::OK();
- }
+ virtual StringData getCanonicalName() const {
+ return kWiredTigerEngineName;
+ }
- virtual BSONObj createMetadataOptions(const StorageGlobalParams& params) const {
- BSONObjBuilder builder;
- builder.appendBool("directoryPerDB", params.directoryperdb);
- builder.appendBool("directoryForIndexes",
- wiredTigerGlobalOptions.directoryForIndexes);
- WiredTigerCustomizationHooks::get(getGlobalServiceContext())->appendUID(&builder);
- return builder.obj();
- }
+ virtual Status validateCollectionStorageOptions(const BSONObj& options) const {
+ return WiredTigerRecordStore::parseOptionsField(options).getStatus();
+ }
- };
- } // namespace
+ virtual Status validateIndexStorageOptions(const BSONObj& options) const {
+ return WiredTigerIndex::parseIndexOptions(options).getStatus();
+ }
- MONGO_INITIALIZER_WITH_PREREQUISITES(WiredTigerEngineInit,
- ("SetGlobalEnvironment"))
- (InitializerContext* context ) {
+ virtual Status validateMetadata(const StorageEngineMetadata& metadata,
+ const StorageGlobalParams& params) const {
+ Status status =
+ metadata.validateStorageEngineOption("directoryPerDB", params.directoryperdb);
+ if (!status.isOK()) {
+ return status;
+ }
- getGlobalServiceContext()->registerStorageEngine(kWiredTigerEngineName,
- new WiredTigerFactory());
+ status = metadata.validateStorageEngineOption("directoryForIndexes",
+ wiredTigerGlobalOptions.directoryForIndexes);
+ if (!status.isOK()) {
+ return status;
+ }
return Status::OK();
}
+ virtual BSONObj createMetadataOptions(const StorageGlobalParams& params) const {
+ BSONObjBuilder builder;
+ builder.appendBool("directoryPerDB", params.directoryperdb);
+ builder.appendBool("directoryForIndexes", wiredTigerGlobalOptions.directoryForIndexes);
+ WiredTigerCustomizationHooks::get(getGlobalServiceContext())->appendUID(&builder);
+ return builder.obj();
+ }
+};
+} // namespace
+
+MONGO_INITIALIZER_WITH_PREREQUISITES(WiredTigerEngineInit, ("SetGlobalEnvironment"))
+(InitializerContext* context) {
+ getGlobalServiceContext()->registerStorageEngine(kWiredTigerEngineName,
+ new WiredTigerFactory());
+
+ return Status::OK();
+}
}
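
The factory above is how the engine plugs into the server: a StorageEngine::Factory is registered under a canonical name at initializer time, and the server later instantiates whichever engine the configuration names. A simplified sketch of that registration pattern, with a hypothetical registry standing in for the ServiceContext machinery:

    #include <map>
    #include <memory>
    #include <string>

    struct Engine {
        virtual ~Engine() = default;
    };

    struct Factory {
        virtual ~Factory() = default;
        virtual std::unique_ptr<Engine> create() const = 0;
    };

    class Registry {
    public:
        void registerFactory(const std::string& name, std::unique_ptr<Factory> factory) {
            _factories[name] = std::move(factory);  // last registration wins
        }
        std::unique_ptr<Engine> create(const std::string& name) const {
            auto it = _factories.find(name);
            return it == _factories.end() ? nullptr : it->second->create();
        }

    private:
        std::map<std::string, std::unique_ptr<Factory>> _factories;
    };

    struct NullEngine : Engine {};
    struct NullFactory : Factory {
        std::unique_ptr<Engine> create() const override {
            return std::unique_ptr<Engine>(new NullEngine());
        }
    };

    int main() {
        Registry registry;
        registry.registerFactory("wiredTiger", std::unique_ptr<Factory>(new NullFactory()));
        return registry.create("wiredTiger") ? 0 : 1;
    }
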
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_init_test.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_init_test.cpp
index 2f3ddf38518..1ace8c8c019 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_init_test.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_init_test.cpp
@@ -40,125 +40,122 @@
namespace {
- using namespace mongo;
-
- class WiredTigerFactoryTest : public mongo::unittest::Test {
- private:
- virtual void setUp() {
- ServiceContext* globalEnv = getGlobalServiceContext();
- ASSERT_TRUE(globalEnv);
- ASSERT_TRUE(getGlobalServiceContext()->isRegisteredStorageEngine(kWiredTigerEngineName));
- std::unique_ptr<StorageFactoriesIterator> sfi(getGlobalServiceContext()->
- makeStorageFactoriesIterator());
- ASSERT_TRUE(sfi);
- bool found = false;
- while (sfi->more()) {
- const StorageEngine::Factory* currentFactory = sfi->next();
- if (currentFactory->getCanonicalName() == kWiredTigerEngineName) {
- found = true;
- factory = currentFactory;
- break;
- }
+using namespace mongo;
+
+class WiredTigerFactoryTest : public mongo::unittest::Test {
+private:
+ virtual void setUp() {
+ ServiceContext* globalEnv = getGlobalServiceContext();
+ ASSERT_TRUE(globalEnv);
+ ASSERT_TRUE(getGlobalServiceContext()->isRegisteredStorageEngine(kWiredTigerEngineName));
+ std::unique_ptr<StorageFactoriesIterator> sfi(
+ getGlobalServiceContext()->makeStorageFactoriesIterator());
+ ASSERT_TRUE(sfi);
+ bool found = false;
+ while (sfi->more()) {
+ const StorageEngine::Factory* currentFactory = sfi->next();
+ if (currentFactory->getCanonicalName() == kWiredTigerEngineName) {
found = true;
+ factory = currentFactory;
+ break;
}
- ASSERT_TRUE(found);
- _oldOptions = wiredTigerGlobalOptions;
- }
-
- virtual void tearDown() {
- wiredTigerGlobalOptions = _oldOptions;
- factory = NULL;
- }
-
- WiredTigerGlobalOptions _oldOptions;
-
- protected:
- const StorageEngine::Factory* factory;
- };
-
- void _testValidateMetadata(const StorageEngine::Factory* factory,
- const BSONObj& metadataOptions,
- bool directoryPerDB,
- bool directoryForIndexes,
- ErrorCodes::Error expectedCode) {
- // It is fine to specify an invalid data directory for the metadata
- // as long as we do not invoke read() or write().
- StorageEngineMetadata metadata("no_such_directory");
- metadata.setStorageEngineOptions(metadataOptions);
-
- StorageGlobalParams storageOptions;
- storageOptions.directoryperdb = directoryPerDB;
- wiredTigerGlobalOptions.directoryForIndexes = directoryForIndexes;
-
- Status status = factory->validateMetadata(metadata, storageOptions);
- if (expectedCode != status.code()) {
- FAIL(str::stream()
- << "Unexpected StorageEngine::Factory::validateMetadata result. Expected: "
- << ErrorCodes::errorString(expectedCode) << " but got "
- << status.toString()
- << " instead. metadataOptions: " << metadataOptions
- << "; directoryPerDB: " << directoryPerDB
- << "; directoryForIndexes: " << directoryForIndexes);
+ found = true;
}
+ ASSERT_TRUE(found);
+ _oldOptions = wiredTigerGlobalOptions;
}
- // Do not validate fields that are not present in metadata.
- TEST_F(WiredTigerFactoryTest, ValidateMetadataEmptyOptions) {
- _testValidateMetadata(factory, BSONObj(), false, false, ErrorCodes::OK);
- _testValidateMetadata(factory, BSONObj(), false, true, ErrorCodes::OK);
- _testValidateMetadata(factory, BSONObj(), true, false, ErrorCodes::OK);
- _testValidateMetadata(factory, BSONObj(), false, false, ErrorCodes::OK);
- }
-
- TEST_F(WiredTigerFactoryTest, ValidateMetadataDirectoryPerDB) {
- _testValidateMetadata(factory, fromjson("{directoryPerDB: 123}"), false, false,
- ErrorCodes::FailedToParse);
- _testValidateMetadata(factory, fromjson("{directoryPerDB: false}"), false, false,
- ErrorCodes::OK);
- _testValidateMetadata(factory, fromjson("{directoryPerDB: false}"), true, false,
- ErrorCodes::InvalidOptions);
- _testValidateMetadata(factory, fromjson("{directoryPerDB: true}"), false, false,
- ErrorCodes::InvalidOptions);
- _testValidateMetadata(factory, fromjson("{directoryPerDB: true}"), true, false,
- ErrorCodes::OK);
- }
-
- TEST_F(WiredTigerFactoryTest, ValidateMetadataDirectoryForIndexes) {
- _testValidateMetadata(factory, fromjson("{directoryForIndexes: 123}"), false, false,
- ErrorCodes::FailedToParse);
- _testValidateMetadata(factory, fromjson("{directoryForIndexes: false}"), false, false,
- ErrorCodes::OK);
- _testValidateMetadata(factory, fromjson("{directoryForIndexes: false}"), false, true,
- ErrorCodes::InvalidOptions);
- _testValidateMetadata(factory, fromjson("{directoryForIndexes: true}"), false, false,
- ErrorCodes::InvalidOptions);
- _testValidateMetadata(factory, fromjson("{directoryForIndexes: true}"), true, true,
- ErrorCodes::OK);
- }
-
- void _testCreateMetadataOptions(const StorageEngine::Factory* factory,
- bool directoryPerDB,
- bool directoryForIndexes) {
- StorageGlobalParams storageOptions;
- storageOptions.directoryperdb = directoryPerDB;
- wiredTigerGlobalOptions.directoryForIndexes = directoryForIndexes;
-
- BSONObj metadataOptions = factory->createMetadataOptions(storageOptions);
-
- BSONElement directoryPerDBElement = metadataOptions.getField("directoryPerDB");
- ASSERT_TRUE(directoryPerDBElement.isBoolean());
- ASSERT_EQUALS(directoryPerDB, directoryPerDBElement.boolean());
-
- BSONElement directoryForIndexesElement = metadataOptions.getField("directoryForIndexes");
- ASSERT_TRUE(directoryForIndexesElement.isBoolean());
- ASSERT_EQUALS(directoryForIndexes, directoryForIndexesElement.boolean());
+ virtual void tearDown() {
+ wiredTigerGlobalOptions = _oldOptions;
+ factory = NULL;
}
- TEST_F(WiredTigerFactoryTest, CreateMetadataOptions) {
- _testCreateMetadataOptions(factory, false, false);
- _testCreateMetadataOptions(factory, false, true);
- _testCreateMetadataOptions(factory, true, false);
- _testCreateMetadataOptions(factory, true, true);
+ WiredTigerGlobalOptions _oldOptions;
+
+protected:
+ const StorageEngine::Factory* factory;
+};
+
+void _testValidateMetadata(const StorageEngine::Factory* factory,
+ const BSONObj& metadataOptions,
+ bool directoryPerDB,
+ bool directoryForIndexes,
+ ErrorCodes::Error expectedCode) {
+ // It is fine to specify an invalid data directory for the metadata
+ // as long as we do not invoke read() or write().
+ StorageEngineMetadata metadata("no_such_directory");
+ metadata.setStorageEngineOptions(metadataOptions);
+
+ StorageGlobalParams storageOptions;
+ storageOptions.directoryperdb = directoryPerDB;
+ wiredTigerGlobalOptions.directoryForIndexes = directoryForIndexes;
+
+ Status status = factory->validateMetadata(metadata, storageOptions);
+ if (expectedCode != status.code()) {
+ FAIL(str::stream()
+ << "Unexpected StorageEngine::Factory::validateMetadata result. Expected: "
+ << ErrorCodes::errorString(expectedCode) << " but got " << status.toString()
+ << " instead. metadataOptions: " << metadataOptions << "; directoryPerDB: "
+ << directoryPerDB << "; directoryForIndexes: " << directoryForIndexes);
}
+}
+
+// Do not validate fields that are not present in metadata.
+TEST_F(WiredTigerFactoryTest, ValidateMetadataEmptyOptions) {
+ _testValidateMetadata(factory, BSONObj(), false, false, ErrorCodes::OK);
+ _testValidateMetadata(factory, BSONObj(), false, true, ErrorCodes::OK);
+ _testValidateMetadata(factory, BSONObj(), true, false, ErrorCodes::OK);
+ _testValidateMetadata(factory, BSONObj(), false, false, ErrorCodes::OK);
+}
+
+TEST_F(WiredTigerFactoryTest, ValidateMetadataDirectoryPerDB) {
+ _testValidateMetadata(
+ factory, fromjson("{directoryPerDB: 123}"), false, false, ErrorCodes::FailedToParse);
+ _testValidateMetadata(
+ factory, fromjson("{directoryPerDB: false}"), false, false, ErrorCodes::OK);
+ _testValidateMetadata(
+ factory, fromjson("{directoryPerDB: false}"), true, false, ErrorCodes::InvalidOptions);
+ _testValidateMetadata(
+ factory, fromjson("{directoryPerDB: true}"), false, false, ErrorCodes::InvalidOptions);
+ _testValidateMetadata(factory, fromjson("{directoryPerDB: true}"), true, false, ErrorCodes::OK);
+}
+
+TEST_F(WiredTigerFactoryTest, ValidateMetadataDirectoryForIndexes) {
+ _testValidateMetadata(
+ factory, fromjson("{directoryForIndexes: 123}"), false, false, ErrorCodes::FailedToParse);
+ _testValidateMetadata(
+ factory, fromjson("{directoryForIndexes: false}"), false, false, ErrorCodes::OK);
+ _testValidateMetadata(
+ factory, fromjson("{directoryForIndexes: false}"), false, true, ErrorCodes::InvalidOptions);
+ _testValidateMetadata(
+ factory, fromjson("{directoryForIndexes: true}"), false, false, ErrorCodes::InvalidOptions);
+ _testValidateMetadata(
+ factory, fromjson("{directoryForIndexes: true}"), true, true, ErrorCodes::OK);
+}
+
+void _testCreateMetadataOptions(const StorageEngine::Factory* factory,
+ bool directoryPerDB,
+ bool directoryForIndexes) {
+ StorageGlobalParams storageOptions;
+ storageOptions.directoryperdb = directoryPerDB;
+ wiredTigerGlobalOptions.directoryForIndexes = directoryForIndexes;
+
+ BSONObj metadataOptions = factory->createMetadataOptions(storageOptions);
+
+ BSONElement directoryPerDBElement = metadataOptions.getField("directoryPerDB");
+ ASSERT_TRUE(directoryPerDBElement.isBoolean());
+ ASSERT_EQUALS(directoryPerDB, directoryPerDBElement.boolean());
+
+ BSONElement directoryForIndexesElement = metadataOptions.getField("directoryForIndexes");
+ ASSERT_TRUE(directoryForIndexesElement.isBoolean());
+ ASSERT_EQUALS(directoryForIndexes, directoryForIndexesElement.boolean());
+}
+
+TEST_F(WiredTigerFactoryTest, CreateMetadataOptions) {
+ _testCreateMetadataOptions(factory, false, false);
+ _testCreateMetadataOptions(factory, false, true);
+ _testCreateMetadataOptions(factory, true, false);
+ _testCreateMetadataOptions(factory, true, true);
+}
} // namespace
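
The fixture above protects the process-wide wiredTigerGlobalOptions by saving a copy in setUp() and restoring it in tearDown(), since the tests mutate directoryForIndexes in place. The same idiom expressed as an RAII guard, a sketch with a hypothetical global:

    #include <iostream>

    template <typename T>
    class ScopedValue {
    public:
        explicit ScopedValue(T& target) : _target(target), _saved(target) {}
        ~ScopedValue() {
            _target = _saved;  // restore the saved copy on scope exit
        }
        ScopedValue(const ScopedValue&) = delete;
        ScopedValue& operator=(const ScopedValue&) = delete;

    private:
        T& _target;
        T _saved;
    };

    bool gDirectoryForIndexes = false;  // stand-in for a global option

    int main() {
        {
            ScopedValue<bool> guard(gDirectoryForIndexes);
            gDirectoryForIndexes = true;  // test body mutates the global freely
        }
        std::cout << gDirectoryForIndexes << '\n';  // 0: restored
    }
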
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp
index f123ff1fffd..291797c11a8 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp
@@ -57,426 +57,417 @@
namespace mongo {
- using std::set;
- using std::string;
-
-
- WiredTigerKVEngine::WiredTigerKVEngine( const std::string& path,
- const std::string& extraOpenOptions,
- bool durable,
- bool repair )
- : _eventHandler(WiredTigerUtil::defaultEventHandlers()),
- _path( path ),
- _durable( durable ),
- _sizeStorerSyncTracker( 100000, 60 * 1000 ) {
-
- size_t cacheSizeGB = wiredTigerGlobalOptions.cacheSizeGB;
- if (cacheSizeGB == 0) {
- // Since the user didn't provide a cache size, choose a reasonable default value.
- ProcessInfo pi;
- unsigned long long memSizeMB = pi.getMemSizeMB();
- if ( memSizeMB > 0 ) {
- double cacheMB = memSizeMB / 2;
- cacheSizeGB = static_cast<size_t>( cacheMB / 1024 );
- if ( cacheSizeGB < 1 )
- cacheSizeGB = 1;
- }
+using std::set;
+using std::string;
+
+
+WiredTigerKVEngine::WiredTigerKVEngine(const std::string& path,
+ const std::string& extraOpenOptions,
+ bool durable,
+ bool repair)
+ : _eventHandler(WiredTigerUtil::defaultEventHandlers()),
+ _path(path),
+ _durable(durable),
+ _sizeStorerSyncTracker(100000, 60 * 1000) {
+ size_t cacheSizeGB = wiredTigerGlobalOptions.cacheSizeGB;
+ if (cacheSizeGB == 0) {
+ // Since the user didn't provide a cache size, choose a reasonable default value.
+ ProcessInfo pi;
+ unsigned long long memSizeMB = pi.getMemSizeMB();
+ if (memSizeMB > 0) {
+ double cacheMB = memSizeMB / 2;
+ cacheSizeGB = static_cast<size_t>(cacheMB / 1024);
+ if (cacheSizeGB < 1)
+ cacheSizeGB = 1;
}
+ }
- if ( _durable ) {
- boost::filesystem::path journalPath = path;
- journalPath /= "journal";
- if ( !boost::filesystem::exists( journalPath ) ) {
- try {
- boost::filesystem::create_directory( journalPath );
- }
- catch( std::exception& e) {
- log() << "error creating journal dir " << journalPath.string() << ' ' << e.what();
- throw;
- }
+ if (_durable) {
+ boost::filesystem::path journalPath = path;
+ journalPath /= "journal";
+ if (!boost::filesystem::exists(journalPath)) {
+ try {
+ boost::filesystem::create_directory(journalPath);
+ } catch (std::exception& e) {
+ log() << "error creating journal dir " << journalPath.string() << ' ' << e.what();
+ throw;
}
}
+ }
- std::stringstream ss;
- ss << "create,";
- ss << "cache_size=" << cacheSizeGB << "G,";
- ss << "session_max=20000,";
- ss << "eviction=(threads_max=4),";
- ss << "statistics=(fast),";
- if ( _durable ) {
- ss << "log=(enabled=true,archive=true,path=journal,compressor=";
- ss << wiredTigerGlobalOptions.journalCompressor << "),";
- }
- ss << "file_manager=(close_idle_time=100000),"; //~28 hours, will put better fix in 3.1.x
- ss << "checkpoint=(wait=" << wiredTigerGlobalOptions.checkpointDelaySecs;
- ss << ",log_size=2GB),";
- ss << "statistics_log=(wait=" << wiredTigerGlobalOptions.statisticsLogDelaySecs << "),";
- ss << WiredTigerCustomizationHooks::get(
- getGlobalServiceContext())->getOpenConfig("metadata");
- ss << extraOpenOptions;
- string config = ss.str();
- log() << "wiredtiger_open config: " << config;
- int ret = wiredtiger_open(path.c_str(), &_eventHandler, config.c_str(), &_conn);
- // Invalid argument (EINVAL) is usually caused by invalid configuration string.
- // We still fassert() but without a stack trace.
- if (ret == EINVAL) {
- fassertFailedNoTrace(28561);
- }
- else if (ret != 0) {
- Status s(wtRCToStatus(ret));
- msgassertedNoTrace(28595, s.reason());
- }
+ std::stringstream ss;
+ ss << "create,";
+ ss << "cache_size=" << cacheSizeGB << "G,";
+ ss << "session_max=20000,";
+ ss << "eviction=(threads_max=4),";
+ ss << "statistics=(fast),";
+ if (_durable) {
+ ss << "log=(enabled=true,archive=true,path=journal,compressor=";
+ ss << wiredTigerGlobalOptions.journalCompressor << "),";
+ }
+ ss << "file_manager=(close_idle_time=100000),"; //~28 hours, will put better fix in 3.1.x
+ ss << "checkpoint=(wait=" << wiredTigerGlobalOptions.checkpointDelaySecs;
+ ss << ",log_size=2GB),";
+ ss << "statistics_log=(wait=" << wiredTigerGlobalOptions.statisticsLogDelaySecs << "),";
+ ss << WiredTigerCustomizationHooks::get(getGlobalServiceContext())->getOpenConfig("metadata");
+ ss << extraOpenOptions;
+ string config = ss.str();
+ log() << "wiredtiger_open config: " << config;
+ int ret = wiredtiger_open(path.c_str(), &_eventHandler, config.c_str(), &_conn);
+ // Invalid argument (EINVAL) is usually caused by invalid configuration string.
+ // We still fassert() but without a stack trace.
+ if (ret == EINVAL) {
+ fassertFailedNoTrace(28561);
+ } else if (ret != 0) {
+ Status s(wtRCToStatus(ret));
+ msgassertedNoTrace(28595, s.reason());
+ }
- _sessionCache.reset( new WiredTigerSessionCache( this ) );
+ _sessionCache.reset(new WiredTigerSessionCache(this));
- _sizeStorerUri = "table:sizeStorer";
- {
- WiredTigerSession session(_conn);
- if (repair && _hasUri(session.getSession(), _sizeStorerUri)) {
- log() << "Repairing size cache";
- fassertNoTrace(28577, _salvageIfNeeded(_sizeStorerUri.c_str()));
- }
- _sizeStorer.reset(new WiredTigerSizeStorer(_conn, _sizeStorerUri));
- _sizeStorer->fillCache();
+ _sizeStorerUri = "table:sizeStorer";
+ {
+ WiredTigerSession session(_conn);
+ if (repair && _hasUri(session.getSession(), _sizeStorerUri)) {
+ log() << "Repairing size cache";
+ fassertNoTrace(28577, _salvageIfNeeded(_sizeStorerUri.c_str()));
}
+ _sizeStorer.reset(new WiredTigerSizeStorer(_conn, _sizeStorerUri));
+ _sizeStorer->fillCache();
}
+}
- WiredTigerKVEngine::~WiredTigerKVEngine() {
- if (_conn) {
- cleanShutdown();
- }
-
- _sessionCache.reset( NULL );
+WiredTigerKVEngine::~WiredTigerKVEngine() {
+ if (_conn) {
+ cleanShutdown();
}
- void WiredTigerKVEngine::cleanShutdown() {
- log() << "WiredTigerKVEngine shutting down";
- syncSizeInfo(true);
- if (_conn) {
- // these must be the last things we do before _conn->close();
- _sizeStorer.reset( NULL );
- _sessionCache->shuttingDown();
+ _sessionCache.reset(NULL);
+}
+
+void WiredTigerKVEngine::cleanShutdown() {
+ log() << "WiredTigerKVEngine shutting down";
+ syncSizeInfo(true);
+ if (_conn) {
+ // these must be the last things we do before _conn->close();
+ _sizeStorer.reset(NULL);
+ _sessionCache->shuttingDown();
#if !__has_feature(address_sanitizer)
- const char* config = "leak_memory=true";
+ const char* config = "leak_memory=true";
#else
- const char* config = NULL;
+ const char* config = NULL;
#endif
- invariantWTOK( _conn->close(_conn, config) );
- _conn = NULL;
- }
- }
-
- Status WiredTigerKVEngine::okToRename( OperationContext* opCtx,
- StringData fromNS,
- StringData toNS,
- StringData ident,
- const RecordStore* originalRecordStore ) const {
- _sizeStorer->storeToCache(_uri( ident ),
- originalRecordStore->numRecords( opCtx ),
- originalRecordStore->dataSize( opCtx ) );
- syncSizeInfo(true);
- return Status::OK();
+ invariantWTOK(_conn->close(_conn, config));
+ _conn = NULL;
}
+}
- int64_t WiredTigerKVEngine::getIdentSize( OperationContext* opCtx,
- StringData ident ) {
- WiredTigerSession* session = WiredTigerRecoveryUnit::get(opCtx)->getSession(opCtx);
- return WiredTigerUtil::getIdentSize(session->getSession(), _uri(ident) );
- }
+Status WiredTigerKVEngine::okToRename(OperationContext* opCtx,
+ StringData fromNS,
+ StringData toNS,
+ StringData ident,
+ const RecordStore* originalRecordStore) const {
+ _sizeStorer->storeToCache(
+ _uri(ident), originalRecordStore->numRecords(opCtx), originalRecordStore->dataSize(opCtx));
+ syncSizeInfo(true);
+ return Status::OK();
+}
- Status WiredTigerKVEngine::repairIdent( OperationContext* opCtx,
- StringData ident ) {
- WiredTigerSession* session = WiredTigerRecoveryUnit::get(opCtx)->getSession(opCtx);
- session->closeAllCursors();
- string uri = _uri(ident);
- return _salvageIfNeeded(uri.c_str());
- }
+int64_t WiredTigerKVEngine::getIdentSize(OperationContext* opCtx, StringData ident) {
+ WiredTigerSession* session = WiredTigerRecoveryUnit::get(opCtx)->getSession(opCtx);
+ return WiredTigerUtil::getIdentSize(session->getSession(), _uri(ident));
+}
- Status WiredTigerKVEngine::_salvageIfNeeded(const char* uri) {
- // Using a side session to avoid transactional issues
- WiredTigerSession sessionWrapper(_conn);
- WT_SESSION* session = sessionWrapper.getSession();
+Status WiredTigerKVEngine::repairIdent(OperationContext* opCtx, StringData ident) {
+ WiredTigerSession* session = WiredTigerRecoveryUnit::get(opCtx)->getSession(opCtx);
+ session->closeAllCursors();
+ string uri = _uri(ident);
+ return _salvageIfNeeded(uri.c_str());
+}
- int rc = (session->verify)(session, uri, NULL);
- if (rc == 0) {
- log() << "Verify succeeded on uri " << uri << ". Not salvaging.";
- return Status::OK();
- }
+Status WiredTigerKVEngine::_salvageIfNeeded(const char* uri) {
+ // Using a side session to avoid transactional issues
+ WiredTigerSession sessionWrapper(_conn);
+ WT_SESSION* session = sessionWrapper.getSession();
- if (rc == EBUSY) {
- // SERVER-16457: verify and salvage are occasionally failing with EBUSY. For now we
- // lie and return OK to avoid breaking tests. This block should go away when that ticket
- // is resolved.
- error() << "Verify on " << uri << " failed with EBUSY. Assuming no salvage is needed.";
- return Status::OK();
- }
+ int rc = (session->verify)(session, uri, NULL);
+ if (rc == 0) {
+ log() << "Verify succeeded on uri " << uri << ". Not salvaging.";
+ return Status::OK();
+ }
- // TODO need to cleanup the sizeStorer cache after salvaging.
- log() << "Verify failed on uri " << uri << ". Running a salvage operation.";
- return wtRCToStatus(session->salvage(session, uri, NULL), "Salvage failed:");
+ if (rc == EBUSY) {
+ // SERVER-16457: verify and salvage are occasionally failing with EBUSY. For now we
+ // lie and return OK to avoid breaking tests. This block should go away when that ticket
+ // is resolved.
+ error() << "Verify on " << uri << " failed with EBUSY. Assuming no salvage is needed.";
+ return Status::OK();
}
- int WiredTigerKVEngine::flushAllFiles( bool sync ) {
- LOG(1) << "WiredTigerKVEngine::flushAllFiles";
- syncSizeInfo(true);
+ // TODO need to cleanup the sizeStorer cache after salvaging.
+ log() << "Verify failed on uri " << uri << ". Running a salvage operation.";
+ return wtRCToStatus(session->salvage(session, uri, NULL), "Salvage failed:");
+}
- WiredTigerSession session(_conn);
- WT_SESSION* s = session.getSession();
- invariantWTOK( s->checkpoint(s, NULL ) );
+int WiredTigerKVEngine::flushAllFiles(bool sync) {
+ LOG(1) << "WiredTigerKVEngine::flushAllFiles";
+ syncSizeInfo(true);
- return 1;
- }
+ WiredTigerSession session(_conn);
+ WT_SESSION* s = session.getSession();
+ invariantWTOK(s->checkpoint(s, NULL));
- void WiredTigerKVEngine::syncSizeInfo( bool sync ) const {
- if ( !_sizeStorer )
- return;
+ return 1;
+}
- try {
- _sizeStorer->syncCache(sync);
- }
- catch (const WriteConflictException&) {
- // ignore, we'll try again later.
- }
- }
+void WiredTigerKVEngine::syncSizeInfo(bool sync) const {
+ if (!_sizeStorer)
+ return;
- RecoveryUnit* WiredTigerKVEngine::newRecoveryUnit() {
- return new WiredTigerRecoveryUnit( _sessionCache.get() );
+ try {
+ _sizeStorer->syncCache(sync);
+ } catch (const WriteConflictException&) {
+ // ignore, we'll try again later.
}
+}
- void WiredTigerKVEngine::setRecordStoreExtraOptions( const std::string& options ) {
- _rsOptions = options;
- }
+RecoveryUnit* WiredTigerKVEngine::newRecoveryUnit() {
+ return new WiredTigerRecoveryUnit(_sessionCache.get());
+}
- void WiredTigerKVEngine::setSortedDataInterfaceExtraOptions( const std::string& options ) {
- _indexOptions = options;
- }
+void WiredTigerKVEngine::setRecordStoreExtraOptions(const std::string& options) {
+ _rsOptions = options;
+}
- Status WiredTigerKVEngine::createRecordStore( OperationContext* opCtx,
- StringData ns,
- StringData ident,
- const CollectionOptions& options ) {
- _checkIdentPath( ident );
- WiredTigerSession session(_conn);
+void WiredTigerKVEngine::setSortedDataInterfaceExtraOptions(const std::string& options) {
+ _indexOptions = options;
+}
- StatusWith<std::string> result =
- WiredTigerRecordStore::generateCreateString(ns, options, _rsOptions);
- if (!result.isOK()) {
- return result.getStatus();
- }
- std::string config = result.getValue();
+Status WiredTigerKVEngine::createRecordStore(OperationContext* opCtx,
+ StringData ns,
+ StringData ident,
+ const CollectionOptions& options) {
+ _checkIdentPath(ident);
+ WiredTigerSession session(_conn);
+
+ StatusWith<std::string> result =
+ WiredTigerRecordStore::generateCreateString(ns, options, _rsOptions);
+ if (!result.isOK()) {
+ return result.getStatus();
+ }
+ std::string config = result.getValue();
+
+ string uri = _uri(ident);
+ WT_SESSION* s = session.getSession();
+ LOG(2) << "WiredTigerKVEngine::createRecordStore uri: " << uri << " config: " << config;
+ return wtRCToStatus(s->create(s, uri.c_str(), config.c_str()));
+}
- string uri = _uri( ident );
- WT_SESSION* s = session.getSession();
- LOG(2) << "WiredTigerKVEngine::createRecordStore uri: " << uri << " config: " << config;
- return wtRCToStatus( s->create( s, uri.c_str(), config.c_str() ) );
+RecordStore* WiredTigerKVEngine::getRecordStore(OperationContext* opCtx,
+ StringData ns,
+ StringData ident,
+ const CollectionOptions& options) {
+ if (options.capped) {
+ return new WiredTigerRecordStore(opCtx,
+ ns,
+ _uri(ident),
+ options.capped,
+ options.cappedSize ? options.cappedSize : 4096,
+ options.cappedMaxDocs ? options.cappedMaxDocs : -1,
+ NULL,
+ _sizeStorer.get());
+ } else {
+ return new WiredTigerRecordStore(
+ opCtx, ns, _uri(ident), false, -1, -1, NULL, _sizeStorer.get());
}
+}
+
+string WiredTigerKVEngine::_uri(StringData ident) const {
+ return string("table:") + ident.toString();
+}
- RecordStore* WiredTigerKVEngine::getRecordStore( OperationContext* opCtx,
- StringData ns,
+Status WiredTigerKVEngine::createSortedDataInterface(OperationContext* opCtx,
StringData ident,
- const CollectionOptions& options ) {
-
- if (options.capped) {
- return new WiredTigerRecordStore(opCtx, ns, _uri(ident), options.capped,
- options.cappedSize ? options.cappedSize : 4096,
- options.cappedMaxDocs ? options.cappedMaxDocs : -1,
- NULL,
- _sizeStorer.get() );
- }
- else {
- return new WiredTigerRecordStore(opCtx, ns, _uri(ident),
- false, -1, -1, NULL, _sizeStorer.get() );
- }
+ const IndexDescriptor* desc) {
+ _checkIdentPath(ident);
+ StatusWith<std::string> result = WiredTigerIndex::generateCreateString(_indexOptions, *desc);
+ if (!result.isOK()) {
+ return result.getStatus();
}
- string WiredTigerKVEngine::_uri( StringData ident ) const {
- return string("table:") + ident.toString();
- }
+ std::string config = result.getValue();
- Status WiredTigerKVEngine::createSortedDataInterface( OperationContext* opCtx,
- StringData ident,
- const IndexDescriptor* desc ) {
- _checkIdentPath( ident );
- StatusWith<std::string> result =
- WiredTigerIndex::generateCreateString(_indexOptions, *desc);
- if (!result.isOK()) {
- return result.getStatus();
- }
-
- std::string config = result.getValue();
+ LOG(2) << "WiredTigerKVEngine::createSortedDataInterface ident: " << ident
+ << " config: " << config;
+ return wtRCToStatus(WiredTigerIndex::Create(opCtx, _uri(ident), config));
+}
- LOG(2) << "WiredTigerKVEngine::createSortedDataInterface ident: " << ident
- << " config: " << config;
- return wtRCToStatus(WiredTigerIndex::Create(opCtx, _uri(ident), config));
- }
+SortedDataInterface* WiredTigerKVEngine::getSortedDataInterface(OperationContext* opCtx,
+ StringData ident,
+ const IndexDescriptor* desc) {
+ if (desc->unique())
+ return new WiredTigerIndexUnique(opCtx, _uri(ident), desc);
+ return new WiredTigerIndexStandard(opCtx, _uri(ident), desc);
+}
- SortedDataInterface* WiredTigerKVEngine::getSortedDataInterface( OperationContext* opCtx,
- StringData ident,
- const IndexDescriptor* desc ) {
- if ( desc->unique() )
- return new WiredTigerIndexUnique( opCtx, _uri( ident ), desc );
- return new WiredTigerIndexStandard( opCtx, _uri( ident ), desc );
- }
+Status WiredTigerKVEngine::dropIdent(OperationContext* opCtx, StringData ident) {
+ _drop(ident);
+ return Status::OK();
+}
- Status WiredTigerKVEngine::dropIdent( OperationContext* opCtx,
- StringData ident ) {
- _drop( ident );
- return Status::OK();
- }
+bool WiredTigerKVEngine::_drop(StringData ident) {
+ string uri = _uri(ident);
- bool WiredTigerKVEngine::_drop( StringData ident ) {
- string uri = _uri( ident );
+ WiredTigerSession session(_conn);
- WiredTigerSession session(_conn);
+ int ret = session.getSession()->drop(session.getSession(), uri.c_str(), "force");
+ LOG(1) << "WT drop of " << uri << " res " << ret;
- int ret = session.getSession()->drop( session.getSession(), uri.c_str(), "force" );
- LOG(1) << "WT drop of " << uri << " res " << ret;
+ if (ret == 0) {
+ // yay, it worked
+ return true;
+ }
- if ( ret == 0 ) {
- // yay, it worked
- return true;
+ if (ret == EBUSY) {
+ // this is expected, queue it up
+ {
+ stdx::lock_guard<stdx::mutex> lk(_identToDropMutex);
+ _identToDrop.insert(uri);
}
+ _sessionCache->closeAll();
+ return false;
+ }
- if ( ret == EBUSY ) {
- // this is expected, queue it up
- {
- stdx::lock_guard<stdx::mutex> lk( _identToDropMutex );
- _identToDrop.insert( uri );
- }
- _sessionCache->closeAll();
- return false;
- }
+ invariantWTOK(ret);
+ return false;
+}
- invariantWTOK( ret );
- return false;
+bool WiredTigerKVEngine::haveDropsQueued() const {
+ if (_sizeStorerSyncTracker.intervalHasElapsed()) {
+ _sizeStorerSyncTracker.resetLastTime();
+ syncSizeInfo(false);
}
+ stdx::lock_guard<stdx::mutex> lk(_identToDropMutex);
+ return !_identToDrop.empty();
+}
- bool WiredTigerKVEngine::haveDropsQueued() const {
- if ( _sizeStorerSyncTracker.intervalHasElapsed() ) {
- _sizeStorerSyncTracker.resetLastTime();
- syncSizeInfo(false);
- }
- stdx::lock_guard<stdx::mutex> lk( _identToDropMutex );
- return !_identToDrop.empty();
+void WiredTigerKVEngine::dropAllQueued() {
+ set<string> mine;
+ {
+ stdx::lock_guard<stdx::mutex> lk(_identToDropMutex);
+ mine = _identToDrop;
}
- void WiredTigerKVEngine::dropAllQueued() {
- set<string> mine;
- {
- stdx::lock_guard<stdx::mutex> lk( _identToDropMutex );
- mine = _identToDrop;
- }
+ set<string> deleted;
- set<string> deleted;
+ {
+ WiredTigerSession session(_conn);
+ for (set<string>::const_iterator it = mine.begin(); it != mine.end(); ++it) {
+ string uri = *it;
+ int ret = session.getSession()->drop(session.getSession(), uri.c_str(), "force");
+ LOG(1) << "WT queued drop of " << uri << " res " << ret;
- {
- WiredTigerSession session(_conn);
- for ( set<string>::const_iterator it = mine.begin(); it != mine.end(); ++it ) {
- string uri = *it;
- int ret = session.getSession()->drop( session.getSession(), uri.c_str(), "force" );
- LOG(1) << "WT queued drop of " << uri << " res " << ret;
-
- if ( ret == 0 ) {
- deleted.insert( uri );
- continue;
- }
-
- if ( ret == EBUSY ) {
-                    // leave in queue
- continue;
- }
-
- invariantWTOK( ret );
+ if (ret == 0) {
+ deleted.insert(uri);
+ continue;
}
- }
- {
- stdx::lock_guard<stdx::mutex> lk( _identToDropMutex );
- for ( set<string>::const_iterator it = deleted.begin(); it != deleted.end(); ++it ) {
- _identToDrop.erase( *it );
+ if (ret == EBUSY) {
+            // leave in queue
+ continue;
}
- }
- }
- bool WiredTigerKVEngine::supportsDocLocking() const {
- return true;
- }
-
- bool WiredTigerKVEngine::supportsDirectoryPerDB() const {
- return true;
+ invariantWTOK(ret);
+ }
}
- bool WiredTigerKVEngine::hasIdent(OperationContext* opCtx, StringData ident) const {
- return _hasUri(WiredTigerRecoveryUnit::get(opCtx)->getSession(opCtx)->getSession(),
- _uri(ident));
+ {
+ stdx::lock_guard<stdx::mutex> lk(_identToDropMutex);
+ for (set<string>::const_iterator it = deleted.begin(); it != deleted.end(); ++it) {
+ _identToDrop.erase(*it);
+ }
}
+}
- bool WiredTigerKVEngine::_hasUri(WT_SESSION* session, const std::string& uri) const {
- // can't use WiredTigerCursor since this is called from constructor.
- WT_CURSOR* c = NULL;
- int ret = session->open_cursor(session, "metadata:", NULL, NULL, &c);
- if (ret == ENOENT) return false;
- invariantWTOK(ret);
- ON_BLOCK_EXIT(c->close, c);
+bool WiredTigerKVEngine::supportsDocLocking() const {
+ return true;
+}
- c->set_key(c, uri.c_str());
- return c->search(c) == 0;
- }
+bool WiredTigerKVEngine::supportsDirectoryPerDB() const {
+ return true;
+}
- std::vector<std::string> WiredTigerKVEngine::getAllIdents( OperationContext* opCtx ) const {
- std::vector<std::string> all;
- WiredTigerCursor cursor( "metadata:", WiredTigerSession::kMetadataCursorId, false, opCtx );
- WT_CURSOR* c = cursor.get();
- if ( !c )
- return all;
-
- while ( c->next(c) == 0 ) {
- const char* raw;
- c->get_key(c, &raw );
- StringData key(raw);
- size_t idx = key.find( ':' );
- if ( idx == string::npos )
- continue;
- StringData type = key.substr( 0, idx );
- if ( type != "table" )
- continue;
+bool WiredTigerKVEngine::hasIdent(OperationContext* opCtx, StringData ident) const {
+ return _hasUri(WiredTigerRecoveryUnit::get(opCtx)->getSession(opCtx)->getSession(),
+ _uri(ident));
+}
- StringData ident = key.substr(idx+1);
- if ( ident == "sizeStorer" )
- continue;
+bool WiredTigerKVEngine::_hasUri(WT_SESSION* session, const std::string& uri) const {
+ // can't use WiredTigerCursor since this is called from constructor.
+ WT_CURSOR* c = NULL;
+ int ret = session->open_cursor(session, "metadata:", NULL, NULL, &c);
+ if (ret == ENOENT)
+ return false;
+ invariantWTOK(ret);
+ ON_BLOCK_EXIT(c->close, c);
- all.push_back( ident.toString() );
- }
+ c->set_key(c, uri.c_str());
+ return c->search(c) == 0;
+}
+std::vector<std::string> WiredTigerKVEngine::getAllIdents(OperationContext* opCtx) const {
+ std::vector<std::string> all;
+ WiredTigerCursor cursor("metadata:", WiredTigerSession::kMetadataCursorId, false, opCtx);
+ WT_CURSOR* c = cursor.get();
+ if (!c)
return all;
- }
- int WiredTigerKVEngine::reconfigure(const char* str) {
- return _conn->reconfigure(_conn, str);
+ while (c->next(c) == 0) {
+ const char* raw;
+ c->get_key(c, &raw);
+ StringData key(raw);
+ size_t idx = key.find(':');
+ if (idx == string::npos)
+ continue;
+ StringData type = key.substr(0, idx);
+ if (type != "table")
+ continue;
+
+ StringData ident = key.substr(idx + 1);
+ if (ident == "sizeStorer")
+ continue;
+
+ all.push_back(ident.toString());
}
- void WiredTigerKVEngine::_checkIdentPath( StringData ident ) {
- size_t start = 0;
- size_t idx;
- while ( ( idx = ident.find( '/', start ) ) != string::npos ) {
- StringData dir = ident.substr( 0, idx );
-
- boost::filesystem::path subdir = _path;
- subdir /= dir.toString();
- if ( !boost::filesystem::exists( subdir ) ) {
- LOG(1) << "creating subdirectory: " << dir;
- try {
- boost::filesystem::create_directory( subdir );
- }
- catch (const std::exception& e) {
- error() << "error creating path " << subdir.string() << ' ' << e.what();
- throw;
- }
- }
+ return all;
+}
+
+int WiredTigerKVEngine::reconfigure(const char* str) {
+ return _conn->reconfigure(_conn, str);
+}
- start = idx + 1;
+void WiredTigerKVEngine::_checkIdentPath(StringData ident) {
+ size_t start = 0;
+ size_t idx;
+ while ((idx = ident.find('/', start)) != string::npos) {
+ StringData dir = ident.substr(0, idx);
+
+ boost::filesystem::path subdir = _path;
+ subdir /= dir.toString();
+ if (!boost::filesystem::exists(subdir)) {
+ LOG(1) << "creating subdirectory: " << dir;
+ try {
+ boost::filesystem::create_directory(subdir);
+ } catch (const std::exception& e) {
+ error() << "error creating path " << subdir.string() << ' ' << e.what();
+ throw;
+ }
}
+
+ start = idx + 1;
}
}
+}
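
One piece of behavior worth noting in the constructor above is the default cache sizing: when cacheSizeGB is 0, the engine takes half of physical memory in MB, converts down to whole gigabytes, and clamps to at least 1 GB. The same arithmetic extracted into a standalone sketch:

    #include <cstddef>
    #include <iostream>

    size_t defaultCacheSizeGB(unsigned long long memSizeMB) {
        if (memSizeMB == 0)
            return 0;  // memory size unknown: caller keeps its own default
        double cacheMB = memSizeMB / 2;                        // half of physical RAM
        size_t cacheSizeGB = static_cast<size_t>(cacheMB / 1024);
        return cacheSizeGB < 1 ? 1 : cacheSizeGB;              // clamp to >= 1 GB
    }

    int main() {
        std::cout << defaultCacheSizeGB(16384) << '\n';  // 16 GB machine -> 8
        std::cout << defaultCacheSizeGB(1536) << '\n';   // 1.5 GB machine -> 1 (clamped)
    }
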
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h
index dd40faa4ccf..cf90624d564 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h
@@ -44,113 +44,112 @@
namespace mongo {
- class WiredTigerSessionCache;
- class WiredTigerSizeStorer;
+class WiredTigerSessionCache;
+class WiredTigerSizeStorer;
- class WiredTigerKVEngine : public KVEngine {
- public:
- WiredTigerKVEngine( const std::string& path,
- const std::string& extraOpenOptions = "",
- bool durable = true,
- bool repair = false );
- virtual ~WiredTigerKVEngine();
+class WiredTigerKVEngine : public KVEngine {
+public:
+ WiredTigerKVEngine(const std::string& path,
+ const std::string& extraOpenOptions = "",
+ bool durable = true,
+ bool repair = false);
+ virtual ~WiredTigerKVEngine();
- void setRecordStoreExtraOptions( const std::string& options );
- void setSortedDataInterfaceExtraOptions( const std::string& options );
+ void setRecordStoreExtraOptions(const std::string& options);
+ void setSortedDataInterfaceExtraOptions(const std::string& options);
- virtual bool supportsDocLocking() const;
+ virtual bool supportsDocLocking() const;
- virtual bool supportsDirectoryPerDB() const;
+ virtual bool supportsDirectoryPerDB() const;
- virtual bool isDurable() const { return _durable; }
+ virtual bool isDurable() const {
+ return _durable;
+ }
- virtual RecoveryUnit* newRecoveryUnit();
+ virtual RecoveryUnit* newRecoveryUnit();
- virtual Status createRecordStore( OperationContext* opCtx,
- StringData ns,
- StringData ident,
- const CollectionOptions& options );
+ virtual Status createRecordStore(OperationContext* opCtx,
+ StringData ns,
+ StringData ident,
+ const CollectionOptions& options);
- virtual RecordStore* getRecordStore( OperationContext* opCtx,
- StringData ns,
- StringData ident,
- const CollectionOptions& options );
-
- virtual Status createSortedDataInterface( OperationContext* opCtx,
- StringData ident,
- const IndexDescriptor* desc );
+ virtual RecordStore* getRecordStore(OperationContext* opCtx,
+ StringData ns,
+ StringData ident,
+ const CollectionOptions& options);
- virtual SortedDataInterface* getSortedDataInterface( OperationContext* opCtx,
- StringData ident,
- const IndexDescriptor* desc );
-
- virtual Status dropIdent( OperationContext* opCtx,
- StringData ident );
+ virtual Status createSortedDataInterface(OperationContext* opCtx,
+ StringData ident,
+ const IndexDescriptor* desc);
- virtual Status okToRename( OperationContext* opCtx,
- StringData fromNS,
- StringData toNS,
- StringData ident,
- const RecordStore* originalRecordStore ) const;
+ virtual SortedDataInterface* getSortedDataInterface(OperationContext* opCtx,
+ StringData ident,
+ const IndexDescriptor* desc);
- virtual int flushAllFiles( bool sync );
+ virtual Status dropIdent(OperationContext* opCtx, StringData ident);
- virtual int64_t getIdentSize( OperationContext* opCtx,
- StringData ident );
+ virtual Status okToRename(OperationContext* opCtx,
+ StringData fromNS,
+ StringData toNS,
+ StringData ident,
+ const RecordStore* originalRecordStore) const;
- virtual Status repairIdent( OperationContext* opCtx,
- StringData ident );
+ virtual int flushAllFiles(bool sync);
- virtual bool hasIdent(OperationContext* opCtx, StringData ident) const;
+ virtual int64_t getIdentSize(OperationContext* opCtx, StringData ident);
- std::vector<std::string> getAllIdents( OperationContext* opCtx ) const;
+ virtual Status repairIdent(OperationContext* opCtx, StringData ident);
- virtual void cleanShutdown();
+ virtual bool hasIdent(OperationContext* opCtx, StringData ident) const;
- // wiredtiger specific
- // Calls WT_CONNECTION::reconfigure on the underlying WT_CONNECTION
- // held by this class
- int reconfigure(const char* str);
+ std::vector<std::string> getAllIdents(OperationContext* opCtx) const;
- WT_CONNECTION* getConnection() { return _conn; }
- void dropAllQueued();
- bool haveDropsQueued() const;
+ virtual void cleanShutdown();
- void syncSizeInfo(bool sync) const;
+ // wiredtiger specific
+ // Calls WT_CONNECTION::reconfigure on the underlying WT_CONNECTION
+ // held by this class
+ int reconfigure(const char* str);
- /**
- * Initializes a background job to remove excess documents in the oplog collections.
- * This applies to the capped collections in the local.oplog.* namespaces (specifically
- * local.oplog.rs for replica sets and local.oplog.$main for master/slave replication).
- * Returns true if a background job is running for the namespace.
- */
- static bool initRsOplogBackgroundThread(StringData ns);
+ WT_CONNECTION* getConnection() {
+ return _conn;
+ }
+ void dropAllQueued();
+ bool haveDropsQueued() const;
- private:
+ void syncSizeInfo(bool sync) const;
- Status _salvageIfNeeded(const char* uri);
- void _checkIdentPath( StringData ident );
+ /**
+ * Initializes a background job to remove excess documents in the oplog collections.
+ * This applies to the capped collections in the local.oplog.* namespaces (specifically
+ * local.oplog.rs for replica sets and local.oplog.$main for master/slave replication).
+ * Returns true if a background job is running for the namespace.
+ */
+ static bool initRsOplogBackgroundThread(StringData ns);
- bool _hasUri(WT_SESSION* session, const std::string& uri) const;
+private:
+ Status _salvageIfNeeded(const char* uri);
+ void _checkIdentPath(StringData ident);
- std::string _uri( StringData ident ) const;
- bool _drop( StringData ident );
+ bool _hasUri(WT_SESSION* session, const std::string& uri) const;
- WT_CONNECTION* _conn;
- WT_EVENT_HANDLER _eventHandler;
- std::unique_ptr<WiredTigerSessionCache> _sessionCache;
- std::string _path;
- bool _durable;
+ std::string _uri(StringData ident) const;
+ bool _drop(StringData ident);
- std::string _rsOptions;
- std::string _indexOptions;
+ WT_CONNECTION* _conn;
+ WT_EVENT_HANDLER _eventHandler;
+ std::unique_ptr<WiredTigerSessionCache> _sessionCache;
+ std::string _path;
+ bool _durable;
- std::set<std::string> _identToDrop;
- mutable stdx::mutex _identToDropMutex;
+ std::string _rsOptions;
+ std::string _indexOptions;
- std::unique_ptr<WiredTigerSizeStorer> _sizeStorer;
- std::string _sizeStorerUri;
- mutable ElapsedTracker _sizeStorerSyncTracker;
- };
+ std::set<std::string> _identToDrop;
+ mutable stdx::mutex _identToDropMutex;
+ std::unique_ptr<WiredTigerSizeStorer> _sizeStorer;
+ std::string _sizeStorerUri;
+ mutable ElapsedTracker _sizeStorerSyncTracker;
+};
}
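
The _identToDrop set and its mutex declared above back the EBUSY-deferred drop path in _drop()/dropAllQueued(): a table drop that fails with EBUSY is parked in the set and retried later. A simplified sketch of that queue (hypothetical class, not the real one), keeping the copy-work-erase structure so the drop calls run outside the lock:

    #include <mutex>
    #include <set>
    #include <string>

    class DropQueue {
    public:
        void defer(const std::string& uri) {
            std::lock_guard<std::mutex> lk(_mutex);
            _pending.insert(uri);
        }

        // DropFn is any callable int(const std::string&) returning 0 on success.
        template <typename DropFn>
        void retryAll(DropFn drop) {
            std::set<std::string> mine;
            {
                std::lock_guard<std::mutex> lk(_mutex);
                mine = _pending;  // copy so drop() runs without holding the lock
            }
            std::set<std::string> done;
            for (const auto& uri : mine) {
                if (drop(uri) == 0)
                    done.insert(uri);  // succeeded; anything else stays queued
            }
            std::lock_guard<std::mutex> lk(_mutex);
            for (const auto& uri : done)
                _pending.erase(uri);
        }

    private:
        std::mutex _mutex;
        std::set<std::string> _pending;
    };

    int main() {
        DropQueue q;
        q.defer("table:index-1");
        q.retryAll([](const std::string&) { return 0; });  // all retried drops succeed
    }
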
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine_test.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine_test.cpp
index db944d575d2..576d121bb6c 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine_test.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine_test.cpp
@@ -36,31 +36,32 @@
namespace mongo {
- class WiredTigerKVHarnessHelper : public KVHarnessHelper {
- public:
- WiredTigerKVHarnessHelper()
- : _dbpath( "wt-kv-harness" ) {
- _engine.reset( new WiredTigerKVEngine( _dbpath.path() ) );
- }
+class WiredTigerKVHarnessHelper : public KVHarnessHelper {
+public:
+ WiredTigerKVHarnessHelper() : _dbpath("wt-kv-harness") {
+ _engine.reset(new WiredTigerKVEngine(_dbpath.path()));
+ }
- virtual ~WiredTigerKVHarnessHelper() {
- _engine.reset( NULL );
- }
+ virtual ~WiredTigerKVHarnessHelper() {
+ _engine.reset(NULL);
+ }
- virtual KVEngine* restartEngine() {
- _engine.reset( NULL );
- _engine.reset( new WiredTigerKVEngine( _dbpath.path() ) );
- return _engine.get();
- }
+ virtual KVEngine* restartEngine() {
+ _engine.reset(NULL);
+ _engine.reset(new WiredTigerKVEngine(_dbpath.path()));
+ return _engine.get();
+ }
- virtual KVEngine* getEngine() { return _engine.get(); }
+ virtual KVEngine* getEngine() {
+ return _engine.get();
+ }
- private:
- unittest::TempDir _dbpath;
- std::unique_ptr<WiredTigerKVEngine> _engine;
- };
+private:
+ unittest::TempDir _dbpath;
+ std::unique_ptr<WiredTigerKVEngine> _engine;
+};
- KVHarnessHelper* KVHarnessHelper::create() {
- return new WiredTigerKVHarnessHelper();
- }
+KVHarnessHelper* KVHarnessHelper::create() {
+ return new WiredTigerKVHarnessHelper();
+}
}
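
restartEngine() above simulates a server restart by destroying the current engine before constructing a fresh one over the same dbpath, so the old instance releases the directory first. The ordering is the point; a minimal sketch with a stand-in engine type:

    #include <memory>
    #include <string>

    struct Engine {
        explicit Engine(const std::string& path) : dbpath(path) {}
        std::string dbpath;
    };

    int main() {
        std::string dbpath = "wt-kv-harness";
        std::unique_ptr<Engine> engine(new Engine(dbpath));
        // Simulated restart: destroy first so the old instance releases the
        // directory, then construct a fresh engine over the same path.
        engine.reset();
        engine.reset(new Engine(dbpath));
    }
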
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_options_init.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_options_init.cpp
index 5cbd0ba2873..589706daed4 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_options_init.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_options_init.cpp
@@ -37,22 +37,21 @@
namespace mongo {
- MONGO_MODULE_STARTUP_OPTIONS_REGISTER(WiredTigerOptions)(InitializerContext* context) {
- return wiredTigerGlobalOptions.add(&moe::startupOptions);
- }
+MONGO_MODULE_STARTUP_OPTIONS_REGISTER(WiredTigerOptions)(InitializerContext* context) {
+ return wiredTigerGlobalOptions.add(&moe::startupOptions);
+}
- MONGO_STARTUP_OPTIONS_VALIDATE(WiredTigerOptions)(InitializerContext* context) {
- return Status::OK();
- }
+MONGO_STARTUP_OPTIONS_VALIDATE(WiredTigerOptions)(InitializerContext* context) {
+ return Status::OK();
+}
- MONGO_STARTUP_OPTIONS_STORE(WiredTigerOptions)(InitializerContext* context) {
- Status ret = wiredTigerGlobalOptions.store(moe::startupOptionsParsed, context->args());
- if (!ret.isOK()) {
- std::cerr << ret.toString() << std::endl;
- std::cerr << "try '" << context->args()[0] << " --help' for more information"
- << std::endl;
- ::_exit(EXIT_BADOPTIONS);
- }
- return Status::OK();
+MONGO_STARTUP_OPTIONS_STORE(WiredTigerOptions)(InitializerContext* context) {
+ Status ret = wiredTigerGlobalOptions.store(moe::startupOptionsParsed, context->args());
+ if (!ret.isOK()) {
+ std::cerr << ret.toString() << std::endl;
+ std::cerr << "try '" << context->args()[0] << " --help' for more information" << std::endl;
+ ::_exit(EXIT_BADOPTIONS);
}
+ return Status::OK();
+}
}
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_parameters.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_parameters.cpp
index 5be812178dc..49ef155b51d 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_parameters.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_parameters.cpp
@@ -41,23 +41,25 @@ using std::string;
WiredTigerEngineRuntimeConfigParameter::WiredTigerEngineRuntimeConfigParameter(
WiredTigerKVEngine* engine)
- : ServerParameter(ServerParameterSet::getGlobal(),
- "wiredTigerEngineRuntimeConfig", false, true),
- _engine(engine) {}
+ : ServerParameter(
+ ServerParameterSet::getGlobal(), "wiredTigerEngineRuntimeConfig", false, true),
+ _engine(engine) {}
-void WiredTigerEngineRuntimeConfigParameter::append(OperationContext* txn, BSONObjBuilder& b,
- const std::string& name) {
+void WiredTigerEngineRuntimeConfigParameter::append(OperationContext* txn,
+ BSONObjBuilder& b,
+ const std::string& name) {
b << name << "";
}
Status WiredTigerEngineRuntimeConfigParameter::set(const BSONElement& newValueElement) {
try {
return setFromString(newValueElement.String());
- }
- catch (MsgAssertionException msg) {
- return Status(ErrorCodes::BadValue, mongoutils::str::stream() <<
- "Invalid value for wiredTigerEngineRuntimeConfig via setParameter command: "
+    } catch (const MsgAssertionException& msg) {
+ return Status(
+ ErrorCodes::BadValue,
+ mongoutils::str::stream()
+ << "Invalid value for wiredTigerEngineRuntimeConfig via setParameter command: "
<< newValueElement);
}
}
@@ -65,18 +67,19 @@ Status WiredTigerEngineRuntimeConfigParameter::set(const BSONElement& newValueEl
Status WiredTigerEngineRuntimeConfigParameter::setFromString(const std::string& str) {
size_t pos = str.find('\0');
if (pos != std::string::npos) {
- return Status(ErrorCodes::BadValue, (str::stream() <<
- "WiredTiger configuration strings cannot have embedded null characters. "
- "Embedded null found at position " << pos));
+ return Status(ErrorCodes::BadValue,
+ (str::stream()
+ << "WiredTiger configuration strings cannot have embedded null characters. "
+ "Embedded null found at position " << pos));
}
log() << "Reconfiguring WiredTiger storage engine with config string: \"" << str << "\"";
int ret = _engine->reconfigure(str.c_str());
if (ret != 0) {
- string result = (mongoutils::str::stream() <<
- "WiredTiger reconfiguration failed with error code (" << ret << "): "
- << wiredtiger_strerror(ret));
+ string result =
+ (mongoutils::str::stream() << "WiredTiger reconfiguration failed with error code ("
+ << ret << "): " << wiredtiger_strerror(ret));
error() << result;
return Status(ErrorCodes::BadValue, result);
@@ -84,5 +87,4 @@ Status WiredTigerEngineRuntimeConfigParameter::setFromString(const std::string&
return Status::OK();
}
-
}
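
setFromString() above ends in WiredTigerKVEngine::reconfigure, which (per the header in the next file) wraps WT_CONNECTION::reconfigure. For reference, a standalone sketch of the same call against the raw WiredTiger C API (home directory and cache sizes are illustrative):

    #include <wiredtiger.h>

    int reconfigureDemo() {
        WT_CONNECTION* conn;
        int ret = wiredtiger_open("demo-home", NULL, "create,cache_size=100MB", &conn);
        if (ret != 0)
            return ret;
        // The same entry point wiredTigerEngineRuntimeConfig ultimately drives.
        ret = conn->reconfigure(conn, "cache_size=200MB");
        conn->close(conn, NULL);
        return ret;
    }
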
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_parameters.h b/src/mongo/db/storage/wiredtiger/wiredtiger_parameters.h
index 4198a851ce3..6742f76be99 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_parameters.h
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_parameters.h
@@ -32,21 +32,21 @@
namespace mongo {
- /**
- * WT_CONNECTION::reconfigure get/setParameter support
- */
- class WiredTigerEngineRuntimeConfigParameter : public ServerParameter {
- MONGO_DISALLOW_COPYING(WiredTigerEngineRuntimeConfigParameter);
- public:
- explicit WiredTigerEngineRuntimeConfigParameter(WiredTigerKVEngine* engine);
+/**
+ * WT_CONNECTION::reconfigure get/setParameter support
+ */
+class WiredTigerEngineRuntimeConfigParameter : public ServerParameter {
+ MONGO_DISALLOW_COPYING(WiredTigerEngineRuntimeConfigParameter);
+
+public:
+ explicit WiredTigerEngineRuntimeConfigParameter(WiredTigerKVEngine* engine);
- virtual void append(OperationContext* txn, BSONObjBuilder& b,
- const std::string& name);
- virtual Status set(const BSONElement& newValueElement);
+ virtual void append(OperationContext* txn, BSONObjBuilder& b, const std::string& name);
+ virtual Status set(const BSONElement& newValueElement);
- virtual Status setFromString(const std::string& str);
+ virtual Status setFromString(const std::string& str);
- private:
- WiredTigerKVEngine* _engine;
- };
+private:
+ WiredTigerKVEngine* _engine;
+};
}
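
Constructing this parameter registers it with the global ServerParameterSet through the base-class constructor; the two booleans passed in the .cpp read as settable-at-startup=false, settable-at-runtime=true (an interpretation of the ServerParameter signature of this era, not spelled out in this diff). Hypothetical wiring:

    // Illustrative: once the engine exists, construction alone registers the
    // parameter; the global ServerParameterSet retains it for process lifetime.
    void registerRuntimeConfigParameter(WiredTigerKVEngine* engine) {
        new WiredTigerEngineRuntimeConfigParameter(engine);
    }
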
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp
index 4970869e1e5..6e96e241bd6 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp
@@ -64,1164 +64,1142 @@
namespace mongo {
- using std::unique_ptr;
- using std::string;
+using std::unique_ptr;
+using std::string;
namespace {
- static const int kMinimumRecordStoreVersion = 1;
- static const int kCurrentRecordStoreVersion = 1; // New record stores use this by default.
- static const int kMaximumRecordStoreVersion = 1;
- BOOST_STATIC_ASSERT(kCurrentRecordStoreVersion >= kMinimumRecordStoreVersion);
- BOOST_STATIC_ASSERT(kCurrentRecordStoreVersion <= kMaximumRecordStoreVersion);
+static const int kMinimumRecordStoreVersion = 1;
+static const int kCurrentRecordStoreVersion = 1; // New record stores use this by default.
+static const int kMaximumRecordStoreVersion = 1;
+BOOST_STATIC_ASSERT(kCurrentRecordStoreVersion >= kMinimumRecordStoreVersion);
+BOOST_STATIC_ASSERT(kCurrentRecordStoreVersion <= kMaximumRecordStoreVersion);
- bool shouldUseOplogHack(OperationContext* opCtx, const std::string& uri) {
- StatusWith<BSONObj> appMetadata = WiredTigerUtil::getApplicationMetadata(opCtx, uri);
- if (!appMetadata.isOK()) {
- return false;
- }
-
- return (appMetadata.getValue().getIntField("oplogKeyExtractionVersion") == 1);
+bool shouldUseOplogHack(OperationContext* opCtx, const std::string& uri) {
+ StatusWith<BSONObj> appMetadata = WiredTigerUtil::getApplicationMetadata(opCtx, uri);
+ if (!appMetadata.isOK()) {
+ return false;
}
-} // namespace
-
- MONGO_FP_DECLARE(WTWriteConflictException);
-
- const std::string kWiredTigerEngineName = "wiredTiger";
-
- class WiredTigerRecordStore::Cursor final : public RecordCursor {
- public:
- Cursor(OperationContext* txn,
- const WiredTigerRecordStore& rs,
- bool forward = true,
- bool forParallelCollectionScan = false)
- : _rs(rs)
- , _txn(txn)
- , _forward(forward)
- , _forParallelCollectionScan(forParallelCollectionScan)
- , _cursor(new WiredTigerCursor(rs.getURI(), rs.instanceId(), true, txn))
- , _readUntilForOplog(WiredTigerRecoveryUnit::get(txn)->getOplogReadTill())
- {}
-
- boost::optional<Record> next() final {
- if (_eof) return {};
-
- WT_CURSOR* c = _cursor->get();
- {
- // Nothing after the next line can throw WCEs.
- // Note that an unpositioned (or eof) WT_CURSOR returns the first/last entry in the
- // table when you call next/prev.
- int advanceRet = WT_OP_CHECK(_forward ? c->next(c) : c->prev(c));
- if (advanceRet == WT_NOTFOUND) {
- _eof = true;
- return {};
- }
- invariantWTOK(advanceRet);
- }
+ return (appMetadata.getValue().getIntField("oplogKeyExtractionVersion") == 1);
+}
- int64_t key;
- invariantWTOK(c->get_key(c, &key));
- const RecordId id = _fromKey(key);
+} // namespace
- if (!isVisible(id)) {
- _eof = true;
- return {};
- }
+MONGO_FP_DECLARE(WTWriteConflictException);
- WT_ITEM value;
- invariantWTOK(c->get_value(c, &value));
- auto data = RecordData(static_cast<const char*>(value.data), value.size);
- data.makeOwned(); // TODO delete this line once safe.
+const std::string kWiredTigerEngineName = "wiredTiger";
- _lastReturnedId = id;
- return {{id, std::move(data)}};
- }
+class WiredTigerRecordStore::Cursor final : public RecordCursor {
+public:
+ Cursor(OperationContext* txn,
+ const WiredTigerRecordStore& rs,
+ bool forward = true,
+ bool forParallelCollectionScan = false)
+ : _rs(rs),
+ _txn(txn),
+ _forward(forward),
+ _forParallelCollectionScan(forParallelCollectionScan),
+ _cursor(new WiredTigerCursor(rs.getURI(), rs.instanceId(), true, txn)),
+ _readUntilForOplog(WiredTigerRecoveryUnit::get(txn)->getOplogReadTill()) {}
- boost::optional<Record> seekExact(const RecordId& id) final {
- if (!isVisible(id)) {
- _eof = true;
- return {};
- }
+ boost::optional<Record> next() final {
+ if (_eof)
+ return {};
- WT_CURSOR* c = _cursor->get();
- c->set_key(c, _makeKey(id));
+ WT_CURSOR* c = _cursor->get();
+ {
// Nothing after the next line can throw WCEs.
- int seekRet = WT_OP_CHECK(c->search(c));
- if (seekRet == WT_NOTFOUND) {
+ // Note that an unpositioned (or eof) WT_CURSOR returns the first/last entry in the
+ // table when you call next/prev.
+ int advanceRet = WT_OP_CHECK(_forward ? c->next(c) : c->prev(c));
+ if (advanceRet == WT_NOTFOUND) {
_eof = true;
return {};
}
- invariantWTOK(seekRet);
+ invariantWTOK(advanceRet);
+ }
- WT_ITEM value;
- invariantWTOK(c->get_value(c, &value));
- auto data = RecordData(static_cast<const char*>(value.data), value.size);
- data.makeOwned(); // TODO delete this line once safe.
+ int64_t key;
+ invariantWTOK(c->get_key(c, &key));
+ const RecordId id = _fromKey(key);
- _lastReturnedId = id;
- return {{id, std::move(data)}};
+ if (!isVisible(id)) {
+ _eof = true;
+ return {};
}
- void savePositioned() final {
- // It must be safe to call save() twice in a row without calling restore().
- if (!_txn) return;
-
-            // the cursor and recoveryUnit are valid on restore,
-            // so we just record the recoveryUnit to sanity-check it on restore
- _savedRecoveryUnit = _txn->recoveryUnit();
- if ( _cursor && !wt_keeptxnopen() ) {
- try {
- _cursor->reset();
- }
- catch (const WriteConflictException& wce) {
- // Ignore since this is only called when we are about to kill our transaction
- // anyway.
- }
- }
+ WT_ITEM value;
+ invariantWTOK(c->get_value(c, &value));
+ auto data = RecordData(static_cast<const char*>(value.data), value.size);
+ data.makeOwned(); // TODO delete this line once safe.
- if (_forParallelCollectionScan) {
- // Delete the cursor since we may come back to a different RecoveryUnit
- _cursor.reset();
- }
- _txn = nullptr;
- }
+ _lastReturnedId = id;
+ return {{id, std::move(data)}};
+ }
- void saveUnpositioned() final {
- savePositioned();
- _lastReturnedId = RecordId();
+ boost::optional<Record> seekExact(const RecordId& id) final {
+ if (!isVisible(id)) {
+ _eof = true;
+ return {};
}
- bool restore(OperationContext* txn) final {
- _txn = txn;
+ WT_CURSOR* c = _cursor->get();
+ c->set_key(c, _makeKey(id));
+ // Nothing after the next line can throw WCEs.
+ int seekRet = WT_OP_CHECK(c->search(c));
+ if (seekRet == WT_NOTFOUND) {
+ _eof = true;
+ return {};
+ }
+ invariantWTOK(seekRet);
- // If we've hit EOF, then this iterator is done and need not be restored.
- if (_eof) return true;
+ WT_ITEM value;
+ invariantWTOK(c->get_value(c, &value));
+ auto data = RecordData(static_cast<const char*>(value.data), value.size);
+ data.makeOwned(); // TODO delete this line once safe.
- bool needRestore = false;
+ _lastReturnedId = id;
+ return {{id, std::move(data)}};
+ }
- if (_forParallelCollectionScan) {
- needRestore = true;
- _savedRecoveryUnit = txn->recoveryUnit();
- _cursor.reset( new WiredTigerCursor( _rs.getURI(), _rs.instanceId(), true, txn ) );
- _forParallelCollectionScan = false; // we only do this the first time
+ void savePositioned() final {
+ // It must be safe to call save() twice in a row without calling restore().
+ if (!_txn)
+ return;
+
+        // the cursor and recoveryUnit are valid on restore,
+        // so we just record the recoveryUnit to sanity-check it on restore
+ _savedRecoveryUnit = _txn->recoveryUnit();
+ if (_cursor && !wt_keeptxnopen()) {
+ try {
+ _cursor->reset();
+ } catch (const WriteConflictException& wce) {
+ // Ignore since this is only called when we are about to kill our transaction
+ // anyway.
}
- invariant( _savedRecoveryUnit == txn->recoveryUnit() );
-
- if (!needRestore && wt_keeptxnopen()) return true;
- if (_lastReturnedId.isNull()) return true;
+ }
- // This will ensure an active session exists, so any restored cursors will bind to it
- invariant(WiredTigerRecoveryUnit::get(txn)->getSession(txn) == _cursor->getSession());
+ if (_forParallelCollectionScan) {
+ // Delete the cursor since we may come back to a different RecoveryUnit
+ _cursor.reset();
+ }
+ _txn = nullptr;
+ }
- WT_CURSOR* c = _cursor->get();
- c->set_key(c, _makeKey(_lastReturnedId));
+ void saveUnpositioned() final {
+ savePositioned();
+ _lastReturnedId = RecordId();
+ }
- int cmp;
- int ret = WT_OP_CHECK(c->search_near(c, &cmp));
- if (ret == WT_NOTFOUND) {
- _eof = true;
- return !_rs._isCapped;
- }
- invariantWTOK(ret);
+ bool restore(OperationContext* txn) final {
+ _txn = txn;
- if (cmp == 0) return true; // Landed right where we left off.
+ // If we've hit EOF, then this iterator is done and need not be restored.
+ if (_eof)
+ return true;
- if (_rs._isCapped) {
- // Doc was deleted either by cappedDeleteAsNeeded() or cappedTruncateAfter().
- // It is important that we error out in this case so that consumers don't
- // silently get 'holes' when scanning capped collections. We don't make
- // this guarantee for normal collections so it is ok to skip ahead in that case.
- _eof = true;
- return false;
- }
+ bool needRestore = false;
- if (_forward && cmp > 0) {
- // We landed after where we were. Move back one so that next() will return this
- // document.
- ret = WT_OP_CHECK(c->prev(c));
- }
- else if (!_forward && cmp < 0) {
- // Do the opposite for reverse cursors.
- ret = WT_OP_CHECK(c->next(c));
- }
- if (ret != WT_NOTFOUND) invariantWTOK(ret);
+ if (_forParallelCollectionScan) {
+ needRestore = true;
+ _savedRecoveryUnit = txn->recoveryUnit();
+ _cursor.reset(new WiredTigerCursor(_rs.getURI(), _rs.instanceId(), true, txn));
+ _forParallelCollectionScan = false; // we only do this the first time
+ }
+ invariant(_savedRecoveryUnit == txn->recoveryUnit());
+ if (!needRestore && wt_keeptxnopen())
+ return true;
+ if (_lastReturnedId.isNull())
return true;
- }
- private:
- bool isVisible(const RecordId& id) {
- if (!_rs._isCapped) return true;
+ // This will ensure an active session exists, so any restored cursors will bind to it
+ invariant(WiredTigerRecoveryUnit::get(txn)->getSession(txn) == _cursor->getSession());
- if ( _readUntilForOplog.isNull() || !_rs._isOplog ) {
- // this is the normal capped case
- return !_rs.isCappedHidden(id);
- }
+ WT_CURSOR* c = _cursor->get();
+ c->set_key(c, _makeKey(_lastReturnedId));
- // this is for oplogs
- if (id == _readUntilForOplog) {
-                // we allow if it's been committed already
- return !_rs.isCappedHidden(id);
- }
+ int cmp;
+ int ret = WT_OP_CHECK(c->search_near(c, &cmp));
+ if (ret == WT_NOTFOUND) {
+ _eof = true;
+ return !_rs._isCapped;
+ }
+ invariantWTOK(ret);
+
+ if (cmp == 0)
+ return true; // Landed right where we left off.
- return id < _readUntilForOplog;
+ if (_rs._isCapped) {
+ // Doc was deleted either by cappedDeleteAsNeeded() or cappedTruncateAfter().
+ // It is important that we error out in this case so that consumers don't
+ // silently get 'holes' when scanning capped collections. We don't make
+ // this guarantee for normal collections so it is ok to skip ahead in that case.
+ _eof = true;
+ return false;
}
- const WiredTigerRecordStore& _rs;
- OperationContext* _txn;
- RecoveryUnit* _savedRecoveryUnit; // only used to sanity check between save/restore.
- const bool _forward;
- bool _forParallelCollectionScan; // This can go away once SERVER-17364 is resolved.
- std::unique_ptr<WiredTigerCursor> _cursor;
- bool _eof = false;
- RecordId _lastReturnedId;
- const RecordId _readUntilForOplog;
- };
-
- StatusWith<std::string> WiredTigerRecordStore::parseOptionsField(const BSONObj options) {
- StringBuilder ss;
- BSONForEach(elem, options) {
- if (elem.fieldNameStringData() == "configString") {
- if (elem.type() != String) {
- return StatusWith<std::string>(ErrorCodes::TypeMismatch, str::stream()
- << "storageEngine.wiredTiger.configString "
- << "must be a string. "
- << "Not adding 'configString' value "
- << elem << " to collection configuration");
- }
- ss << elem.valueStringData() << ',';
- }
- else {
- // Return error on first unrecognized field.
- return StatusWith<std::string>(ErrorCodes::InvalidOptions, str::stream()
- << '\'' << elem.fieldNameStringData() << '\''
- << " is not a supported option in "
- << "storageEngine.wiredTiger");
- }
+ if (_forward && cmp > 0) {
+ // We landed after where we were. Move back one so that next() will return this
+ // document.
+ ret = WT_OP_CHECK(c->prev(c));
+ } else if (!_forward && cmp < 0) {
+ // Do the opposite for reverse cursors.
+ ret = WT_OP_CHECK(c->next(c));
}
- return StatusWith<std::string>(ss.str());
+ if (ret != WT_NOTFOUND)
+ invariantWTOK(ret);
+
+ return true;
}
- // static
- StatusWith<std::string> WiredTigerRecordStore::generateCreateString(
- StringData ns,
- const CollectionOptions& options,
- StringData extraStrings) {
+private:
+ bool isVisible(const RecordId& id) {
+ if (!_rs._isCapped)
+ return true;
- // Separate out a prefix and suffix in the default string. User configuration will
- // override values in the prefix, but not values in the suffix.
- str::stream ss;
- ss << "type=file,";
-        // Setting this larger than 10m can hurt latency and cause throughput degradation if this
- // is the oplog. See SERVER-16247
- ss << "memory_page_max=10m,";
- // Choose a higher split percent, since most usage is append only. Allow some space
- // for workloads where updates increase the size of documents.
- ss << "split_pct=90,";
- ss << "leaf_value_max=64MB,";
- ss << "checksum=on,";
- if (wiredTigerGlobalOptions.useCollectionPrefixCompression) {
- ss << "prefix_compression,";
+ if (_readUntilForOplog.isNull() || !_rs._isOplog) {
+ // this is the normal capped case
+ return !_rs.isCappedHidden(id);
}
- ss << "block_compressor=" << wiredTigerGlobalOptions.collectionBlockCompressor << ",";
+ // this is for oplogs
+ if (id == _readUntilForOplog) {
+        // we allow if it's been committed already
+ return !_rs.isCappedHidden(id);
+ }
- ss << WiredTigerCustomizationHooks::get(getGlobalServiceContext())->getOpenConfig(ns);
+ return id < _readUntilForOplog;
+ }
- ss << extraStrings << ",";
+ const WiredTigerRecordStore& _rs;
+ OperationContext* _txn;
+ RecoveryUnit* _savedRecoveryUnit; // only used to sanity check between save/restore.
+ const bool _forward;
+ bool _forParallelCollectionScan; // This can go away once SERVER-17364 is resolved.
+ std::unique_ptr<WiredTigerCursor> _cursor;
+ bool _eof = false;
+ RecordId _lastReturnedId;
+ const RecordId _readUntilForOplog;
+};
+
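+
restore() above leans on WT_CURSOR::search_near: reposition near the last returned key, then step prev/next depending on which side the cursor landed so iteration resumes without skipping or repeating. The same pattern against the raw C API (session, table name, and key are illustrative):

    // Given an already-open WT_SESSION* session:
    int64_t lastKey = 42;  // illustrative saved position
    WT_CURSOR* c;
    invariantWTOK(session->open_cursor(session, "table:example", NULL, NULL, &c));
    c->set_key(c, lastKey);
    int cmp;
    int ret = c->search_near(c, &cmp);
    if (ret == 0 && cmp > 0) {
        // Landed past the saved key; step back so next() resumes right after it.
        ret = c->prev(c);
    }
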
+StatusWith<std::string> WiredTigerRecordStore::parseOptionsField(const BSONObj options) {
+ StringBuilder ss;
+ BSONForEach(elem, options) {
+ if (elem.fieldNameStringData() == "configString") {
+ if (elem.type() != String) {
+ return StatusWith<std::string>(ErrorCodes::TypeMismatch,
+ str::stream()
+ << "storageEngine.wiredTiger.configString "
+ << "must be a string. "
+ << "Not adding 'configString' value " << elem
+ << " to collection configuration");
+ }
+ ss << elem.valueStringData() << ',';
+ } else {
+ // Return error on first unrecognized field.
+ return StatusWith<std::string>(ErrorCodes::InvalidOptions,
+ str::stream() << '\'' << elem.fieldNameStringData()
+ << '\'' << " is not a supported option in "
+ << "storageEngine.wiredTiger");
+ }
+ }
+ return StatusWith<std::string>(ss.str());
+}
- StatusWith<std::string> customOptions =
- parseOptionsField(options.storageEngine.getObjectField(kWiredTigerEngineName));
- if (!customOptions.isOK())
- return customOptions;
+// static
+StatusWith<std::string> WiredTigerRecordStore::generateCreateString(
+ StringData ns, const CollectionOptions& options, StringData extraStrings) {
+ // Separate out a prefix and suffix in the default string. User configuration will
+ // override values in the prefix, but not values in the suffix.
+ str::stream ss;
+ ss << "type=file,";
+    // Setting this larger than 10m can hurt latency and cause throughput degradation if this
+ // is the oplog. See SERVER-16247
+ ss << "memory_page_max=10m,";
+ // Choose a higher split percent, since most usage is append only. Allow some space
+ // for workloads where updates increase the size of documents.
+ ss << "split_pct=90,";
+ ss << "leaf_value_max=64MB,";
+ ss << "checksum=on,";
+ if (wiredTigerGlobalOptions.useCollectionPrefixCompression) {
+ ss << "prefix_compression,";
+ }
- ss << customOptions.getValue();
+ ss << "block_compressor=" << wiredTigerGlobalOptions.collectionBlockCompressor << ",";
- if ( NamespaceString::oplog(ns) ) {
- // force file for oplog
- ss << "type=file,";
- // Tune down to 10m. See SERVER-16247
- ss << "memory_page_max=10m,";
- }
+ ss << WiredTigerCustomizationHooks::get(getGlobalServiceContext())->getOpenConfig(ns);
- // WARNING: No user-specified config can appear below this line. These options are required
- // for correct behavior of the server.
+ ss << extraStrings << ",";
- ss << "key_format=q,value_format=u";
+ StatusWith<std::string> customOptions =
+ parseOptionsField(options.storageEngine.getObjectField(kWiredTigerEngineName));
+ if (!customOptions.isOK())
+ return customOptions;
- // Record store metadata
- ss << ",app_metadata=(formatVersion=" << kCurrentRecordStoreVersion;
- if (NamespaceString::oplog(ns)) {
- ss << ",oplogKeyExtractionVersion=1";
- }
- ss << ")";
+ ss << customOptions.getValue();
- return StatusWith<std::string>(ss);
+ if (NamespaceString::oplog(ns)) {
+ // force file for oplog
+ ss << "type=file,";
+ // Tune down to 10m. See SERVER-16247
+ ss << "memory_page_max=10m,";
}
- WiredTigerRecordStore::WiredTigerRecordStore(OperationContext* ctx,
- StringData ns,
- StringData uri,
- bool isCapped,
- int64_t cappedMaxSize,
- int64_t cappedMaxDocs,
- CappedDocumentDeleteCallback* cappedDeleteCallback,
- WiredTigerSizeStorer* sizeStorer)
- : RecordStore( ns ),
- _uri( uri.toString() ),
- _instanceId( WiredTigerSession::genCursorId() ),
- _isCapped( isCapped ),
- _isOplog( NamespaceString::oplog( ns ) ),
- _cappedMaxSize( cappedMaxSize ),
- _cappedMaxSizeSlack( std::min(cappedMaxSize/10, int64_t(16*1024*1024)) ),
- _cappedMaxDocs( cappedMaxDocs ),
- _cappedSleep(0),
- _cappedSleepMS(0),
- _cappedDeleteCallback( cappedDeleteCallback ),
- _cappedDeleteCheckCount(0),
- _useOplogHack(shouldUseOplogHack(ctx, _uri)),
- _sizeStorer( sizeStorer ),
- _sizeStorerCounter(0),
- _shuttingDown(false)
- {
- Status versionStatus = WiredTigerUtil::checkApplicationMetadataFormatVersion(
- ctx, uri, kMinimumRecordStoreVersion, kMaximumRecordStoreVersion);
- if (!versionStatus.isOK()) {
- fassertFailedWithStatusNoTrace(28548, versionStatus);
- }
+ // WARNING: No user-specified config can appear below this line. These options are required
+ // for correct behavior of the server.
- if (_isCapped) {
- invariant(_cappedMaxSize > 0);
- invariant(_cappedMaxDocs == -1 || _cappedMaxDocs > 0);
- }
- else {
- invariant(_cappedMaxSize == -1);
- invariant(_cappedMaxDocs == -1);
- }
+ ss << "key_format=q,value_format=u";
- // Find the largest RecordId currently in use and estimate the number of records.
- Cursor cursor(ctx, *this, /*forward=*/false);
- if (auto record = cursor.next()) {
- int64_t max = _makeKey(record->id);
- _oplog_highestSeen = record->id;
- _nextIdNum.store( 1 + max );
-
- if ( _sizeStorer ) {
- long long numRecords;
- long long dataSize;
- _sizeStorer->loadFromCache( uri, &numRecords, &dataSize );
- _numRecords.store( numRecords );
- _dataSize.store( dataSize );
- _sizeStorer->onCreate( this, numRecords, dataSize );
- }
+ // Record store metadata
+ ss << ",app_metadata=(formatVersion=" << kCurrentRecordStoreVersion;
+ if (NamespaceString::oplog(ns)) {
+ ss << ",oplogKeyExtractionVersion=1";
+ }
+ ss << ")";
- else {
- LOG(1) << "Doing scan of collection " << ns << " to get size and count info";
+ return StatusWith<std::string>(ss);
+}
- _numRecords.store(0);
- _dataSize.store(0);
+WiredTigerRecordStore::WiredTigerRecordStore(OperationContext* ctx,
+ StringData ns,
+ StringData uri,
+ bool isCapped,
+ int64_t cappedMaxSize,
+ int64_t cappedMaxDocs,
+ CappedDocumentDeleteCallback* cappedDeleteCallback,
+ WiredTigerSizeStorer* sizeStorer)
+ : RecordStore(ns),
+ _uri(uri.toString()),
+ _instanceId(WiredTigerSession::genCursorId()),
+ _isCapped(isCapped),
+ _isOplog(NamespaceString::oplog(ns)),
+ _cappedMaxSize(cappedMaxSize),
+ _cappedMaxSizeSlack(std::min(cappedMaxSize / 10, int64_t(16 * 1024 * 1024))),
+ _cappedMaxDocs(cappedMaxDocs),
+ _cappedSleep(0),
+ _cappedSleepMS(0),
+ _cappedDeleteCallback(cappedDeleteCallback),
+ _cappedDeleteCheckCount(0),
+ _useOplogHack(shouldUseOplogHack(ctx, _uri)),
+ _sizeStorer(sizeStorer),
+ _sizeStorerCounter(0),
+ _shuttingDown(false) {
+ Status versionStatus = WiredTigerUtil::checkApplicationMetadataFormatVersion(
+ ctx, uri, kMinimumRecordStoreVersion, kMaximumRecordStoreVersion);
+ if (!versionStatus.isOK()) {
+ fassertFailedWithStatusNoTrace(28548, versionStatus);
+ }
- do {
- _numRecords.fetchAndAdd(1);
- _dataSize.fetchAndAdd(record->data.size());
- } while ((record = cursor.next()));
+ if (_isCapped) {
+ invariant(_cappedMaxSize > 0);
+ invariant(_cappedMaxDocs == -1 || _cappedMaxDocs > 0);
+ } else {
+ invariant(_cappedMaxSize == -1);
+ invariant(_cappedMaxDocs == -1);
+ }
- if ( _sizeStorer ) {
- _sizeStorer->storeToCache( _uri, _numRecords.load(), _dataSize.load() );
- }
- }
+ // Find the largest RecordId currently in use and estimate the number of records.
+ Cursor cursor(ctx, *this, /*forward=*/false);
+ if (auto record = cursor.next()) {
+ int64_t max = _makeKey(record->id);
+ _oplog_highestSeen = record->id;
+ _nextIdNum.store(1 + max);
+
+ if (_sizeStorer) {
+ long long numRecords;
+ long long dataSize;
+ _sizeStorer->loadFromCache(uri, &numRecords, &dataSize);
+ _numRecords.store(numRecords);
+ _dataSize.store(dataSize);
+ _sizeStorer->onCreate(this, numRecords, dataSize);
}
+
else {
- _dataSize.store(0);
- _numRecords.store(0);
- // Need to start at 1 so we are always higher than RecordId::min()
- _nextIdNum.store( 1 );
- if ( sizeStorer )
- _sizeStorer->onCreate( this, 0, 0 );
- }
+ LOG(1) << "Doing scan of collection " << ns << " to get size and count info";
- _hasBackgroundThread = WiredTigerKVEngine::initRsOplogBackgroundThread(ns);
- }
+ _numRecords.store(0);
+ _dataSize.store(0);
- WiredTigerRecordStore::~WiredTigerRecordStore() {
- {
- stdx::lock_guard<stdx::timed_mutex> lk(_cappedDeleterMutex);
- _shuttingDown = true;
- }
+ do {
+ _numRecords.fetchAndAdd(1);
+ _dataSize.fetchAndAdd(record->data.size());
+ } while ((record = cursor.next()));
- LOG(1) << "~WiredTigerRecordStore for: " << ns();
- if ( _sizeStorer ) {
- _sizeStorer->onDestroy( this );
+ if (_sizeStorer) {
+ _sizeStorer->storeToCache(_uri, _numRecords.load(), _dataSize.load());
+ }
}
+ } else {
+ _dataSize.store(0);
+ _numRecords.store(0);
+ // Need to start at 1 so we are always higher than RecordId::min()
+ _nextIdNum.store(1);
+ if (sizeStorer)
+ _sizeStorer->onCreate(this, 0, 0);
}
- const char* WiredTigerRecordStore::name() const {
- return kWiredTigerEngineName.c_str();
- }
+ _hasBackgroundThread = WiredTigerKVEngine::initRsOplogBackgroundThread(ns);
+}
- bool WiredTigerRecordStore::inShutdown() const {
+WiredTigerRecordStore::~WiredTigerRecordStore() {
+ {
stdx::lock_guard<stdx::timed_mutex> lk(_cappedDeleterMutex);
- return _shuttingDown;
+ _shuttingDown = true;
}
- long long WiredTigerRecordStore::dataSize( OperationContext *txn ) const {
- return _dataSize.load();
+ LOG(1) << "~WiredTigerRecordStore for: " << ns();
+ if (_sizeStorer) {
+ _sizeStorer->onDestroy(this);
}
+}
- long long WiredTigerRecordStore::numRecords( OperationContext *txn ) const {
- return _numRecords.load();
- }
+const char* WiredTigerRecordStore::name() const {
+ return kWiredTigerEngineName.c_str();
+}
- bool WiredTigerRecordStore::isCapped() const {
- return _isCapped;
- }
+bool WiredTigerRecordStore::inShutdown() const {
+ stdx::lock_guard<stdx::timed_mutex> lk(_cappedDeleterMutex);
+ return _shuttingDown;
+}
- int64_t WiredTigerRecordStore::cappedMaxDocs() const {
- invariant(_isCapped);
- return _cappedMaxDocs;
- }
+long long WiredTigerRecordStore::dataSize(OperationContext* txn) const {
+ return _dataSize.load();
+}
- int64_t WiredTigerRecordStore::cappedMaxSize() const {
- invariant(_isCapped);
- return _cappedMaxSize;
- }
+long long WiredTigerRecordStore::numRecords(OperationContext* txn) const {
+ return _numRecords.load();
+}
- int64_t WiredTigerRecordStore::storageSize( OperationContext* txn,
- BSONObjBuilder* extraInfo,
- int infoLevel ) const {
- WiredTigerSession* session = WiredTigerRecoveryUnit::get(txn)->getSession(txn);
- StatusWith<int64_t> result = WiredTigerUtil::getStatisticsValueAs<int64_t>(
- session->getSession(),
- "statistics:" + getURI(), "statistics=(size)", WT_STAT_DSRC_BLOCK_SIZE);
- uassertStatusOK(result.getStatus());
+bool WiredTigerRecordStore::isCapped() const {
+ return _isCapped;
+}
- int64_t size = result.getValue();
+int64_t WiredTigerRecordStore::cappedMaxDocs() const {
+ invariant(_isCapped);
+ return _cappedMaxDocs;
+}
- if ( size == 0 && _isCapped ) {
- // Many things assume an empty capped collection still takes up space.
- return 1;
- }
- return size;
+int64_t WiredTigerRecordStore::cappedMaxSize() const {
+ invariant(_isCapped);
+ return _cappedMaxSize;
+}
+
+int64_t WiredTigerRecordStore::storageSize(OperationContext* txn,
+ BSONObjBuilder* extraInfo,
+ int infoLevel) const {
+ WiredTigerSession* session = WiredTigerRecoveryUnit::get(txn)->getSession(txn);
+ StatusWith<int64_t> result =
+ WiredTigerUtil::getStatisticsValueAs<int64_t>(session->getSession(),
+ "statistics:" + getURI(),
+ "statistics=(size)",
+ WT_STAT_DSRC_BLOCK_SIZE);
+ uassertStatusOK(result.getStatus());
+
+ int64_t size = result.getValue();
+
+ if (size == 0 && _isCapped) {
+ // Many things assume an empty capped collection still takes up space.
+ return 1;
}
+ return size;
+}
- // Retrieve the value from a positioned cursor.
- RecordData WiredTigerRecordStore::_getData(const WiredTigerCursor& cursor) const {
- WT_ITEM value;
- int ret = cursor->get_value(cursor.get(), &value);
- invariantWTOK(ret);
+// Retrieve the value from a positioned cursor.
+RecordData WiredTigerRecordStore::_getData(const WiredTigerCursor& cursor) const {
+ WT_ITEM value;
+ int ret = cursor->get_value(cursor.get(), &value);
+ invariantWTOK(ret);
- SharedBuffer data = SharedBuffer::allocate(value.size);
- memcpy( data.get(), value.data, value.size );
- return RecordData(data, value.size);
- }
+ SharedBuffer data = SharedBuffer::allocate(value.size);
+ memcpy(data.get(), value.data, value.size);
+ return RecordData(data, value.size);
+}
- RecordData WiredTigerRecordStore::dataFor(OperationContext* txn, const RecordId& loc) const {
-        // the returned RecordData owns a copy of the record's value (see _getData)
- WiredTigerCursor curwrap( _uri, _instanceId, true, txn);
- WT_CURSOR *c = curwrap.get();
- invariant( c );
- c->set_key(c, _makeKey(loc));
- int ret = WT_OP_CHECK(c->search(c));
- massert(28556, "Didn't find RecordId in WiredTigerRecordStore", ret != WT_NOTFOUND);
- invariantWTOK(ret);
- return _getData(curwrap);
- }
+RecordData WiredTigerRecordStore::dataFor(OperationContext* txn, const RecordId& loc) const {
+    // the returned RecordData owns a copy of the record's value (see _getData)
+ WiredTigerCursor curwrap(_uri, _instanceId, true, txn);
+ WT_CURSOR* c = curwrap.get();
+ invariant(c);
+ c->set_key(c, _makeKey(loc));
+ int ret = WT_OP_CHECK(c->search(c));
+ massert(28556, "Didn't find RecordId in WiredTigerRecordStore", ret != WT_NOTFOUND);
+ invariantWTOK(ret);
+ return _getData(curwrap);
+}
- bool WiredTigerRecordStore::findRecord( OperationContext* txn,
- const RecordId& loc, RecordData* out ) const {
- WiredTigerCursor curwrap( _uri, _instanceId, true, txn);
- WT_CURSOR *c = curwrap.get();
- invariant( c );
- c->set_key(c, _makeKey(loc));
- int ret = WT_OP_CHECK(c->search(c));
- if (ret == WT_NOTFOUND) {
- return false;
- }
- invariantWTOK(ret);
- *out = _getData(curwrap);
- return true;
+bool WiredTigerRecordStore::findRecord(OperationContext* txn,
+ const RecordId& loc,
+ RecordData* out) const {
+ WiredTigerCursor curwrap(_uri, _instanceId, true, txn);
+ WT_CURSOR* c = curwrap.get();
+ invariant(c);
+ c->set_key(c, _makeKey(loc));
+ int ret = WT_OP_CHECK(c->search(c));
+ if (ret == WT_NOTFOUND) {
+ return false;
}
+ invariantWTOK(ret);
+ *out = _getData(curwrap);
+ return true;
+}
- void WiredTigerRecordStore::deleteRecord( OperationContext* txn, const RecordId& loc ) {
- WiredTigerCursor cursor( _uri, _instanceId, true, txn );
- cursor.assertInActiveTxn();
- WT_CURSOR *c = cursor.get();
- c->set_key(c, _makeKey(loc));
- int ret = WT_OP_CHECK(c->search(c));
- invariantWTOK(ret);
+void WiredTigerRecordStore::deleteRecord(OperationContext* txn, const RecordId& loc) {
+ WiredTigerCursor cursor(_uri, _instanceId, true, txn);
+ cursor.assertInActiveTxn();
+ WT_CURSOR* c = cursor.get();
+ c->set_key(c, _makeKey(loc));
+ int ret = WT_OP_CHECK(c->search(c));
+ invariantWTOK(ret);
- WT_ITEM old_value;
- ret = c->get_value(c, &old_value);
- invariantWTOK(ret);
+ WT_ITEM old_value;
+ ret = c->get_value(c, &old_value);
+ invariantWTOK(ret);
- int old_length = old_value.size;
+ int old_length = old_value.size;
- ret = WT_OP_CHECK(c->remove(c));
- invariantWTOK(ret);
+ ret = WT_OP_CHECK(c->remove(c));
+ invariantWTOK(ret);
- _changeNumRecords(txn, -1);
- _increaseDataSize(txn, -old_length);
- }
+ _changeNumRecords(txn, -1);
+ _increaseDataSize(txn, -old_length);
+}
- bool WiredTigerRecordStore::cappedAndNeedDelete() const {
- if (!_isCapped)
- return false;
+bool WiredTigerRecordStore::cappedAndNeedDelete() const {
+ if (!_isCapped)
+ return false;
- if (_dataSize.load() >= _cappedMaxSize)
- return true;
+ if (_dataSize.load() >= _cappedMaxSize)
+ return true;
- if ((_cappedMaxDocs != -1) && (_numRecords.load() > _cappedMaxDocs))
- return true;
+ if ((_cappedMaxDocs != -1) && (_numRecords.load() > _cappedMaxDocs))
+ return true;
- return false;
- }
+ return false;
+}
- int64_t WiredTigerRecordStore::cappedDeleteAsNeeded(OperationContext* txn,
- const RecordId& justInserted) {
+int64_t WiredTigerRecordStore::cappedDeleteAsNeeded(OperationContext* txn,
+ const RecordId& justInserted) {
+ // We only want to do the checks occasionally as they are expensive.
+ // This variable isn't thread safe, but has loose semantics anyway.
+ dassert(!_isOplog || _cappedMaxDocs == -1);
- // We only want to do the checks occasionally as they are expensive.
- // This variable isn't thread safe, but has loose semantics anyway.
- dassert( !_isOplog || _cappedMaxDocs == -1 );
+ if (!cappedAndNeedDelete())
+ return 0;
- if (!cappedAndNeedDelete())
- return 0;
+ // ensure only one thread at a time can do deletes, otherwise they'll conflict.
+ stdx::unique_lock<stdx::timed_mutex> lock(_cappedDeleterMutex, stdx::defer_lock);
- // ensure only one thread at a time can do deletes, otherwise they'll conflict.
- stdx::unique_lock<stdx::timed_mutex> lock(_cappedDeleterMutex, stdx::defer_lock);
+ if (_cappedMaxDocs != -1) {
+ lock.lock(); // Max docs has to be exact, so have to check every time.
+ } else if (_hasBackgroundThread) {
+        // We are foreground, and there is a background thread.
- if (_cappedMaxDocs != -1) {
- lock.lock(); // Max docs has to be exact, so have to check every time.
+ // Check if we need some back pressure.
+ if ((_dataSize.load() - _cappedMaxSize) < _cappedMaxSizeSlack) {
+ return 0;
}
- else if(_hasBackgroundThread) {
- // We are foreground, and there is a background thread,
- // Check if we need some back pressure.
- if ((_dataSize.load() - _cappedMaxSize) < _cappedMaxSizeSlack) {
+ // Back pressure needed!
+        // We're not actually going to delete anything, but we're going to synchronize
+ // on the deleter thread.
+ // Don't wait forever: we're in a transaction, we could block eviction.
+ if (!lock.try_lock()) {
+ Date_t before = Date_t::now();
+ (void)lock.timed_lock(boost::posix_time::millisec(200));
+ stdx::chrono::milliseconds delay = Date_t::now() - before;
+ _cappedSleep.fetchAndAdd(1);
+ _cappedSleepMS.fetchAndAdd(delay.count());
+ }
+ return 0;
+ } else {
+ if (!lock.try_lock()) {
+ // Someone else is deleting old records. Apply back-pressure if too far behind,
+ // otherwise continue.
+ if ((_dataSize.load() - _cappedMaxSize) < _cappedMaxSizeSlack)
return 0;
- }
- // Back pressure needed!
-            // We're not actually going to delete anything, but we're going to synchronize
- // on the deleter thread.
// Don't wait forever: we're in a transaction, we could block eviction.
- if (!lock.try_lock()) {
- Date_t before = Date_t::now();
- (void)lock.timed_lock(boost::posix_time::millisec(200));
- stdx::chrono::milliseconds delay = Date_t::now() - before;
- _cappedSleep.fetchAndAdd(1);
- _cappedSleepMS.fetchAndAdd(delay.count());
- }
- return 0;
- }
- else {
- if (!lock.try_lock()) {
- // Someone else is deleting old records. Apply back-pressure if too far behind,
- // otherwise continue.
- if ((_dataSize.load() - _cappedMaxSize) < _cappedMaxSizeSlack)
- return 0;
-
- // Don't wait forever: we're in a transaction, we could block eviction.
- Date_t before = Date_t::now();
- bool gotLock = lock.timed_lock(boost::posix_time::millisec(200));
- stdx::chrono::milliseconds delay = Date_t::now() - before;
- _cappedSleep.fetchAndAdd(1);
- _cappedSleepMS.fetchAndAdd(delay.count());
- if (!gotLock)
- return 0;
-
- // If we already waited, let someone else do cleanup unless we are significantly
- // over the limit.
- if ((_dataSize.load() - _cappedMaxSize) < (2 * _cappedMaxSizeSlack))
- return 0;
- }
- }
+ Date_t before = Date_t::now();
+ bool gotLock = lock.timed_lock(boost::posix_time::millisec(200));
+ stdx::chrono::milliseconds delay = Date_t::now() - before;
+ _cappedSleep.fetchAndAdd(1);
+ _cappedSleepMS.fetchAndAdd(delay.count());
+ if (!gotLock)
+ return 0;
- return cappedDeleteAsNeeded_inlock(txn, justInserted);
+ // If we already waited, let someone else do cleanup unless we are significantly
+ // over the limit.
+ if ((_dataSize.load() - _cappedMaxSize) < (2 * _cappedMaxSizeSlack))
+ return 0;
+ }
}
- int64_t WiredTigerRecordStore::cappedDeleteAsNeeded_inlock(OperationContext* txn,
- const RecordId& justInserted) {
-        // we do this in a side transaction in case it aborts
- WiredTigerRecoveryUnit* realRecoveryUnit =
- checked_cast<WiredTigerRecoveryUnit*>( txn->releaseRecoveryUnit() );
- invariant( realRecoveryUnit );
- WiredTigerSessionCache* sc = realRecoveryUnit->getSessionCache();
- OperationContext::RecoveryUnitState const realRUstate =
- txn->setRecoveryUnit(new WiredTigerRecoveryUnit(sc),
- OperationContext::kNotInUnitOfWork);
-
-        WiredTigerRecoveryUnit::get(txn)->markNoTicketRequired(); // realRecoveryUnit already has a ticket
- WT_SESSION* session = WiredTigerRecoveryUnit::get(txn)->getSession(txn)->getSession();
-
- int64_t dataSize = _dataSize.load();
- int64_t numRecords = _numRecords.load();
-
- int64_t sizeOverCap = (dataSize > _cappedMaxSize) ? dataSize - _cappedMaxSize : 0;
- int64_t sizeSaved = 0;
- int64_t docsOverCap = 0, docsRemoved = 0;
- if (_cappedMaxDocs != -1 && numRecords > _cappedMaxDocs)
- docsOverCap = numRecords - _cappedMaxDocs;
-
- try {
- WriteUnitOfWork wuow(txn);
-
- WiredTigerCursor curwrap( _uri, _instanceId, true, txn);
- WT_CURSOR *c = curwrap.get();
- RecordId newestOld;
- int ret = 0;
- while ((sizeSaved < sizeOverCap || docsRemoved < docsOverCap) &&
- (docsRemoved < 20000) &&
- (ret = WT_OP_CHECK(c->next(c))) == 0) {
-
- int64_t key;
- ret = c->get_key(c, &key);
- invariantWTOK(ret);
+ return cappedDeleteAsNeeded_inlock(txn, justInserted);
+}
+
+int64_t WiredTigerRecordStore::cappedDeleteAsNeeded_inlock(OperationContext* txn,
+ const RecordId& justInserted) {
+    // we do this in a side transaction in case it aborts
+ WiredTigerRecoveryUnit* realRecoveryUnit =
+ checked_cast<WiredTigerRecoveryUnit*>(txn->releaseRecoveryUnit());
+ invariant(realRecoveryUnit);
+ WiredTigerSessionCache* sc = realRecoveryUnit->getSessionCache();
+ OperationContext::RecoveryUnitState const realRUstate =
+ txn->setRecoveryUnit(new WiredTigerRecoveryUnit(sc), OperationContext::kNotInUnitOfWork);
+
+    WiredTigerRecoveryUnit::get(txn)->markNoTicketRequired();  // realRecoveryUnit already has a ticket
+ WT_SESSION* session = WiredTigerRecoveryUnit::get(txn)->getSession(txn)->getSession();
+
+ int64_t dataSize = _dataSize.load();
+ int64_t numRecords = _numRecords.load();
+
+ int64_t sizeOverCap = (dataSize > _cappedMaxSize) ? dataSize - _cappedMaxSize : 0;
+ int64_t sizeSaved = 0;
+ int64_t docsOverCap = 0, docsRemoved = 0;
+ if (_cappedMaxDocs != -1 && numRecords > _cappedMaxDocs)
+ docsOverCap = numRecords - _cappedMaxDocs;
+
+ try {
+ WriteUnitOfWork wuow(txn);
+
+ WiredTigerCursor curwrap(_uri, _instanceId, true, txn);
+ WT_CURSOR* c = curwrap.get();
+ RecordId newestOld;
+ int ret = 0;
+ while ((sizeSaved < sizeOverCap || docsRemoved < docsOverCap) && (docsRemoved < 20000) &&
+ (ret = WT_OP_CHECK(c->next(c))) == 0) {
+ int64_t key;
+ ret = c->get_key(c, &key);
+ invariantWTOK(ret);
- // don't go past the record we just inserted
- newestOld = _fromKey(key);
- if ( newestOld >= justInserted ) // TODO: use oldest uncommitted instead
- break;
+ // don't go past the record we just inserted
+ newestOld = _fromKey(key);
+ if (newestOld >= justInserted) // TODO: use oldest uncommitted instead
+ break;
- if ( _shuttingDown )
- break;
+ if (_shuttingDown)
+ break;
- WT_ITEM old_value;
- invariantWTOK(c->get_value(c, &old_value));
+ WT_ITEM old_value;
+ invariantWTOK(c->get_value(c, &old_value));
- ++docsRemoved;
- sizeSaved += old_value.size;
+ ++docsRemoved;
+ sizeSaved += old_value.size;
- if ( _cappedDeleteCallback ) {
- uassertStatusOK(
- _cappedDeleteCallback->aboutToDeleteCapped(
- txn,
- newestOld,
- RecordData(static_cast<const char*>(old_value.data), old_value.size)));
- }
+ if (_cappedDeleteCallback) {
+ uassertStatusOK(_cappedDeleteCallback->aboutToDeleteCapped(
+ txn,
+ newestOld,
+ RecordData(static_cast<const char*>(old_value.data), old_value.size)));
}
+ }
- if (ret != WT_NOTFOUND) {
- invariantWTOK(ret);
+ if (ret != WT_NOTFOUND) {
+ invariantWTOK(ret);
+ }
+
+ if (docsRemoved > 0) {
+ // if we scanned to the end of the collection or past our insert, go back one
+ if (ret == WT_NOTFOUND || newestOld >= justInserted) {
+ ret = WT_OP_CHECK(c->prev(c));
}
+ invariantWTOK(ret);
- if (docsRemoved > 0) {
- // if we scanned to the end of the collection or past our insert, go back one
- if (ret == WT_NOTFOUND || newestOld >= justInserted) {
- ret = WT_OP_CHECK(c->prev(c));
- }
- invariantWTOK(ret);
+ WiredTigerCursor startWrap(_uri, _instanceId, true, txn);
+ WT_CURSOR* start = startWrap.get();
+ ret = WT_OP_CHECK(start->next(start));
+ invariantWTOK(ret);
- WiredTigerCursor startWrap( _uri, _instanceId, true, txn);
- WT_CURSOR* start = startWrap.get();
- ret = WT_OP_CHECK(start->next(start));
+ ret = session->truncate(session, NULL, start, c, NULL);
+ if (ret == ENOENT || ret == WT_NOTFOUND) {
+ // TODO we should remove this case once SERVER-17141 is resolved
+ log() << "Soft failure truncating capped collection. Will try again later.";
+ docsRemoved = 0;
+ } else {
invariantWTOK(ret);
-
- ret = session->truncate(session, NULL, start, c, NULL);
- if (ret == ENOENT || ret == WT_NOTFOUND) {
- // TODO we should remove this case once SERVER-17141 is resolved
- log() << "Soft failure truncating capped collection. Will try again later.";
- docsRemoved = 0;
- }
- else {
- invariantWTOK(ret);
- _changeNumRecords(txn, -docsRemoved);
- _increaseDataSize(txn, -sizeSaved);
- wuow.commit();
- }
+ _changeNumRecords(txn, -docsRemoved);
+ _increaseDataSize(txn, -sizeSaved);
+ wuow.commit();
}
}
- catch ( const WriteConflictException& wce ) {
- delete txn->releaseRecoveryUnit();
- txn->setRecoveryUnit(realRecoveryUnit, realRUstate);
- log() << "got conflict truncating capped, ignoring";
- return 0;
- }
- catch ( ... ) {
- delete txn->releaseRecoveryUnit();
- txn->setRecoveryUnit(realRecoveryUnit, realRUstate);
- throw;
- }
-
+ } catch (const WriteConflictException& wce) {
delete txn->releaseRecoveryUnit();
txn->setRecoveryUnit(realRecoveryUnit, realRUstate);
- return docsRemoved;
- }
-
- StatusWith<RecordId> WiredTigerRecordStore::extractAndCheckLocForOplog(const char* data,
- int len) {
- return oploghack::extractKey(data, len);
+ log() << "got conflict truncating capped, ignoring";
+ return 0;
+ } catch (...) {
+ delete txn->releaseRecoveryUnit();
+ txn->setRecoveryUnit(realRecoveryUnit, realRUstate);
+ throw;
}
- StatusWith<RecordId> WiredTigerRecordStore::insertRecord( OperationContext* txn,
- const char* data,
- int len,
- bool enforceQuota ) {
- if ( _isCapped && len > _cappedMaxSize ) {
- return StatusWith<RecordId>( ErrorCodes::BadValue,
- "object to insert exceeds cappedMaxSize" );
- }
-
- RecordId loc;
- if ( _useOplogHack ) {
- StatusWith<RecordId> status = extractAndCheckLocForOplog(data, len);
- if (!status.isOK())
- return status;
- loc = status.getValue();
- if ( loc > _oplog_highestSeen ) {
- stdx::lock_guard<stdx::mutex> lk( _uncommittedDiskLocsMutex );
- if ( loc > _oplog_highestSeen ) {
- _oplog_highestSeen = loc;
- }
- }
- }
- else if ( _isCapped ) {
- stdx::lock_guard<stdx::mutex> lk( _uncommittedDiskLocsMutex );
- loc = _nextId();
- _addUncommitedDiskLoc_inlock( txn, loc );
- }
- else {
- loc = _nextId();
- }
-
- WiredTigerCursor curwrap( _uri, _instanceId, true, txn);
- curwrap.assertInActiveTxn();
- WT_CURSOR *c = curwrap.get();
- invariant( c );
-
- c->set_key(c, _makeKey(loc));
- WiredTigerItem value(data, len);
- c->set_value(c, value.Get());
- int ret = WT_OP_CHECK(c->insert(c));
- if (ret) {
- return StatusWith<RecordId>(wtRCToStatus(ret, "WiredTigerRecordStore::insertRecord"));
- }
-
- _changeNumRecords( txn, 1 );
- _increaseDataSize( txn, len );
+ delete txn->releaseRecoveryUnit();
+ txn->setRecoveryUnit(realRecoveryUnit, realRUstate);
+ return docsRemoved;
+}
- cappedDeleteAsNeeded(txn, loc);
+StatusWith<RecordId> WiredTigerRecordStore::extractAndCheckLocForOplog(const char* data, int len) {
+ return oploghack::extractKey(data, len);
+}
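
extractAndCheckLocForOplog() delegates to oploghack, whose key idea is that the RecordId is derived from the document's ts field so that RecordId order equals optime order. A sketch of that packing, assuming the usual seconds/increment layout (the real encoding lives in oploghack and is not shown in this diff):

    // Hypothetical illustration, not the actual oploghack code.
    RecordId recordIdForOptime(unsigned secs, unsigned inc) {
        return RecordId((static_cast<int64_t>(secs) << 32) | inc);
    }
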
- return StatusWith<RecordId>( loc );
+StatusWith<RecordId> WiredTigerRecordStore::insertRecord(OperationContext* txn,
+ const char* data,
+ int len,
+ bool enforceQuota) {
+ if (_isCapped && len > _cappedMaxSize) {
+ return StatusWith<RecordId>(ErrorCodes::BadValue, "object to insert exceeds cappedMaxSize");
}
- void WiredTigerRecordStore::dealtWithCappedLoc( const RecordId& loc ) {
- stdx::lock_guard<stdx::mutex> lk( _uncommittedDiskLocsMutex );
- SortedDiskLocs::iterator it = std::find(_uncommittedDiskLocs.begin(),
- _uncommittedDiskLocs.end(),
- loc);
- invariant(it != _uncommittedDiskLocs.end());
- _uncommittedDiskLocs.erase(it);
+ RecordId loc;
+ if (_useOplogHack) {
+ StatusWith<RecordId> status = extractAndCheckLocForOplog(data, len);
+ if (!status.isOK())
+ return status;
+ loc = status.getValue();
+ if (loc > _oplog_highestSeen) {
+ stdx::lock_guard<stdx::mutex> lk(_uncommittedDiskLocsMutex);
+ if (loc > _oplog_highestSeen) {
+ _oplog_highestSeen = loc;
+ }
+ }
+ } else if (_isCapped) {
+ stdx::lock_guard<stdx::mutex> lk(_uncommittedDiskLocsMutex);
+ loc = _nextId();
+ _addUncommitedDiskLoc_inlock(txn, loc);
+ } else {
+ loc = _nextId();
}
- bool WiredTigerRecordStore::isCappedHidden( const RecordId& loc ) const {
- stdx::lock_guard<stdx::mutex> lk( _uncommittedDiskLocsMutex );
- if (_uncommittedDiskLocs.empty()) {
- return false;
- }
- return _uncommittedDiskLocs.front() <= loc;
+ WiredTigerCursor curwrap(_uri, _instanceId, true, txn);
+ curwrap.assertInActiveTxn();
+ WT_CURSOR* c = curwrap.get();
+ invariant(c);
+
+ c->set_key(c, _makeKey(loc));
+ WiredTigerItem value(data, len);
+ c->set_value(c, value.Get());
+ int ret = WT_OP_CHECK(c->insert(c));
+ if (ret) {
+ return StatusWith<RecordId>(wtRCToStatus(ret, "WiredTigerRecordStore::insertRecord"));
}
- StatusWith<RecordId> WiredTigerRecordStore::insertRecord( OperationContext* txn,
- const DocWriter* doc,
- bool enforceQuota ) {
- const int len = doc->documentSize();
+ _changeNumRecords(txn, 1);
+ _increaseDataSize(txn, len);
- boost::shared_array<char> buf( new char[len] );
- doc->writeDocument( buf.get() );
+ cappedDeleteAsNeeded(txn, loc);
- return insertRecord( txn, buf.get(), len, enforceQuota );
- }
+ return StatusWith<RecordId>(loc);
+}
- StatusWith<RecordId> WiredTigerRecordStore::updateRecord( OperationContext* txn,
- const RecordId& loc,
- const char* data,
- int len,
- bool enforceQuota,
- UpdateNotifier* notifier ) {
- WiredTigerCursor curwrap( _uri, _instanceId, true, txn);
- curwrap.assertInActiveTxn();
- WT_CURSOR *c = curwrap.get();
- invariant( c );
- c->set_key(c, _makeKey(loc));
- int ret = WT_OP_CHECK(c->search(c));
- invariantWTOK(ret);
+void WiredTigerRecordStore::dealtWithCappedLoc(const RecordId& loc) {
+ stdx::lock_guard<stdx::mutex> lk(_uncommittedDiskLocsMutex);
+ SortedDiskLocs::iterator it =
+ std::find(_uncommittedDiskLocs.begin(), _uncommittedDiskLocs.end(), loc);
+ invariant(it != _uncommittedDiskLocs.end());
+ _uncommittedDiskLocs.erase(it);
+}
- WT_ITEM old_value;
- ret = c->get_value(c, &old_value);
- invariantWTOK(ret);
+bool WiredTigerRecordStore::isCappedHidden(const RecordId& loc) const {
+ stdx::lock_guard<stdx::mutex> lk(_uncommittedDiskLocsMutex);
+ if (_uncommittedDiskLocs.empty()) {
+ return false;
+ }
+ return _uncommittedDiskLocs.front() <= loc;
+}
- int old_length = old_value.size;
+StatusWith<RecordId> WiredTigerRecordStore::insertRecord(OperationContext* txn,
+ const DocWriter* doc,
+ bool enforceQuota) {
+ const int len = doc->documentSize();
- c->set_key(c, _makeKey(loc));
- WiredTigerItem value(data, len);
- c->set_value(c, value.Get());
- ret = WT_OP_CHECK(c->insert(c));
- invariantWTOK(ret);
+ boost::shared_array<char> buf(new char[len]);
+ doc->writeDocument(buf.get());
- _increaseDataSize(txn, len - old_length);
+ return insertRecord(txn, buf.get(), len, enforceQuota);
+}
- cappedDeleteAsNeeded(txn, loc);
+StatusWith<RecordId> WiredTigerRecordStore::updateRecord(OperationContext* txn,
+ const RecordId& loc,
+ const char* data,
+ int len,
+ bool enforceQuota,
+ UpdateNotifier* notifier) {
+ WiredTigerCursor curwrap(_uri, _instanceId, true, txn);
+ curwrap.assertInActiveTxn();
+ WT_CURSOR* c = curwrap.get();
+ invariant(c);
+ c->set_key(c, _makeKey(loc));
+ int ret = WT_OP_CHECK(c->search(c));
+ invariantWTOK(ret);
+
+ WT_ITEM old_value;
+ ret = c->get_value(c, &old_value);
+ invariantWTOK(ret);
+
+ int old_length = old_value.size;
+
+ c->set_key(c, _makeKey(loc));
+ WiredTigerItem value(data, len);
+ c->set_value(c, value.Get());
+ ret = WT_OP_CHECK(c->insert(c));
+ invariantWTOK(ret);
+
+ _increaseDataSize(txn, len - old_length);
+
+ cappedDeleteAsNeeded(txn, loc);
+
+ return StatusWith<RecordId>(loc);
+}
- return StatusWith<RecordId>( loc );
- }
+bool WiredTigerRecordStore::updateWithDamagesSupported() const {
+ return false;
+}
- bool WiredTigerRecordStore::updateWithDamagesSupported() const {
- return false;
- }
+Status WiredTigerRecordStore::updateWithDamages(OperationContext* txn,
+ const RecordId& loc,
+ const RecordData& oldRec,
+ const char* damageSource,
+ const mutablebson::DamageVector& damages) {
+ invariant(false);
+}
- Status WiredTigerRecordStore::updateWithDamages( OperationContext* txn,
- const RecordId& loc,
- const RecordData& oldRec,
- const char* damageSource,
- const mutablebson::DamageVector& damages ) {
- invariant(false);
+void WiredTigerRecordStore::_oplogSetStartHack(WiredTigerRecoveryUnit* wru) const {
+ stdx::lock_guard<stdx::mutex> lk(_uncommittedDiskLocsMutex);
+ if (_uncommittedDiskLocs.empty()) {
+ wru->setOplogReadTill(_oplog_highestSeen);
+ } else {
+ wru->setOplogReadTill(_uncommittedDiskLocs.front());
}
+}
- void WiredTigerRecordStore::_oplogSetStartHack( WiredTigerRecoveryUnit* wru ) const {
- stdx::lock_guard<stdx::mutex> lk( _uncommittedDiskLocsMutex );
- if ( _uncommittedDiskLocs.empty() ) {
- wru->setOplogReadTill( _oplog_highestSeen );
- }
- else {
- wru->setOplogReadTill( _uncommittedDiskLocs.front() );
+std::unique_ptr<RecordCursor> WiredTigerRecordStore::getCursor(OperationContext* txn,
+ bool forward) const {
+ if (_isOplog && forward) {
+ WiredTigerRecoveryUnit* wru = WiredTigerRecoveryUnit::get(txn);
+ if (!wru->inActiveTxn() || wru->getOplogReadTill().isNull()) {
+ // if we don't have a session, we have no snapshot, so we can update our view
+ _oplogSetStartHack(wru);
}
}
- std::unique_ptr<RecordCursor> WiredTigerRecordStore::getCursor(OperationContext* txn,
- bool forward) const {
-
- if ( _isOplog && forward ) {
- WiredTigerRecoveryUnit* wru = WiredTigerRecoveryUnit::get(txn);
- if ( !wru->inActiveTxn() || wru->getOplogReadTill().isNull() ) {
- // if we don't have a session, we have no snapshot, so we can update our view
- _oplogSetStartHack( wru );
- }
- }
+ return stdx::make_unique<Cursor>(txn, *this, forward);
+}
- return stdx::make_unique<Cursor>(txn, *this, forward);
- }
+std::vector<std::unique_ptr<RecordCursor>> WiredTigerRecordStore::getManyCursors(
+ OperationContext* txn) const {
+ std::vector<std::unique_ptr<RecordCursor>> cursors(1);
+ cursors[0] = stdx::make_unique<Cursor>(txn,
+ *this,
+ /*forward=*/true,
+ /*forParallelCollectionScan=*/true);
+ return cursors;
+}
- std::vector<std::unique_ptr<RecordCursor>> WiredTigerRecordStore::getManyCursors(
- OperationContext* txn) const {
- std::vector<std::unique_ptr<RecordCursor>> cursors(1);
- cursors[0] = stdx::make_unique<Cursor>(txn, *this, /*forward=*/true,
- /*forParallelCollectionScan=*/true);
- return cursors;
+Status WiredTigerRecordStore::truncate(OperationContext* txn) {
+ WiredTigerCursor startWrap(_uri, _instanceId, true, txn);
+ WT_CURSOR* start = startWrap.get();
+ int ret = WT_OP_CHECK(start->next(start));
+ // Empty collections don't have anything to truncate.
+ if (ret == WT_NOTFOUND) {
+ return Status::OK();
}
+ invariantWTOK(ret);
- Status WiredTigerRecordStore::truncate( OperationContext* txn ) {
- WiredTigerCursor startWrap( _uri, _instanceId, true, txn);
- WT_CURSOR* start = startWrap.get();
- int ret = WT_OP_CHECK(start->next(start));
- //Empty collections don't have anything to truncate.
- if (ret == WT_NOTFOUND) {
- return Status::OK();
- }
- invariantWTOK(ret);
+ WT_SESSION* session = WiredTigerRecoveryUnit::get(txn)->getSession(txn)->getSession();
+ invariantWTOK(WT_OP_CHECK(session->truncate(session, NULL, start, NULL, NULL)));
+ _changeNumRecords(txn, -numRecords(txn));
+ _increaseDataSize(txn, -dataSize(txn));
- WT_SESSION* session = WiredTigerRecoveryUnit::get(txn)->getSession(txn)->getSession();
- invariantWTOK(WT_OP_CHECK(session->truncate(session, NULL, start, NULL, NULL)));
- _changeNumRecords(txn, -numRecords(txn));
- _increaseDataSize(txn, -dataSize(txn));
+ return Status::OK();
+}
- return Status::OK();
- }
+Status WiredTigerRecordStore::compact(OperationContext* txn,
+ RecordStoreCompactAdaptor* adaptor,
+ const CompactOptions* options,
+ CompactStats* stats) {
+ WiredTigerSessionCache* cache = WiredTigerRecoveryUnit::get(txn)->getSessionCache();
+ WiredTigerSession* session = cache->getSession();
+ WT_SESSION* s = session->getSession();
+ int ret = s->compact(s, getURI().c_str(), "timeout=0");
+ invariantWTOK(ret);
+ cache->releaseSession(session);
+ return Status::OK();
+}
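
truncate() and compact() above both drive WT_SESSION-level operations. For WT_SESSION::truncate, a NULL start or stop cursor means "from the beginning" or "through the end" respectively, which is why the code positions a start cursor on the first record. A standalone sketch (URI illustrative):

    // Given an already-open WT_SESSION* session:
    WT_CURSOR* start;
    invariantWTOK(session->open_cursor(session, "table:example", NULL, NULL, &start));
    invariantWTOK(start->next(start));  // position on the first record
    invariantWTOK(session->truncate(session, NULL, start, NULL, NULL));
    invariantWTOK(session->compact(session, "table:example", "timeout=0"));
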
- Status WiredTigerRecordStore::compact( OperationContext* txn,
- RecordStoreCompactAdaptor* adaptor,
- const CompactOptions* options,
- CompactStats* stats ) {
- WiredTigerSessionCache* cache = WiredTigerRecoveryUnit::get(txn)->getSessionCache();
- WiredTigerSession* session = cache->getSession();
- WT_SESSION *s = session->getSession();
- int ret = s->compact(s, getURI().c_str(), "timeout=0");
- invariantWTOK(ret);
- cache->releaseSession(session);
- return Status::OK();
+Status WiredTigerRecordStore::validate(OperationContext* txn,
+ bool full,
+ bool scanData,
+ ValidateAdaptor* adaptor,
+ ValidateResults* results,
+ BSONObjBuilder* output) {
+ {
+ int err = WiredTigerUtil::verifyTable(txn, _uri, &results->errors);
+ if (err == EBUSY) {
+ const char* msg = "verify() returned EBUSY. Not treating as invalid.";
+ warning() << msg;
+ results->errors.push_back(msg);
+ } else if (err) {
+ std::string msg = str::stream() << "verify() returned " << wiredtiger_strerror(err)
+ << ". "
+ << "This indicates structural damage. "
+ << "Not examining individual documents.";
+ error() << msg;
+ results->errors.push_back(msg);
+ results->valid = false;
+ return Status::OK();
+ }
}
- Status WiredTigerRecordStore::validate( OperationContext* txn,
- bool full,
- bool scanData,
- ValidateAdaptor* adaptor,
- ValidateResults* results,
- BSONObjBuilder* output ) {
-
- {
- int err = WiredTigerUtil::verifyTable(txn, _uri, &results->errors);
- if (err == EBUSY) {
- const char* msg = "verify() returned EBUSY. Not treating as invalid.";
- warning() << msg;
- results->errors.push_back(msg);
- }
- else if (err) {
- std::string msg = str::stream()
- << "verify() returned " << wiredtiger_strerror(err) << ". "
- << "This indicates structural damage. "
- << "Not examining individual documents.";
- error() << msg;
- results->errors.push_back(msg);
+ long long nrecords = 0;
+ long long dataSizeTotal = 0;
+ results->valid = true;
+ Cursor cursor(txn, *this, true);
+ while (auto record = cursor.next()) {
+ ++nrecords;
+ if (full && scanData) {
+ size_t dataSize;
+ Status status = adaptor->validate(record->data, &dataSize);
+ if (!status.isOK()) {
results->valid = false;
- return Status::OK();
+ results->errors.push_back(str::stream() << record->id << " is corrupted");
}
+ dataSizeTotal += static_cast<long long>(dataSize);
}
+ }
- long long nrecords = 0;
- long long dataSizeTotal = 0;
- results->valid = true;
- Cursor cursor(txn, *this, true);
- while (auto record = cursor.next()) {
- ++nrecords;
- if ( full && scanData ) {
- size_t dataSize;
- Status status = adaptor->validate( record->data, &dataSize );
- if ( !status.isOK() ) {
- results->valid = false;
- results->errors.push_back( str::stream() << record->id << " is corrupted" );
- }
- dataSizeTotal += static_cast<long long>(dataSize);
- }
+ if (_sizeStorer && full && scanData && results->valid) {
+ if (nrecords != _numRecords.load() || dataSizeTotal != _dataSize.load()) {
+ warning() << _uri << ": Existing record and data size counters (" << _numRecords.load()
+ << " records " << _dataSize.load() << " bytes) "
+ << "are inconsistent with full validation results (" << nrecords
+ << " records " << dataSizeTotal << " bytes). "
+ << "Updating counters with new values.";
}
- if (_sizeStorer && full && scanData && results->valid) {
- if (nrecords != _numRecords.load() || dataSizeTotal != _dataSize.load()) {
- warning() << _uri << ": Existing record and data size counters ("
- << _numRecords.load() << " records " << _dataSize.load() << " bytes) "
- << "are inconsistent with full validation results ("
- << nrecords << " records " << dataSizeTotal << " bytes). "
- << "Updating counters with new values.";
- }
-
- _numRecords.store(nrecords);
- _dataSize.store(dataSizeTotal);
-
- long long oldNumRecords;
- long long oldDataSize;
- _sizeStorer->loadFromCache(_uri, &oldNumRecords, &oldDataSize);
- if (nrecords != oldNumRecords || dataSizeTotal != oldDataSize) {
- warning() << _uri << ": Existing data in size storer ("
- << oldNumRecords << " records " << oldDataSize << " bytes) "
- << "is inconsistent with full validation results ("
- << _numRecords.load() << " records " << _dataSize.load() << " bytes). "
- << "Updating size storer with new values.";
- }
-
- _sizeStorer->storeToCache(_uri, _numRecords.load(), _dataSize.load());
+ _numRecords.store(nrecords);
+ _dataSize.store(dataSizeTotal);
+
+ long long oldNumRecords;
+ long long oldDataSize;
+ _sizeStorer->loadFromCache(_uri, &oldNumRecords, &oldDataSize);
+ if (nrecords != oldNumRecords || dataSizeTotal != oldDataSize) {
+ warning() << _uri << ": Existing data in size storer (" << oldNumRecords << " records "
+ << oldDataSize << " bytes) "
+ << "is inconsistent with full validation results (" << _numRecords.load()
+ << " records " << _dataSize.load() << " bytes). "
+ << "Updating size storer with new values.";
}
- output->appendNumber( "nrecords", nrecords );
- return Status::OK();
+ _sizeStorer->storeToCache(_uri, _numRecords.load(), _dataSize.load());
}
- void WiredTigerRecordStore::appendCustomStats( OperationContext* txn,
- BSONObjBuilder* result,
- double scale ) const {
- result->appendBool( "capped", _isCapped );
- if ( _isCapped ) {
- result->appendIntOrLL("max", _cappedMaxDocs );
- result->appendIntOrLL("maxSize", static_cast<long long>(_cappedMaxSize / scale) );
- result->appendIntOrLL("sleepCount", _cappedSleep.load());
- result->appendIntOrLL("sleepMS", _cappedSleepMS.load());
- }
- WiredTigerSession* session = WiredTigerRecoveryUnit::get(txn)->getSession(txn);
- WT_SESSION* s = session->getSession();
- BSONObjBuilder bob(result->subobjStart(kWiredTigerEngineName));
- {
- BSONObjBuilder metadata(bob.subobjStart("metadata"));
- Status status = WiredTigerUtil::getApplicationMetadata(txn, getURI(), &metadata);
- if (!status.isOK()) {
- metadata.append("error", "unable to retrieve metadata");
- metadata.append("code", static_cast<int>(status.code()));
- metadata.append("reason", status.reason());
- }
- }
-
- std::string type, sourceURI;
- WiredTigerUtil::fetchTypeAndSourceURI(txn, _uri, &type, &sourceURI);
- StatusWith<std::string> metadataResult = WiredTigerUtil::getMetadata(txn, sourceURI);
- StringData creationStringName("creationString");
- if (!metadataResult.isOK()) {
- BSONObjBuilder creationString(bob.subobjStart(creationStringName));
- creationString.append("error", "unable to retrieve creation config");
- creationString.append("code", static_cast<int>(metadataResult.getStatus().code()));
- creationString.append("reason", metadataResult.getStatus().reason());
- }
- else {
- bob.append("creationString", metadataResult.getValue());
- // Type can be "lsm" or "file"
- bob.append("type", type);
- }
+ output->appendNumber("nrecords", nrecords);
+ return Status::OK();
+}
- Status status = WiredTigerUtil::exportTableToBSON(s, "statistics:" + getURI(),
- "statistics=(fast)", &bob);
+void WiredTigerRecordStore::appendCustomStats(OperationContext* txn,
+ BSONObjBuilder* result,
+ double scale) const {
+ result->appendBool("capped", _isCapped);
+ if (_isCapped) {
+ result->appendIntOrLL("max", _cappedMaxDocs);
+ result->appendIntOrLL("maxSize", static_cast<long long>(_cappedMaxSize / scale));
+ result->appendIntOrLL("sleepCount", _cappedSleep.load());
+ result->appendIntOrLL("sleepMS", _cappedSleepMS.load());
+ }
+ WiredTigerSession* session = WiredTigerRecoveryUnit::get(txn)->getSession(txn);
+ WT_SESSION* s = session->getSession();
+ BSONObjBuilder bob(result->subobjStart(kWiredTigerEngineName));
+ {
+ BSONObjBuilder metadata(bob.subobjStart("metadata"));
+ Status status = WiredTigerUtil::getApplicationMetadata(txn, getURI(), &metadata);
if (!status.isOK()) {
- bob.append("error", "unable to retrieve statistics");
- bob.append("code", static_cast<int>(status.code()));
- bob.append("reason", status.reason());
+ metadata.append("error", "unable to retrieve metadata");
+ metadata.append("code", static_cast<int>(status.code()));
+ metadata.append("reason", status.reason());
}
-
}
- Status WiredTigerRecordStore::oplogDiskLocRegister( OperationContext* txn,
- const Timestamp& opTime ) {
- StatusWith<RecordId> loc = oploghack::keyForOptime( opTime );
- if ( !loc.isOK() )
- return loc.getStatus();
+ std::string type, sourceURI;
+ WiredTigerUtil::fetchTypeAndSourceURI(txn, _uri, &type, &sourceURI);
+ StatusWith<std::string> metadataResult = WiredTigerUtil::getMetadata(txn, sourceURI);
+ StringData creationStringName("creationString");
+ if (!metadataResult.isOK()) {
+ BSONObjBuilder creationString(bob.subobjStart(creationStringName));
+ creationString.append("error", "unable to retrieve creation config");
+ creationString.append("code", static_cast<int>(metadataResult.getStatus().code()));
+ creationString.append("reason", metadataResult.getStatus().reason());
+ } else {
+ bob.append("creationString", metadataResult.getValue());
+ // Type can be "lsm" or "file"
+ bob.append("type", type);
+ }
- stdx::lock_guard<stdx::mutex> lk( _uncommittedDiskLocsMutex );
- _addUncommitedDiskLoc_inlock( txn, loc.getValue() );
- return Status::OK();
+ Status status =
+ WiredTigerUtil::exportTableToBSON(s, "statistics:" + getURI(), "statistics=(fast)", &bob);
+ if (!status.isOK()) {
+ bob.append("error", "unable to retrieve statistics");
+ bob.append("code", static_cast<int>(status.code()));
+ bob.append("reason", status.reason());
}
+}
- class WiredTigerRecordStore::CappedInsertChange : public RecoveryUnit::Change {
- public:
- CappedInsertChange( WiredTigerRecordStore* rs, const RecordId& loc )
- : _rs( rs ), _loc( loc ) {
- }
+Status WiredTigerRecordStore::oplogDiskLocRegister(OperationContext* txn, const Timestamp& opTime) {
+ StatusWith<RecordId> loc = oploghack::keyForOptime(opTime);
+ if (!loc.isOK())
+ return loc.getStatus();
- virtual void commit() {
- _rs->dealtWithCappedLoc( _loc );
- }
+ stdx::lock_guard<stdx::mutex> lk(_uncommittedDiskLocsMutex);
+ _addUncommitedDiskLoc_inlock(txn, loc.getValue());
+ return Status::OK();
+}
- virtual void rollback() {
- _rs->dealtWithCappedLoc( _loc );
- }
+class WiredTigerRecordStore::CappedInsertChange : public RecoveryUnit::Change {
+public:
+ CappedInsertChange(WiredTigerRecordStore* rs, const RecordId& loc) : _rs(rs), _loc(loc) {}
- private:
- WiredTigerRecordStore* _rs;
- RecordId _loc;
- };
-
- void WiredTigerRecordStore::_addUncommitedDiskLoc_inlock( OperationContext* txn,
- const RecordId& loc ) {
- // todo: make this a dassert at some point
- invariant( _uncommittedDiskLocs.empty() ||
- _uncommittedDiskLocs.back() < loc );
- _uncommittedDiskLocs.push_back( loc );
- txn->recoveryUnit()->registerChange( new CappedInsertChange( this, loc ) );
- _oplog_highestSeen = loc;
+ virtual void commit() {
+ _rs->dealtWithCappedLoc(_loc);
}
- boost::optional<RecordId> WiredTigerRecordStore::oplogStartHack(
- OperationContext* txn,
- const RecordId& startingPosition) const {
-
- if (!_useOplogHack)
- return boost::none;
-
- {
- WiredTigerRecoveryUnit* wru = WiredTigerRecoveryUnit::get(txn);
- _oplogSetStartHack( wru );
- }
+ virtual void rollback() {
+ _rs->dealtWithCappedLoc(_loc);
+ }
- WiredTigerCursor cursor(_uri, _instanceId, true, txn);
- WT_CURSOR* c = cursor.get();
+private:
+ WiredTigerRecordStore* _rs;
+ RecordId _loc;
+};
+
+void WiredTigerRecordStore::_addUncommitedDiskLoc_inlock(OperationContext* txn,
+ const RecordId& loc) {
+ // todo: make this a dassert at some point
+ invariant(_uncommittedDiskLocs.empty() || _uncommittedDiskLocs.back() < loc);
+ _uncommittedDiskLocs.push_back(loc);
+ txn->recoveryUnit()->registerChange(new CappedInsertChange(this, loc));
+ _oplog_highestSeen = loc;
+}
- int cmp;
- c->set_key(c, _makeKey(startingPosition));
- int ret = WT_OP_CHECK(c->search_near(c, &cmp));
- if (ret == 0 && cmp > 0) ret = c->prev(c); // landed one higher than startingPosition
- if (ret == WT_NOTFOUND) return RecordId(); // nothing <= startingPosition
- invariantWTOK(ret);
+boost::optional<RecordId> WiredTigerRecordStore::oplogStartHack(
+ OperationContext* txn, const RecordId& startingPosition) const {
+ if (!_useOplogHack)
+ return boost::none;
- int64_t key;
- ret = c->get_key(c, &key);
- invariantWTOK(ret);
- return _fromKey(key);
+ {
+ WiredTigerRecoveryUnit* wru = WiredTigerRecoveryUnit::get(txn);
+ _oplogSetStartHack(wru);
}
- void WiredTigerRecordStore::updateStatsAfterRepair(OperationContext* txn,
- long long numRecords,
- long long dataSize) {
- _numRecords.store(numRecords);
- _dataSize.store(dataSize);
- _sizeStorer->storeToCache(_uri, numRecords, dataSize);
- }
+ WiredTigerCursor cursor(_uri, _instanceId, true, txn);
+ WT_CURSOR* c = cursor.get();
+
+ int cmp;
+ c->set_key(c, _makeKey(startingPosition));
+ int ret = WT_OP_CHECK(c->search_near(c, &cmp));
+ if (ret == 0 && cmp > 0)
+ ret = c->prev(c); // landed one higher than startingPosition
+ if (ret == WT_NOTFOUND)
+ return RecordId(); // nothing <= startingPosition
+ invariantWTOK(ret);
+
+ int64_t key;
+ ret = c->get_key(c, &key);
+ invariantWTOK(ret);
+ return _fromKey(key);
+}
- RecordId WiredTigerRecordStore::_nextId() {
- invariant(!_useOplogHack);
- RecordId out = RecordId(_nextIdNum.fetchAndAdd(1));
- invariant(out.isNormal());
- return out;
- }
+void WiredTigerRecordStore::updateStatsAfterRepair(OperationContext* txn,
+ long long numRecords,
+ long long dataSize) {
+ _numRecords.store(numRecords);
+ _dataSize.store(dataSize);
+ _sizeStorer->storeToCache(_uri, numRecords, dataSize);
+}
- WiredTigerRecoveryUnit* WiredTigerRecordStore::_getRecoveryUnit( OperationContext* txn ) {
- return checked_cast<WiredTigerRecoveryUnit*>( txn->recoveryUnit() );
- }
+RecordId WiredTigerRecordStore::_nextId() {
+ invariant(!_useOplogHack);
+ RecordId out = RecordId(_nextIdNum.fetchAndAdd(1));
+ invariant(out.isNormal());
+ return out;
+}
- class WiredTigerRecordStore::NumRecordsChange : public RecoveryUnit::Change {
- public:
- NumRecordsChange(WiredTigerRecordStore* rs, int64_t diff) :_rs(rs), _diff(diff) {}
- virtual void commit() {}
- virtual void rollback() {
- _rs->_numRecords.fetchAndAdd( -_diff );
- }
+WiredTigerRecoveryUnit* WiredTigerRecordStore::_getRecoveryUnit(OperationContext* txn) {
+ return checked_cast<WiredTigerRecoveryUnit*>(txn->recoveryUnit());
+}
- private:
- WiredTigerRecordStore* _rs;
- int64_t _diff;
- };
-
- void WiredTigerRecordStore::_changeNumRecords( OperationContext* txn, int64_t diff ) {
- txn->recoveryUnit()->registerChange(new NumRecordsChange(this, diff));
- if ( diff > 0 ) {
- if ( _numRecords.fetchAndAdd( diff ) < diff )
- _numRecords.store( diff );
- } else if ( _numRecords.fetchAndAdd( diff ) < 0 ) {
- _numRecords.store( 0 );
- }
+class WiredTigerRecordStore::NumRecordsChange : public RecoveryUnit::Change {
+public:
+ NumRecordsChange(WiredTigerRecordStore* rs, int64_t diff) : _rs(rs), _diff(diff) {}
+ virtual void commit() {}
+ virtual void rollback() {
+ _rs->_numRecords.fetchAndAdd(-_diff);
}
- class WiredTigerRecordStore::DataSizeChange : public RecoveryUnit::Change {
- public:
- DataSizeChange(WiredTigerRecordStore* rs, int amount) :_rs(rs), _amount(amount) {}
- virtual void commit() {}
- virtual void rollback() {
- _rs->_increaseDataSize( NULL, -_amount );
- }
+private:
+ WiredTigerRecordStore* _rs;
+ int64_t _diff;
+};
+
+void WiredTigerRecordStore::_changeNumRecords(OperationContext* txn, int64_t diff) {
+ txn->recoveryUnit()->registerChange(new NumRecordsChange(this, diff));
+ if (diff > 0) {
+ if (_numRecords.fetchAndAdd(diff) < diff)
+ _numRecords.store(diff);
+ } else if (_numRecords.fetchAndAdd(diff) < 0) {
+ _numRecords.store(0);
+ }
+}
- private:
- WiredTigerRecordStore* _rs;
- bool _amount;
- };
+class WiredTigerRecordStore::DataSizeChange : public RecoveryUnit::Change {
+public:
+ DataSizeChange(WiredTigerRecordStore* rs, int amount) : _rs(rs), _amount(amount) {}
+ virtual void commit() {}
+ virtual void rollback() {
+ _rs->_increaseDataSize(NULL, -_amount);
+ }
- void WiredTigerRecordStore::_increaseDataSize( OperationContext* txn, int amount ) {
- if ( txn )
- txn->recoveryUnit()->registerChange(new DataSizeChange(this, amount));
+private:
+ WiredTigerRecordStore* _rs;
+    int _amount;  // signed delta; must match the constructor's 'amount' parameter type
+};
- if ( _dataSize.fetchAndAdd(amount) < 0 ) {
- if ( amount > 0 ) {
- _dataSize.store( amount );
- }
- else {
- _dataSize.store( 0 );
- }
- }
+void WiredTigerRecordStore::_increaseDataSize(OperationContext* txn, int amount) {
+ if (txn)
+ txn->recoveryUnit()->registerChange(new DataSizeChange(this, amount));
- if ( _sizeStorer && _sizeStorerCounter++ % 1000 == 0 ) {
- _sizeStorer->storeToCache( _uri, _numRecords.load(), _dataSize.load() );
+ if (_dataSize.fetchAndAdd(amount) < 0) {
+ if (amount > 0) {
+ _dataSize.store(amount);
+ } else {
+ _dataSize.store(0);
}
}
- int64_t WiredTigerRecordStore::_makeKey( const RecordId& loc ) {
- return loc.repr();
- }
- RecordId WiredTigerRecordStore::_fromKey( int64_t key ) {
- return RecordId(key);
+ if (_sizeStorer && _sizeStorerCounter++ % 1000 == 0) {
+ _sizeStorer->storeToCache(_uri, _numRecords.load(), _dataSize.load());
}
+}
- void WiredTigerRecordStore::temp_cappedTruncateAfter( OperationContext* txn,
- RecordId end,
- bool inclusive ) {
- WriteUnitOfWork wuow(txn);
- Cursor cursor(txn, *this);
- while (auto record = cursor.next()) {
- RecordId loc = record->id;
- if ( end < loc || ( inclusive && end == loc ) ) {
- deleteRecord( txn, loc );
- }
+int64_t WiredTigerRecordStore::_makeKey(const RecordId& loc) {
+ return loc.repr();
+}
+RecordId WiredTigerRecordStore::_fromKey(int64_t key) {
+ return RecordId(key);
+}
+
+void WiredTigerRecordStore::temp_cappedTruncateAfter(OperationContext* txn,
+ RecordId end,
+ bool inclusive) {
+ WriteUnitOfWork wuow(txn);
+ Cursor cursor(txn, *this);
+ while (auto record = cursor.next()) {
+ RecordId loc = record->id;
+ if (end < loc || (inclusive && end == loc)) {
+ deleteRecord(txn, loc);
}
- wuow.commit();
}
+ wuow.commit();
+}
}
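
The oplogStartHack() implementation above relies on the WT_CURSOR::search_near()
idiom for finding the largest key that is <= a target: search_near() positions the
cursor on the nearest key and reports which side it landed on, and a single prev()
corrects an overshoot. A minimal standalone sketch of just that idiom follows; the
table URI and the assumption that the table was created with key_format=q (int64
keys) are illustrative, not taken from the MongoDB sources.

    #include <wiredtiger.h>
    #include <cstdint>

    // Finds the largest key <= target in a table with int64 ('q') keys.
    // Returns 0 and fills *out on success, WT_NOTFOUND if every key in the
    // table is greater than target, or another WiredTiger error code.
    int findFloor(WT_SESSION* session, int64_t target, int64_t* out) {
        WT_CURSOR* c;
        int ret = session->open_cursor(session, "table:example", NULL, NULL, &c);
        if (ret != 0)
            return ret;
        c->set_key(c, target);
        int cmp;
        ret = c->search_near(c, &cmp);  // positions on the nearest key; cmp tells which side
        if (ret == 0 && cmp > 0)
            ret = c->prev(c);  // landed one key above target; step back
        if (ret == 0)
            ret = c->get_key(c, out);
        c->close(c);
        return ret;
    }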
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.h b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.h
index 00fd04d42b9..1aa0dec93e7 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.h
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.h
@@ -49,217 +49,225 @@
namespace mongo {
- class RecoveryUnit;
- class WiredTigerCursor;
- class WiredTigerRecoveryUnit;
- class WiredTigerSizeStorer;
-
- extern const std::string kWiredTigerEngineName;
-
- class WiredTigerRecordStore : public RecordStore {
- public:
-
- /**
- * Parses collections options for wired tiger configuration string for table creation.
- * The document 'options' is typically obtained from the 'wiredTiger' field of
- * CollectionOptions::storageEngine.
- */
- static StatusWith<std::string> parseOptionsField(const BSONObj options);
-
- /**
- * Creates a configuration string suitable for 'config' parameter in WT_SESSION::create().
- * Configuration string is constructed from:
- * built-in defaults
- * storageEngine.wiredTiger.configString in 'options'
- * 'extraStrings'
- * Performs simple validation on the supplied parameters.
- * Returns error status if validation fails.
- * Note that even if this function returns an OK status, WT_SESSION:create() may still
- * fail with the constructed configuration string.
- */
- static StatusWith<std::string> generateCreateString(StringData ns,
- const CollectionOptions &options,
- StringData extraStrings);
-
- WiredTigerRecordStore(OperationContext* txn,
- StringData ns,
- StringData uri,
- bool isCapped = false,
- int64_t cappedMaxSize = -1,
- int64_t cappedMaxDocs = -1,
- CappedDocumentDeleteCallback* cappedDeleteCallback = NULL,
- WiredTigerSizeStorer* sizeStorer = NULL );
+class RecoveryUnit;
+class WiredTigerCursor;
+class WiredTigerRecoveryUnit;
+class WiredTigerSizeStorer;
+
+extern const std::string kWiredTigerEngineName;
+
+class WiredTigerRecordStore : public RecordStore {
+public:
+ /**
+     * Parses collection options into a WiredTiger configuration string for table creation.
+ * The document 'options' is typically obtained from the 'wiredTiger' field of
+ * CollectionOptions::storageEngine.
+ */
+ static StatusWith<std::string> parseOptionsField(const BSONObj options);
+
+ /**
+     * Creates a configuration string suitable for the 'config' parameter of
+     * WT_SESSION::create(). The configuration string is constructed from:
+     *     built-in defaults
+     *     storageEngine.wiredTiger.configString in 'options'
+     *     'extraStrings'
+     * Performs simple validation on the supplied parameters and returns an
+     * error status if validation fails.
+     * Note that even if this function returns an OK status, WT_SESSION::create()
+     * may still fail with the constructed configuration string.
+ */
+ static StatusWith<std::string> generateCreateString(StringData ns,
+ const CollectionOptions& options,
+ StringData extraStrings);
+
+ WiredTigerRecordStore(OperationContext* txn,
+ StringData ns,
+ StringData uri,
+ bool isCapped = false,
+ int64_t cappedMaxSize = -1,
+ int64_t cappedMaxDocs = -1,
+ CappedDocumentDeleteCallback* cappedDeleteCallback = NULL,
+ WiredTigerSizeStorer* sizeStorer = NULL);
+
+ virtual ~WiredTigerRecordStore();
+
+ // name of the RecordStore implementation
+ virtual const char* name() const;
+
+ virtual long long dataSize(OperationContext* txn) const;
+
+ virtual long long numRecords(OperationContext* txn) const;
+
+ virtual bool isCapped() const;
+
+ virtual int64_t storageSize(OperationContext* txn,
+ BSONObjBuilder* extraInfo = NULL,
+ int infoLevel = 0) const;
+
+ // CRUD related
+
+ virtual RecordData dataFor(OperationContext* txn, const RecordId& loc) const;
+
+ virtual bool findRecord(OperationContext* txn, const RecordId& loc, RecordData* out) const;
+
+ virtual void deleteRecord(OperationContext* txn, const RecordId& dl);
+
+ virtual StatusWith<RecordId> insertRecord(OperationContext* txn,
+ const char* data,
+ int len,
+ bool enforceQuota);
+
+ virtual StatusWith<RecordId> insertRecord(OperationContext* txn,
+ const DocWriter* doc,
+ bool enforceQuota);
+
+ virtual StatusWith<RecordId> updateRecord(OperationContext* txn,
+ const RecordId& oldLocation,
+ const char* data,
+ int len,
+ bool enforceQuota,
+ UpdateNotifier* notifier);
+
+ virtual bool updateWithDamagesSupported() const;
+
+ virtual Status updateWithDamages(OperationContext* txn,
+ const RecordId& loc,
+ const RecordData& oldRec,
+ const char* damageSource,
+ const mutablebson::DamageVector& damages);
+
+ std::unique_ptr<RecordCursor> getCursor(OperationContext* txn, bool forward) const final;
+ std::vector<std::unique_ptr<RecordCursor>> getManyCursors(OperationContext* txn) const final;
+
+ virtual Status truncate(OperationContext* txn);
+
+ virtual bool compactSupported() const {
+ return true;
+ }
+ virtual bool compactsInPlace() const {
+ return true;
+ }
+
+ virtual Status compact(OperationContext* txn,
+ RecordStoreCompactAdaptor* adaptor,
+ const CompactOptions* options,
+ CompactStats* stats);
+
+ virtual Status validate(OperationContext* txn,
+ bool full,
+ bool scanData,
+ ValidateAdaptor* adaptor,
+ ValidateResults* results,
+ BSONObjBuilder* output);
+
+ virtual void appendCustomStats(OperationContext* txn,
+ BSONObjBuilder* result,
+ double scale) const;
+
+ virtual void temp_cappedTruncateAfter(OperationContext* txn, RecordId end, bool inclusive);
+
+ virtual boost::optional<RecordId> oplogStartHack(OperationContext* txn,
+ const RecordId& startingPosition) const;
+
+ virtual Status oplogDiskLocRegister(OperationContext* txn, const Timestamp& opTime);
+
+ virtual void updateStatsAfterRepair(OperationContext* txn,
+ long long numRecords,
+ long long dataSize);
+
+ bool isOplog() const {
+ return _isOplog;
+ }
+ bool usingOplogHack() const {
+ return _useOplogHack;
+ }
- virtual ~WiredTigerRecordStore();
+ void setCappedDeleteCallback(CappedDocumentDeleteCallback* cb) {
+ _cappedDeleteCallback = cb;
+ }
+ int64_t cappedMaxDocs() const;
+ int64_t cappedMaxSize() const;
- // name of the RecordStore implementation
- virtual const char* name() const;
+ const std::string& getURI() const {
+ return _uri;
+ }
+ uint64_t instanceId() const {
+ return _instanceId;
+ }
- virtual long long dataSize( OperationContext *txn ) const;
-
- virtual long long numRecords( OperationContext* txn ) const;
-
- virtual bool isCapped() const;
-
- virtual int64_t storageSize( OperationContext* txn,
- BSONObjBuilder* extraInfo = NULL,
- int infoLevel = 0 ) const;
-
- // CRUD related
-
- virtual RecordData dataFor( OperationContext* txn, const RecordId& loc ) const;
-
- virtual bool findRecord( OperationContext* txn, const RecordId& loc, RecordData* out ) const;
-
- virtual void deleteRecord( OperationContext* txn, const RecordId& dl );
-
- virtual StatusWith<RecordId> insertRecord( OperationContext* txn,
- const char* data,
- int len,
- bool enforceQuota );
-
- virtual StatusWith<RecordId> insertRecord( OperationContext* txn,
- const DocWriter* doc,
- bool enforceQuota );
-
- virtual StatusWith<RecordId> updateRecord( OperationContext* txn,
- const RecordId& oldLocation,
- const char* data,
- int len,
- bool enforceQuota,
- UpdateNotifier* notifier );
-
- virtual bool updateWithDamagesSupported() const;
-
- virtual Status updateWithDamages( OperationContext* txn,
- const RecordId& loc,
- const RecordData& oldRec,
- const char* damageSource,
- const mutablebson::DamageVector& damages );
-
- std::unique_ptr<RecordCursor> getCursor(OperationContext* txn, bool forward) const final;
- std::vector<std::unique_ptr<RecordCursor>> getManyCursors(
- OperationContext* txn) const final;
-
- virtual Status truncate( OperationContext* txn );
-
- virtual bool compactSupported() const { return true; }
- virtual bool compactsInPlace() const { return true; }
-
- virtual Status compact( OperationContext* txn,
- RecordStoreCompactAdaptor* adaptor,
- const CompactOptions* options,
- CompactStats* stats );
-
- virtual Status validate( OperationContext* txn,
- bool full,
- bool scanData,
- ValidateAdaptor* adaptor,
- ValidateResults* results,
- BSONObjBuilder* output );
-
- virtual void appendCustomStats( OperationContext* txn,
- BSONObjBuilder* result,
- double scale ) const;
-
- virtual void temp_cappedTruncateAfter(OperationContext* txn,
- RecordId end,
- bool inclusive);
-
- virtual boost::optional<RecordId> oplogStartHack(OperationContext* txn,
- const RecordId& startingPosition) const;
-
- virtual Status oplogDiskLocRegister( OperationContext* txn,
- const Timestamp& opTime );
-
- virtual void updateStatsAfterRepair(OperationContext* txn,
- long long numRecords,
- long long dataSize);
-
- bool isOplog() const { return _isOplog; }
- bool usingOplogHack() const { return _useOplogHack; }
-
- void setCappedDeleteCallback(CappedDocumentDeleteCallback* cb) {
- _cappedDeleteCallback = cb;
- }
- int64_t cappedMaxDocs() const;
- int64_t cappedMaxSize() const;
-
- const std::string& getURI() const { return _uri; }
- uint64_t instanceId() const { return _instanceId; }
-
- void setSizeStorer( WiredTigerSizeStorer* ss ) { _sizeStorer = ss; }
-
- void dealtWithCappedLoc( const RecordId& loc );
- bool isCappedHidden( const RecordId& loc ) const;
-
- bool inShutdown() const;
- int64_t cappedDeleteAsNeeded(OperationContext* txn,
- const RecordId& justInserted);
-
- int64_t cappedDeleteAsNeeded_inlock(OperationContext* txn,
- const RecordId& justInserted);
-
- stdx::timed_mutex& cappedDeleterMutex() { return _cappedDeleterMutex; }
-
- private:
- class Cursor;
-
- class CappedInsertChange;
- class NumRecordsChange;
- class DataSizeChange;
-
- static WiredTigerRecoveryUnit* _getRecoveryUnit( OperationContext* txn );
-
- static int64_t _makeKey(const RecordId &loc);
- static RecordId _fromKey(int64_t k);
-
- void _addUncommitedDiskLoc_inlock( OperationContext* txn, const RecordId& loc );
-
- RecordId _nextId();
- void _setId(RecordId loc);
- bool cappedAndNeedDelete() const;
- void _changeNumRecords(OperationContext* txn, int64_t diff);
- void _increaseDataSize(OperationContext* txn, int amount);
- RecordData _getData( const WiredTigerCursor& cursor) const;
- StatusWith<RecordId> extractAndCheckLocForOplog(const char* data, int len);
- void _oplogSetStartHack( WiredTigerRecoveryUnit* wru ) const;
-
- const std::string _uri;
- const uint64_t _instanceId; // not persisted
-
- // The capped settings should not be updated once operations have started
- const bool _isCapped;
- const bool _isOplog;
- const int64_t _cappedMaxSize;
- const int64_t _cappedMaxSizeSlack; // when to start applying backpressure
- const int64_t _cappedMaxDocs;
- AtomicInt64 _cappedSleep;
- AtomicInt64 _cappedSleepMS;
- CappedDocumentDeleteCallback* _cappedDeleteCallback;
- int _cappedDeleteCheckCount; // see comment in ::cappedDeleteAsNeeded
- mutable stdx::timed_mutex _cappedDeleterMutex; // see comment in ::cappedDeleteAsNeeded
-
- const bool _useOplogHack;
-
- typedef std::vector<RecordId> SortedDiskLocs;
- SortedDiskLocs _uncommittedDiskLocs;
- RecordId _oplog_visibleTo;
- RecordId _oplog_highestSeen;
- mutable stdx::mutex _uncommittedDiskLocsMutex;
-
- AtomicInt64 _nextIdNum;
- AtomicInt64 _dataSize;
- AtomicInt64 _numRecords;
-
- WiredTigerSizeStorer* _sizeStorer; // not owned, can be NULL
- int _sizeStorerCounter;
-
- bool _shuttingDown;
- bool _hasBackgroundThread;
- };
-
- // WT failpoint to throw write conflict exceptions randomly
- MONGO_FP_FORWARD_DECLARE(WTWriteConflictException);
+ void setSizeStorer(WiredTigerSizeStorer* ss) {
+ _sizeStorer = ss;
+ }
+
+ void dealtWithCappedLoc(const RecordId& loc);
+ bool isCappedHidden(const RecordId& loc) const;
+
+ bool inShutdown() const;
+ int64_t cappedDeleteAsNeeded(OperationContext* txn, const RecordId& justInserted);
+
+ int64_t cappedDeleteAsNeeded_inlock(OperationContext* txn, const RecordId& justInserted);
+
+ stdx::timed_mutex& cappedDeleterMutex() {
+ return _cappedDeleterMutex;
+ }
+
+private:
+ class Cursor;
+
+ class CappedInsertChange;
+ class NumRecordsChange;
+ class DataSizeChange;
+
+ static WiredTigerRecoveryUnit* _getRecoveryUnit(OperationContext* txn);
+
+ static int64_t _makeKey(const RecordId& loc);
+ static RecordId _fromKey(int64_t k);
+
+ void _addUncommitedDiskLoc_inlock(OperationContext* txn, const RecordId& loc);
+
+ RecordId _nextId();
+ void _setId(RecordId loc);
+ bool cappedAndNeedDelete() const;
+ void _changeNumRecords(OperationContext* txn, int64_t diff);
+ void _increaseDataSize(OperationContext* txn, int amount);
+ RecordData _getData(const WiredTigerCursor& cursor) const;
+ StatusWith<RecordId> extractAndCheckLocForOplog(const char* data, int len);
+ void _oplogSetStartHack(WiredTigerRecoveryUnit* wru) const;
+
+ const std::string _uri;
+ const uint64_t _instanceId; // not persisted
+
+ // The capped settings should not be updated once operations have started
+ const bool _isCapped;
+ const bool _isOplog;
+ const int64_t _cappedMaxSize;
+ const int64_t _cappedMaxSizeSlack; // when to start applying backpressure
+ const int64_t _cappedMaxDocs;
+ AtomicInt64 _cappedSleep;
+ AtomicInt64 _cappedSleepMS;
+ CappedDocumentDeleteCallback* _cappedDeleteCallback;
+ int _cappedDeleteCheckCount; // see comment in ::cappedDeleteAsNeeded
+ mutable stdx::timed_mutex _cappedDeleterMutex; // see comment in ::cappedDeleteAsNeeded
+
+ const bool _useOplogHack;
+
+ typedef std::vector<RecordId> SortedDiskLocs;
+ SortedDiskLocs _uncommittedDiskLocs;
+ RecordId _oplog_visibleTo;
+ RecordId _oplog_highestSeen;
+ mutable stdx::mutex _uncommittedDiskLocsMutex;
+
+ AtomicInt64 _nextIdNum;
+ AtomicInt64 _dataSize;
+ AtomicInt64 _numRecords;
+
+ WiredTigerSizeStorer* _sizeStorer; // not owned, can be NULL
+ int _sizeStorerCounter;
+ bool _shuttingDown;
+ bool _hasBackgroundThread;
+};
+
+// WT failpoint to throw write conflict exceptions randomly
+MONGO_FP_FORWARD_DECLARE(WTWriteConflictException);
}
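
NumRecordsChange, DataSizeChange, and CappedInsertChange in the implementation
above all follow the same RecoveryUnit::Change pattern: mutate in-memory state
eagerly, then register an object whose rollback() undoes the delta if the unit
of work aborts. A minimal sketch of the pattern, with all names hypothetical and
the recovery unit reduced to a plain vector of pending changes:

    #include <atomic>
    #include <cstdint>
    #include <memory>
    #include <vector>

    struct Change {
        virtual ~Change() {}
        virtual void commit() = 0;
        virtual void rollback() = 0;
    };

    class CounterChange : public Change {
    public:
        CounterChange(std::atomic<int64_t>* counter, int64_t diff)
            : _counter(counter), _diff(diff) {}
        void commit() override {}  // nothing to do: the update was applied eagerly
        void rollback() override {
            _counter->fetch_add(-_diff);  // undo the eager update
        }

    private:
        std::atomic<int64_t>* _counter;
        int64_t _diff;
    };

    // Apply the delta immediately and register the undo for rollback.
    void changeCounter(std::vector<std::unique_ptr<Change>>& pendingChanges,
                       std::atomic<int64_t>& counter,
                       int64_t diff) {
        counter.fetch_add(diff);
        pendingChanges.push_back(std::make_unique<CounterChange>(&counter, diff));
    }

On commit the registered objects are simply discarded (or have commit() called);
on abort each rollback() runs, restoring the counter to its pre-transaction value.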
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store_mock.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store_mock.cpp
index 3a2a3d8e5d3..f83e1e63e5c 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store_mock.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store_mock.cpp
@@ -39,13 +39,13 @@
namespace mongo {
- // static
- bool WiredTigerKVEngine::initRsOplogBackgroundThread(StringData ns) {
- return false;
- }
+// static
+bool WiredTigerKVEngine::initRsOplogBackgroundThread(StringData ns) {
+ return false;
+}
- MONGO_INITIALIZER(SetGlobalEnvironment)(InitializerContext* context) {
- setGlobalServiceContext(stdx::make_unique<ServiceContextNoop>());
- return Status::OK();
- }
+MONGO_INITIALIZER(SetGlobalEnvironment)(InitializerContext* context) {
+ setGlobalServiceContext(stdx::make_unique<ServiceContextNoop>());
+ return Status::OK();
+}
} // namespace mongo
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store_mongod.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store_mongod.cpp
index ef3f4007ecd..dae37303017 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store_mongod.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store_mongod.cpp
@@ -52,121 +52,117 @@
namespace mongo {
- namespace {
+namespace {
- std::set<NamespaceString> _backgroundThreadNamespaces;
- stdx::mutex _backgroundThreadMutex;
+std::set<NamespaceString> _backgroundThreadNamespaces;
+stdx::mutex _backgroundThreadMutex;
- class WiredTigerRecordStoreThread : public BackgroundJob {
- public:
- WiredTigerRecordStoreThread(const NamespaceString& ns)
- : BackgroundJob(true /* deleteSelf */), _ns(ns) {
- _name = std::string("WiredTigerRecordStoreThread for ") + _ns.toString();
- }
+class WiredTigerRecordStoreThread : public BackgroundJob {
+public:
+ WiredTigerRecordStoreThread(const NamespaceString& ns)
+ : BackgroundJob(true /* deleteSelf */), _ns(ns) {
+ _name = std::string("WiredTigerRecordStoreThread for ") + _ns.toString();
+ }
+
+ virtual std::string name() const {
+ return _name;
+ }
+
+ /**
+ * @return Number of documents deleted.
+ */
+ int64_t _deleteExcessDocuments() {
+ if (!getGlobalServiceContext()->getGlobalStorageEngine()) {
+ LOG(1) << "no global storage engine yet";
+ return 0;
+ }
+
+ OperationContextImpl txn;
+ checked_cast<WiredTigerRecoveryUnit*>(txn.recoveryUnit())->markNoTicketRequired();
+
+ try {
+ ScopedTransaction transaction(&txn, MODE_IX);
- virtual std::string name() const {
- return _name;
+ AutoGetDb autoDb(&txn, _ns.db(), MODE_IX);
+ Database* db = autoDb.getDb();
+ if (!db) {
+ LOG(2) << "no local database yet";
+ return 0;
}
- /**
- * @return Number of documents deleted.
- */
- int64_t _deleteExcessDocuments() {
- if (!getGlobalServiceContext()->getGlobalStorageEngine()) {
- LOG(1) << "no global storage engine yet";
- return 0;
- }
-
- OperationContextImpl txn;
- checked_cast<WiredTigerRecoveryUnit*>(txn.recoveryUnit())->markNoTicketRequired();
-
- try {
- ScopedTransaction transaction(&txn, MODE_IX);
-
- AutoGetDb autoDb(&txn, _ns.db(), MODE_IX);
- Database* db = autoDb.getDb();
- if (!db) {
- LOG(2) << "no local database yet";
- return 0;
- }
-
- Lock::CollectionLock collectionLock(txn.lockState(), _ns.ns(), MODE_IX);
- Collection* collection = db->getCollection(_ns);
- if (!collection) {
- LOG(2) << "no collection " << _ns;
- return 0;
- }
-
- OldClientContext ctx(&txn, _ns, false);
- WiredTigerRecordStore* rs =
- checked_cast<WiredTigerRecordStore*>(collection->getRecordStore());
- WriteUnitOfWork wuow(&txn);
- stdx::lock_guard<stdx::timed_mutex> lock(rs->cappedDeleterMutex());
- int64_t removed = rs->cappedDeleteAsNeeded_inlock(&txn, RecordId::max());
- wuow.commit();
- return removed;
- }
- catch (const std::exception& e) {
- severe() << "error in WiredTigerRecordStoreThread: " << e.what();
- fassertFailedNoTrace(!"error in WiredTigerRecordStoreThread");
- }
- catch (...) {
- fassertFailedNoTrace(!"unknown error in WiredTigerRecordStoreThread");
- }
+ Lock::CollectionLock collectionLock(txn.lockState(), _ns.ns(), MODE_IX);
+ Collection* collection = db->getCollection(_ns);
+ if (!collection) {
+ LOG(2) << "no collection " << _ns;
+ return 0;
}
- virtual void run() {
- Client::initThread(_name.c_str());
-
- while (!inShutdown()) {
- int64_t removed = _deleteExcessDocuments();
- LOG(2) << "WiredTigerRecordStoreThread deleted " << removed;
- if (removed == 0) {
- // If we removed 0 documents, sleep a bit in case we're on a laptop
- // or something to be nice.
- sleepmillis(1000);
- }
- else if(removed < 1000) {
- // 1000 is the batch size, so we didn't even do a full batch,
- // which is the most efficient.
- sleepmillis(10);
- }
- }
-
- log() << "shutting down";
+ OldClientContext ctx(&txn, _ns, false);
+ WiredTigerRecordStore* rs =
+ checked_cast<WiredTigerRecordStore*>(collection->getRecordStore());
+ WriteUnitOfWork wuow(&txn);
+ stdx::lock_guard<stdx::timed_mutex> lock(rs->cappedDeleterMutex());
+ int64_t removed = rs->cappedDeleteAsNeeded_inlock(&txn, RecordId::max());
+ wuow.commit();
+ return removed;
+ } catch (const std::exception& e) {
+ severe() << "error in WiredTigerRecordStoreThread: " << e.what();
+ fassertFailedNoTrace(!"error in WiredTigerRecordStoreThread");
+ } catch (...) {
+ fassertFailedNoTrace(!"unknown error in WiredTigerRecordStoreThread");
+ }
+ }
+
+ virtual void run() {
+ Client::initThread(_name.c_str());
+
+ while (!inShutdown()) {
+ int64_t removed = _deleteExcessDocuments();
+ LOG(2) << "WiredTigerRecordStoreThread deleted " << removed;
+ if (removed == 0) {
+ // If we removed 0 documents, sleep a bit in case we're on a laptop
+ // or something to be nice.
+ sleepmillis(1000);
+ } else if (removed < 1000) {
+ // 1000 is the batch size, so we didn't even do a full batch,
+ // which is the most efficient.
+ sleepmillis(10);
}
+ }
- private:
- NamespaceString _ns;
- std::string _name;
- };
+ log() << "shutting down";
+ }
- } // namespace
+private:
+ NamespaceString _ns;
+ std::string _name;
+};
- // static
- bool WiredTigerKVEngine::initRsOplogBackgroundThread(StringData ns) {
- if (!NamespaceString::oplog(ns)) {
- return false;
- }
+} // namespace
- if (storageGlobalParams.repair) {
- LOG(1) << "not starting WiredTigerRecordStoreThread for " << ns
- << " because we are in repair";
- return false;
- }
+// static
+bool WiredTigerKVEngine::initRsOplogBackgroundThread(StringData ns) {
+ if (!NamespaceString::oplog(ns)) {
+ return false;
+ }
- stdx::lock_guard<stdx::mutex> lock(_backgroundThreadMutex);
- NamespaceString nss(ns);
- if (_backgroundThreadNamespaces.count(nss)) {
- log() << "WiredTigerRecordStoreThread " << ns << " already started";
- }
- else {
- log() << "Starting WiredTigerRecordStoreThread " << ns;
- BackgroundJob* backgroundThread = new WiredTigerRecordStoreThread(nss);
- backgroundThread->go();
- _backgroundThreadNamespaces.insert(nss);
- }
- return true;
+ if (storageGlobalParams.repair) {
+ LOG(1) << "not starting WiredTigerRecordStoreThread for " << ns
+ << " because we are in repair";
+ return false;
+ }
+
+ stdx::lock_guard<stdx::mutex> lock(_backgroundThreadMutex);
+ NamespaceString nss(ns);
+ if (_backgroundThreadNamespaces.count(nss)) {
+ log() << "WiredTigerRecordStoreThread " << ns << " already started";
+ } else {
+ log() << "Starting WiredTigerRecordStoreThread " << ns;
+ BackgroundJob* backgroundThread = new WiredTigerRecordStoreThread(nss);
+ backgroundThread->go();
+ _backgroundThreadNamespaces.insert(nss);
}
+ return true;
+}
} // namespace mongo
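
The run() loop above uses a simple adaptive backoff: sleep a full second when a
pass deleted nothing, sleep briefly after a partial batch, and loop again
immediately after a full batch of 1000. A standalone sketch of just that policy,
with the deleter and the shutdown check passed in as stand-ins for
cappedDeleteAsNeeded_inlock() and inShutdown():

    #include <chrono>
    #include <cstdint>
    #include <functional>
    #include <thread>

    void deleterLoop(const std::function<int64_t()>& deleteBatch,
                     const std::function<bool()>& shuttingDown) {
        const int64_t kBatchSize = 1000;  // matches the batch size assumed above
        while (!shuttingDown()) {
            const int64_t removed = deleteBatch();
            if (removed == 0) {
                // Nothing to delete: back off so an idle collection
                // doesn't keep a core busy.
                std::this_thread::sleep_for(std::chrono::milliseconds(1000));
            } else if (removed < kBatchSize) {
                // Partial batch: work is trickling in, poll again soon.
                std::this_thread::sleep_for(std::chrono::milliseconds(10));
            }
            // Full batch: likely more backlog, go again immediately.
        }
    }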
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store_test.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store_test.cpp
index 52a3f0cfefc..aae1056bf8b 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store_test.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store_test.cpp
@@ -50,833 +50,819 @@
namespace mongo {
- using std::unique_ptr;
- using std::string;
- using std::stringstream;
-
- class WiredTigerHarnessHelper : public HarnessHelper {
- public:
- static WT_CONNECTION* createConnection(StringData dbpath, StringData extraStrings) {
- WT_CONNECTION* conn = NULL;
-
- std::stringstream ss;
- ss << "create,";
- ss << "statistics=(all),";
- ss << extraStrings;
- string config = ss.str();
- int ret = wiredtiger_open(dbpath.toString().c_str(), NULL, config.c_str(), &conn);
- ASSERT_OK(wtRCToStatus(ret));
- ASSERT(conn);
-
- return conn;
- }
-
- WiredTigerHarnessHelper()
- : _dbpath("wt_test"),
- _conn(createConnection(_dbpath.path(), "")),
- _sessionCache(new WiredTigerSessionCache(_conn)) { }
+using std::unique_ptr;
+using std::string;
+using std::stringstream;
+
+class WiredTigerHarnessHelper : public HarnessHelper {
+public:
+ static WT_CONNECTION* createConnection(StringData dbpath, StringData extraStrings) {
+ WT_CONNECTION* conn = NULL;
+
+ std::stringstream ss;
+ ss << "create,";
+ ss << "statistics=(all),";
+ ss << extraStrings;
+ string config = ss.str();
+ int ret = wiredtiger_open(dbpath.toString().c_str(), NULL, config.c_str(), &conn);
+ ASSERT_OK(wtRCToStatus(ret));
+ ASSERT(conn);
+
+ return conn;
+ }
- WiredTigerHarnessHelper(StringData extraStrings)
- : _dbpath("wt_test"),
- _conn(createConnection(_dbpath.path(), extraStrings)),
- _sessionCache(new WiredTigerSessionCache(_conn)) { }
+ WiredTigerHarnessHelper()
+ : _dbpath("wt_test"),
+ _conn(createConnection(_dbpath.path(), "")),
+ _sessionCache(new WiredTigerSessionCache(_conn)) {}
- ~WiredTigerHarnessHelper() {
- delete _sessionCache;
- _conn->close(_conn, NULL);
- }
+ WiredTigerHarnessHelper(StringData extraStrings)
+ : _dbpath("wt_test"),
+ _conn(createConnection(_dbpath.path(), extraStrings)),
+ _sessionCache(new WiredTigerSessionCache(_conn)) {}
- virtual RecordStore* newNonCappedRecordStore() { return newNonCappedRecordStore("a.b"); }
- RecordStore* newNonCappedRecordStore(const std::string& ns) {
- WiredTigerRecoveryUnit* ru = new WiredTigerRecoveryUnit( _sessionCache );
- OperationContextNoop txn( ru );
- string uri = "table:" + ns;
+ ~WiredTigerHarnessHelper() {
+ delete _sessionCache;
+ _conn->close(_conn, NULL);
+ }
- StatusWith<std::string> result =
- WiredTigerRecordStore::generateCreateString(ns, CollectionOptions(), "");
- ASSERT_TRUE(result.isOK());
- std::string config = result.getValue();
+ virtual RecordStore* newNonCappedRecordStore() {
+ return newNonCappedRecordStore("a.b");
+ }
+ RecordStore* newNonCappedRecordStore(const std::string& ns) {
+ WiredTigerRecoveryUnit* ru = new WiredTigerRecoveryUnit(_sessionCache);
+ OperationContextNoop txn(ru);
+ string uri = "table:" + ns;
- {
- WriteUnitOfWork uow(&txn);
- WT_SESSION* s = ru->getSession(&txn)->getSession();
- invariantWTOK( s->create( s, uri.c_str(), config.c_str() ) );
- uow.commit();
- }
+ StatusWith<std::string> result =
+ WiredTigerRecordStore::generateCreateString(ns, CollectionOptions(), "");
+ ASSERT_TRUE(result.isOK());
+ std::string config = result.getValue();
- return new WiredTigerRecordStore( &txn, ns, uri );
+ {
+ WriteUnitOfWork uow(&txn);
+ WT_SESSION* s = ru->getSession(&txn)->getSession();
+ invariantWTOK(s->create(s, uri.c_str(), config.c_str()));
+ uow.commit();
}
- virtual RecordStore* newCappedRecordStore( const std::string& ns,
- int64_t cappedMaxSize,
- int64_t cappedMaxDocs ) {
-
- WiredTigerRecoveryUnit* ru = new WiredTigerRecoveryUnit( _sessionCache );
- OperationContextNoop txn( ru );
- string uri = "table:a.b";
-
- CollectionOptions options;
- options.capped = true;
+ return new WiredTigerRecordStore(&txn, ns, uri);
+ }
- StatusWith<std::string> result =
- WiredTigerRecordStore::generateCreateString(ns, options, "");
- ASSERT_TRUE(result.isOK());
- std::string config = result.getValue();
+ virtual RecordStore* newCappedRecordStore(const std::string& ns,
+ int64_t cappedMaxSize,
+ int64_t cappedMaxDocs) {
+ WiredTigerRecoveryUnit* ru = new WiredTigerRecoveryUnit(_sessionCache);
+ OperationContextNoop txn(ru);
+ string uri = "table:a.b";
- {
- WriteUnitOfWork uow(&txn);
- WT_SESSION* s = ru->getSession(&txn)->getSession();
- invariantWTOK( s->create( s, uri.c_str(), config.c_str() ) );
- uow.commit();
- }
+ CollectionOptions options;
+ options.capped = true;
- return new WiredTigerRecordStore( &txn, ns, uri, true, cappedMaxSize, cappedMaxDocs );
- }
+ StatusWith<std::string> result =
+ WiredTigerRecordStore::generateCreateString(ns, options, "");
+ ASSERT_TRUE(result.isOK());
+ std::string config = result.getValue();
- virtual RecoveryUnit* newRecoveryUnit() {
- return new WiredTigerRecoveryUnit( _sessionCache );
+ {
+ WriteUnitOfWork uow(&txn);
+ WT_SESSION* s = ru->getSession(&txn)->getSession();
+ invariantWTOK(s->create(s, uri.c_str(), config.c_str()));
+ uow.commit();
}
- WT_CONNECTION* conn() const { return _conn; }
-
- private:
- unittest::TempDir _dbpath;
- WT_CONNECTION* _conn;
- WiredTigerSessionCache* _sessionCache;
- };
-
- HarnessHelper* newHarnessHelper() {
- return new WiredTigerHarnessHelper();
+ return new WiredTigerRecordStore(&txn, ns, uri, true, cappedMaxSize, cappedMaxDocs);
}
- TEST(WiredTigerRecordStoreTest, GenerateCreateStringEmptyDocument) {
- BSONObj spec = fromjson("{}");
- StatusWith<std::string> result = WiredTigerRecordStore::parseOptionsField(spec);
- ASSERT_OK(result.getStatus());
- ASSERT_EQ(result.getValue(), ""); // "," would also be valid.
+ virtual RecoveryUnit* newRecoveryUnit() {
+ return new WiredTigerRecoveryUnit(_sessionCache);
}
- TEST(WiredTigerRecordStoreTest, GenerateCreateStringUnknownField) {
- BSONObj spec = fromjson("{unknownField: 1}");
- StatusWith<std::string> result = WiredTigerRecordStore::parseOptionsField(spec);
- const Status& status = result.getStatus();
- ASSERT_NOT_OK(status);
- ASSERT_EQUALS(ErrorCodes::InvalidOptions, status);
+ WT_CONNECTION* conn() const {
+ return _conn;
}
- TEST(WiredTigerRecordStoreTest, GenerateCreateStringNonStringConfig) {
- BSONObj spec = fromjson("{configString: 12345}");
- StatusWith<std::string> result = WiredTigerRecordStore::parseOptionsField(spec);
- const Status& status = result.getStatus();
- ASSERT_NOT_OK(status);
- ASSERT_EQUALS(ErrorCodes::TypeMismatch, status);
- }
+private:
+ unittest::TempDir _dbpath;
+ WT_CONNECTION* _conn;
+ WiredTigerSessionCache* _sessionCache;
+};
- TEST(WiredTigerRecordStoreTest, GenerateCreateStringEmptyConfigString) {
- BSONObj spec = fromjson("{configString: ''}");
- StatusWith<std::string> result = WiredTigerRecordStore::parseOptionsField(spec);
- ASSERT_OK(result.getStatus());
- ASSERT_EQ(result.getValue(), ","); // "" would also be valid.
- }
+HarnessHelper* newHarnessHelper() {
+ return new WiredTigerHarnessHelper();
+}
- TEST(WiredTigerRecordStoreTest, GenerateCreateStringValidConfigFormat) {
- // TODO eventually this should fail since "abc" is not a valid WT option.
- BSONObj spec = fromjson("{configString: 'abc=def'}");
- StatusWith<std::string> result = WiredTigerRecordStore::parseOptionsField(spec);
- const Status& status = result.getStatus();
- ASSERT_OK(status);
- ASSERT_EQ(result.getValue(), "abc=def,");
- }
+TEST(WiredTigerRecordStoreTest, GenerateCreateStringEmptyDocument) {
+ BSONObj spec = fromjson("{}");
+ StatusWith<std::string> result = WiredTigerRecordStore::parseOptionsField(spec);
+ ASSERT_OK(result.getStatus());
+ ASSERT_EQ(result.getValue(), ""); // "," would also be valid.
+}
- TEST(WiredTigerRecordStoreTest, Isolation1 ) {
- unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- unique_ptr<RecordStore> rs( harnessHelper->newNonCappedRecordStore() );
+TEST(WiredTigerRecordStoreTest, GenerateCreateStringUnknownField) {
+ BSONObj spec = fromjson("{unknownField: 1}");
+ StatusWith<std::string> result = WiredTigerRecordStore::parseOptionsField(spec);
+ const Status& status = result.getStatus();
+ ASSERT_NOT_OK(status);
+ ASSERT_EQUALS(ErrorCodes::InvalidOptions, status);
+}
- RecordId loc1;
- RecordId loc2;
+TEST(WiredTigerRecordStoreTest, GenerateCreateStringNonStringConfig) {
+ BSONObj spec = fromjson("{configString: 12345}");
+ StatusWith<std::string> result = WiredTigerRecordStore::parseOptionsField(spec);
+ const Status& status = result.getStatus();
+ ASSERT_NOT_OK(status);
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch, status);
+}
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
+TEST(WiredTigerRecordStoreTest, GenerateCreateStringEmptyConfigString) {
+ BSONObj spec = fromjson("{configString: ''}");
+ StatusWith<std::string> result = WiredTigerRecordStore::parseOptionsField(spec);
+ ASSERT_OK(result.getStatus());
+ ASSERT_EQ(result.getValue(), ","); // "" would also be valid.
+}
- StatusWith<RecordId> res = rs->insertRecord( opCtx.get(), "a", 2, false );
- ASSERT_OK( res.getStatus() );
- loc1 = res.getValue();
+TEST(WiredTigerRecordStoreTest, GenerateCreateStringValidConfigFormat) {
+ // TODO eventually this should fail since "abc" is not a valid WT option.
+ BSONObj spec = fromjson("{configString: 'abc=def'}");
+ StatusWith<std::string> result = WiredTigerRecordStore::parseOptionsField(spec);
+ const Status& status = result.getStatus();
+ ASSERT_OK(status);
+ ASSERT_EQ(result.getValue(), "abc=def,");
+}
- res = rs->insertRecord( opCtx.get(), "a", 2, false );
- ASSERT_OK( res.getStatus() );
- loc2 = res.getValue();
+TEST(WiredTigerRecordStoreTest, Isolation1) {
+ unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
- uow.commit();
- }
- }
+ RecordId loc1;
+ RecordId loc2;
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> t1( harnessHelper->newOperationContext() );
- unique_ptr<OperationContext> t2( harnessHelper->newOperationContext() );
-
- unique_ptr<WriteUnitOfWork> w1( new WriteUnitOfWork( t1.get() ) );
- unique_ptr<WriteUnitOfWork> w2( new WriteUnitOfWork( t2.get() ) );
-
- rs->dataFor( t1.get(), loc1 );
- rs->dataFor( t2.get(), loc1 );
+ WriteUnitOfWork uow(opCtx.get());
- ASSERT_OK( rs->updateRecord( t1.get(), loc1, "b", 2, false, NULL ).getStatus() );
- ASSERT_OK( rs->updateRecord( t1.get(), loc2, "B", 2, false, NULL ).getStatus() );
+ StatusWith<RecordId> res = rs->insertRecord(opCtx.get(), "a", 2, false);
+ ASSERT_OK(res.getStatus());
+ loc1 = res.getValue();
- try {
- // this should fail
- rs->updateRecord( t2.get(), loc1, "c", 2, false, NULL );
- ASSERT( 0 );
- }
- catch ( WriteConflictException& dle ) {
- w2.reset( NULL );
- t2.reset( NULL );
- }
+ res = rs->insertRecord(opCtx.get(), "a", 2, false);
+ ASSERT_OK(res.getStatus());
+ loc2 = res.getValue();
- w1->commit(); // this should succeed
+ uow.commit();
}
}
- TEST(WiredTigerRecordStoreTest, Isolation2 ) {
- unique_ptr<HarnessHelper> harnessHelper( newHarnessHelper() );
- unique_ptr<RecordStore> rs( harnessHelper->newNonCappedRecordStore() );
-
- RecordId loc1;
- RecordId loc2;
+ {
+ unique_ptr<OperationContext> t1(harnessHelper->newOperationContext());
+ unique_ptr<OperationContext> t2(harnessHelper->newOperationContext());
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
+ unique_ptr<WriteUnitOfWork> w1(new WriteUnitOfWork(t1.get()));
+ unique_ptr<WriteUnitOfWork> w2(new WriteUnitOfWork(t2.get()));
- StatusWith<RecordId> res = rs->insertRecord( opCtx.get(), "a", 2, false );
- ASSERT_OK( res.getStatus() );
- loc1 = res.getValue();
+ rs->dataFor(t1.get(), loc1);
+ rs->dataFor(t2.get(), loc1);
- res = rs->insertRecord( opCtx.get(), "a", 2, false );
- ASSERT_OK( res.getStatus() );
- loc2 = res.getValue();
+ ASSERT_OK(rs->updateRecord(t1.get(), loc1, "b", 2, false, NULL).getStatus());
+ ASSERT_OK(rs->updateRecord(t1.get(), loc2, "B", 2, false, NULL).getStatus());
- uow.commit();
- }
+ try {
+ // this should fail
+ rs->updateRecord(t2.get(), loc1, "c", 2, false, NULL);
+ ASSERT(0);
+ } catch (WriteConflictException& dle) {
+ w2.reset(NULL);
+ t2.reset(NULL);
}
- {
- unique_ptr<OperationContext> t1( harnessHelper->newOperationContext() );
- unique_ptr<OperationContext> t2( harnessHelper->newOperationContext() );
+ w1->commit(); // this should succeed
+ }
+}
- // ensure we start transactions
- rs->dataFor( t1.get(), loc2 );
- rs->dataFor( t2.get(), loc2 );
+TEST(WiredTigerRecordStoreTest, Isolation2) {
+ unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
- {
- WriteUnitOfWork w( t1.get() );
- ASSERT_OK( rs->updateRecord( t1.get(), loc1, "b", 2, false, NULL ).getStatus() );
- w.commit();
- }
+ RecordId loc1;
+ RecordId loc2;
- {
- WriteUnitOfWork w( t2.get() );
- ASSERT_EQUALS( string("a"), rs->dataFor( t2.get(), loc1 ).data() );
- try {
- // this should fail as our version of loc1 is too old
- rs->updateRecord( t2.get(), loc1, "c", 2, false, NULL );
- ASSERT( 0 );
- }
- catch ( WriteConflictException& dle ) {
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ {
+ WriteUnitOfWork uow(opCtx.get());
- }
+ StatusWith<RecordId> res = rs->insertRecord(opCtx.get(), "a", 2, false);
+ ASSERT_OK(res.getStatus());
+ loc1 = res.getValue();
+
+ res = rs->insertRecord(opCtx.get(), "a", 2, false);
+ ASSERT_OK(res.getStatus());
+ loc2 = res.getValue();
+ uow.commit();
}
}
- TEST(WiredTigerRecordStoreTest, SizeStorer1 ) {
- unique_ptr<WiredTigerHarnessHelper> harnessHelper(new WiredTigerHarnessHelper());
- unique_ptr<RecordStore> rs( harnessHelper->newNonCappedRecordStore() );
-
- string uri = checked_cast<WiredTigerRecordStore*>( rs.get() )->getURI();
+ {
+ unique_ptr<OperationContext> t1(harnessHelper->newOperationContext());
+ unique_ptr<OperationContext> t2(harnessHelper->newOperationContext());
- string indexUri = "table:myindex";
- WiredTigerSizeStorer ss(harnessHelper->conn(), indexUri);
- checked_cast<WiredTigerRecordStore*>( rs.get() )->setSizeStorer( &ss );
-
- int N = 12;
+ // ensure we start transactions
+ rs->dataFor(t1.get(), loc2);
+ rs->dataFor(t2.get(), loc2);
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork uow( opCtx.get() );
- for ( int i = 0; i < N; i++ ) {
- StatusWith<RecordId> res = rs->insertRecord( opCtx.get(), "a", 2, false );
- ASSERT_OK( res.getStatus() );
- }
- uow.commit();
- }
+ WriteUnitOfWork w(t1.get());
+ ASSERT_OK(rs->updateRecord(t1.get(), loc1, "b", 2, false, NULL).getStatus());
+ w.commit();
}
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( N, rs->numRecords( opCtx.get() ) );
+ WriteUnitOfWork w(t2.get());
+ ASSERT_EQUALS(string("a"), rs->dataFor(t2.get(), loc1).data());
+ try {
+ // this should fail as our version of loc1 is too old
+ rs->updateRecord(t2.get(), loc1, "c", 2, false, NULL);
+ ASSERT(0);
+ } catch (WriteConflictException& dle) {
+ }
}
+ }
+}
- rs.reset( NULL );
+TEST(WiredTigerRecordStoreTest, SizeStorer1) {
+ unique_ptr<WiredTigerHarnessHelper> harnessHelper(new WiredTigerHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newNonCappedRecordStore());
- {
- long long numRecords;
- long long dataSize;
- ss.loadFromCache( uri, &numRecords, &dataSize );
- ASSERT_EQUALS( N, numRecords );
- }
+ string uri = checked_cast<WiredTigerRecordStore*>(rs.get())->getURI();
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- rs.reset( new WiredTigerRecordStore( opCtx.get(), "a.b", uri,
- false, -1, -1, NULL, &ss ) );
- }
+ string indexUri = "table:myindex";
+ WiredTigerSizeStorer ss(harnessHelper->conn(), indexUri);
+ checked_cast<WiredTigerRecordStore*>(rs.get())->setSizeStorer(&ss);
+ int N = 12;
+
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- ASSERT_EQUALS( N, rs->numRecords( opCtx.get() ) );
+ WriteUnitOfWork uow(opCtx.get());
+ for (int i = 0; i < N; i++) {
+ StatusWith<RecordId> res = rs->insertRecord(opCtx.get(), "a", 2, false);
+ ASSERT_OK(res.getStatus());
+ }
+ uow.commit();
}
+ }
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- WiredTigerRecoveryUnit* ru =
- checked_cast<WiredTigerRecoveryUnit*>( opCtx->recoveryUnit() );
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(N, rs->numRecords(opCtx.get()));
+ }
- {
- WriteUnitOfWork uow( opCtx.get() );
- WT_SESSION* s = ru->getSession(opCtx.get())->getSession();
- invariantWTOK( s->create( s, indexUri.c_str(), "" ) );
- uow.commit();
- }
+ rs.reset(NULL);
- ss.syncCache(true);
- }
+ {
+ long long numRecords;
+ long long dataSize;
+ ss.loadFromCache(uri, &numRecords, &dataSize);
+ ASSERT_EQUALS(N, numRecords);
+ }
+
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ rs.reset(new WiredTigerRecordStore(opCtx.get(), "a.b", uri, false, -1, -1, NULL, &ss));
+ }
+
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ASSERT_EQUALS(N, rs->numRecords(opCtx.get()));
+ }
+
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ WiredTigerRecoveryUnit* ru = checked_cast<WiredTigerRecoveryUnit*>(opCtx->recoveryUnit());
{
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- WiredTigerSizeStorer ss2(harnessHelper->conn(), indexUri);
- ss2.fillCache();
- long long numRecords;
- long long dataSize;
- ss2.loadFromCache( uri, &numRecords, &dataSize );
- ASSERT_EQUALS( N, numRecords );
+ WriteUnitOfWork uow(opCtx.get());
+ WT_SESSION* s = ru->getSession(opCtx.get())->getSession();
+ invariantWTOK(s->create(s, indexUri.c_str(), ""));
+ uow.commit();
}
- rs.reset( NULL ); // this has to be deleted before ss
+ ss.syncCache(true);
}
-namespace {
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ WiredTigerSizeStorer ss2(harnessHelper->conn(), indexUri);
+ ss2.fillCache();
+ long long numRecords;
+ long long dataSize;
+ ss2.loadFromCache(uri, &numRecords, &dataSize);
+ ASSERT_EQUALS(N, numRecords);
+ }
- class GoodValidateAdaptor : public ValidateAdaptor {
- public:
- virtual Status validate(const RecordData& record, size_t* dataSize) {
- *dataSize = static_cast<size_t>(record.size());
- return Status::OK();
- }
- };
+ rs.reset(NULL); // this has to be deleted before ss
+}
- class BadValidateAdaptor : public ValidateAdaptor {
- public:
- virtual Status validate(const RecordData& record, size_t* dataSize) {
- *dataSize = static_cast<size_t>(record.size());
- return Status(ErrorCodes::UnknownError, "");
- }
- };
-
- class SizeStorerValidateTest : public mongo::unittest::Test {
- private:
- virtual void setUp() {
- harnessHelper.reset(new WiredTigerHarnessHelper());
- sizeStorer.reset(new WiredTigerSizeStorer(harnessHelper->conn(), "table:sizeStorer"));
- rs.reset(harnessHelper->newNonCappedRecordStore());
- WiredTigerRecordStore* wtrs = checked_cast<WiredTigerRecordStore*>(rs.get());
- wtrs->setSizeStorer(sizeStorer.get());
- uri = wtrs->getURI();
-
- expectedNumRecords = 10000;
- expectedDataSize = expectedNumRecords * 2;
- {
- unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
- WriteUnitOfWork uow( opCtx.get() );
- for (int i=0; i < expectedNumRecords; i++) {
- ASSERT_OK(rs->insertRecord( opCtx.get(), "a", 2, false ).getStatus());
- }
- uow.commit();
- }
- ASSERT_EQUALS(expectedNumRecords, rs->numRecords(NULL));
- ASSERT_EQUALS(expectedDataSize, rs->dataSize(NULL));
- sizeStorer->storeToCache(uri, 0, 0);
- }
- virtual void tearDown() {
- expectedNumRecords = 0;
- expectedDataSize = 0;
-
- rs.reset(NULL);
- sizeStorer.reset(NULL);
- harnessHelper.reset(NULL);
- rs.reset(NULL);
- }
+namespace {
- protected:
- long long getNumRecords() const {
- long long numRecords;
- long long unused;
- sizeStorer->loadFromCache(uri, &numRecords, &unused);
- return numRecords;
- }
+class GoodValidateAdaptor : public ValidateAdaptor {
+public:
+ virtual Status validate(const RecordData& record, size_t* dataSize) {
+ *dataSize = static_cast<size_t>(record.size());
+ return Status::OK();
+ }
+};
- long long getDataSize() const {
- long long unused;
- long long dataSize;
- sizeStorer->loadFromCache(uri, &unused, &dataSize);
- return dataSize;
+class BadValidateAdaptor : public ValidateAdaptor {
+public:
+ virtual Status validate(const RecordData& record, size_t* dataSize) {
+ *dataSize = static_cast<size_t>(record.size());
+ return Status(ErrorCodes::UnknownError, "");
+ }
+};
+
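+// Fixture: inserts 10000 two-byte records, then zeroes the size storer's cached entry so each
+// test can observe whether validate() repairs the counters.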
+class SizeStorerValidateTest : public mongo::unittest::Test {
+private:
+ virtual void setUp() {
+ harnessHelper.reset(new WiredTigerHarnessHelper());
+ sizeStorer.reset(new WiredTigerSizeStorer(harnessHelper->conn(), "table:sizeStorer"));
+ rs.reset(harnessHelper->newNonCappedRecordStore());
+ WiredTigerRecordStore* wtrs = checked_cast<WiredTigerRecordStore*>(rs.get());
+ wtrs->setSizeStorer(sizeStorer.get());
+ uri = wtrs->getURI();
+
+ expectedNumRecords = 10000;
+ expectedDataSize = expectedNumRecords * 2;
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ WriteUnitOfWork uow(opCtx.get());
+ for (int i = 0; i < expectedNumRecords; i++) {
+ ASSERT_OK(rs->insertRecord(opCtx.get(), "a", 2, false).getStatus());
+ }
+ uow.commit();
}
+ ASSERT_EQUALS(expectedNumRecords, rs->numRecords(NULL));
+ ASSERT_EQUALS(expectedDataSize, rs->dataSize(NULL));
+ sizeStorer->storeToCache(uri, 0, 0);
+ }
+ virtual void tearDown() {
+ expectedNumRecords = 0;
+ expectedDataSize = 0;
- std::unique_ptr<WiredTigerHarnessHelper> harnessHelper;
- std::unique_ptr<WiredTigerSizeStorer> sizeStorer;
- std::unique_ptr<RecordStore> rs;
- std::string uri;
+ rs.reset(NULL);
+ sizeStorer.reset(NULL);
+ harnessHelper.reset(NULL);
+ rs.reset(NULL);
+ }
- long long expectedNumRecords;
- long long expectedDataSize;
- };
+protected:
+ long long getNumRecords() const {
+ long long numRecords;
+ long long unused;
+ sizeStorer->loadFromCache(uri, &numRecords, &unused);
+ return numRecords;
+ }
- // Basic validation - size storer data is not updated.
- TEST_F(SizeStorerValidateTest, Basic) {
- unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
- ValidateResults results;
- BSONObjBuilder output;
- ASSERT_OK(rs->validate(opCtx.get(), false, false, NULL, &results, &output));
- BSONObj obj = output.obj();
- ASSERT_EQUALS(expectedNumRecords, obj.getIntField("nrecords"));
- ASSERT_EQUALS(0, getNumRecords());
- ASSERT_EQUALS(0, getDataSize());
+ long long getDataSize() const {
+ long long unused;
+ long long dataSize;
+ sizeStorer->loadFromCache(uri, &unused, &dataSize);
+ return dataSize;
}
- // Full validation - size storer data is updated.
- TEST_F(SizeStorerValidateTest, FullWithGoodAdaptor) {
- unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
- GoodValidateAdaptor adaptor;
- ValidateResults results;
- BSONObjBuilder output;
- ASSERT_OK(rs->validate(opCtx.get(), true, true, &adaptor, &results, &output));
- BSONObj obj = output.obj();
- ASSERT_EQUALS(expectedNumRecords, obj.getIntField("nrecords"));
- ASSERT_EQUALS(expectedNumRecords, getNumRecords());
- ASSERT_EQUALS(expectedDataSize, getDataSize());
- }
-
- // Full validation with a validation adaptor that fails - size storer data is not updated.
- TEST_F(SizeStorerValidateTest, FullWithBadAdapter) {
- unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
- BadValidateAdaptor adaptor;
- ValidateResults results;
- BSONObjBuilder output;
- ASSERT_OK(rs->validate(opCtx.get(), true, true, &adaptor, &results, &output));
- BSONObj obj = output.obj();
- ASSERT_EQUALS(expectedNumRecords, obj.getIntField("nrecords"));
- ASSERT_EQUALS(0, getNumRecords());
- ASSERT_EQUALS(0, getDataSize());
- }
-
- // Load bad _numRecords and _dataSize values at record store creation.
- TEST_F(SizeStorerValidateTest, InvalidSizeStorerAtCreation) {
- rs.reset(NULL);
+ std::unique_ptr<WiredTigerHarnessHelper> harnessHelper;
+ std::unique_ptr<WiredTigerSizeStorer> sizeStorer;
+ std::unique_ptr<RecordStore> rs;
+ std::string uri;
+
+ long long expectedNumRecords;
+ long long expectedDataSize;
+};
+
+// Basic validation - size storer data is not updated.
+TEST_F(SizeStorerValidateTest, Basic) {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ ValidateResults results;
+ BSONObjBuilder output;
+ ASSERT_OK(rs->validate(opCtx.get(), false, false, NULL, &results, &output));
+ BSONObj obj = output.obj();
+ ASSERT_EQUALS(expectedNumRecords, obj.getIntField("nrecords"));
+ ASSERT_EQUALS(0, getNumRecords());
+ ASSERT_EQUALS(0, getDataSize());
+}
- unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
- sizeStorer->storeToCache(uri, expectedNumRecords*2, expectedDataSize*2);
- rs.reset(new WiredTigerRecordStore(opCtx.get(), "a.b", uri, false, -1, -1, NULL,
- sizeStorer.get()));
- ASSERT_EQUALS(expectedNumRecords*2, rs->numRecords(NULL));
- ASSERT_EQUALS(expectedDataSize*2, rs->dataSize(NULL));
-
- // Full validation should fix record and size counters.
- GoodValidateAdaptor adaptor;
- ValidateResults results;
- BSONObjBuilder output;
- ASSERT_OK(rs->validate(opCtx.get(), true, true, &adaptor, &results, &output));
- BSONObj obj = output.obj();
- ASSERT_EQUALS(expectedNumRecords, obj.getIntField("nrecords"));
- ASSERT_EQUALS(expectedNumRecords, getNumRecords());
- ASSERT_EQUALS(expectedDataSize, getDataSize());
+// Full validation - size storer data is updated.
+TEST_F(SizeStorerValidateTest, FullWithGoodAdaptor) {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ GoodValidateAdaptor adaptor;
+ ValidateResults results;
+ BSONObjBuilder output;
+ ASSERT_OK(rs->validate(opCtx.get(), true, true, &adaptor, &results, &output));
+ BSONObj obj = output.obj();
+ ASSERT_EQUALS(expectedNumRecords, obj.getIntField("nrecords"));
+ ASSERT_EQUALS(expectedNumRecords, getNumRecords());
+ ASSERT_EQUALS(expectedDataSize, getDataSize());
+}
- ASSERT_EQUALS(expectedNumRecords, rs->numRecords(NULL));
- ASSERT_EQUALS(expectedDataSize, rs->dataSize(NULL));
+// Full validation with a validation adaptor that fails - size storer data is not updated.
+TEST_F(SizeStorerValidateTest, FullWithBadAdapter) {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ BadValidateAdaptor adaptor;
+ ValidateResults results;
+ BSONObjBuilder output;
+ ASSERT_OK(rs->validate(opCtx.get(), true, true, &adaptor, &results, &output));
+ BSONObj obj = output.obj();
+ ASSERT_EQUALS(expectedNumRecords, obj.getIntField("nrecords"));
+ ASSERT_EQUALS(0, getNumRecords());
+ ASSERT_EQUALS(0, getDataSize());
+}
+
+// Load bad _numRecords and _dataSize values at record store creation.
+TEST_F(SizeStorerValidateTest, InvalidSizeStorerAtCreation) {
+ rs.reset(NULL);
+
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ sizeStorer->storeToCache(uri, expectedNumRecords * 2, expectedDataSize * 2);
+ rs.reset(
+ new WiredTigerRecordStore(opCtx.get(), "a.b", uri, false, -1, -1, NULL, sizeStorer.get()));
+ ASSERT_EQUALS(expectedNumRecords * 2, rs->numRecords(NULL));
+ ASSERT_EQUALS(expectedDataSize * 2, rs->dataSize(NULL));
+
+ // Full validation should fix record and size counters.
+ GoodValidateAdaptor adaptor;
+ ValidateResults results;
+ BSONObjBuilder output;
+ ASSERT_OK(rs->validate(opCtx.get(), true, true, &adaptor, &results, &output));
+ BSONObj obj = output.obj();
+ ASSERT_EQUALS(expectedNumRecords, obj.getIntField("nrecords"));
+ ASSERT_EQUALS(expectedNumRecords, getNumRecords());
+ ASSERT_EQUALS(expectedDataSize, getDataSize());
+
+ ASSERT_EQUALS(expectedNumRecords, rs->numRecords(NULL));
+ ASSERT_EQUALS(expectedDataSize, rs->dataSize(NULL));
}
} // namespace
- StatusWith<RecordId> insertBSON(unique_ptr<OperationContext>& opCtx,
- unique_ptr<RecordStore>& rs,
- const Timestamp& opTime) {
- BSONObj obj = BSON( "ts" << opTime );
- WriteUnitOfWork wuow(opCtx.get());
- WiredTigerRecordStore* wrs = checked_cast<WiredTigerRecordStore*>(rs.get());
- invariant( wrs );
- Status status = wrs->oplogDiskLocRegister( opCtx.get(), opTime );
- if (!status.isOK())
- return StatusWith<RecordId>( status );
- StatusWith<RecordId> res = rs->insertRecord(opCtx.get(),
- obj.objdata(),
- obj.objsize(),
- false);
- if (res.isOK())
- wuow.commit();
- return res;
- }
-
- // TODO make generic
- TEST(WiredTigerRecordStoreTest, OplogHack) {
- WiredTigerHarnessHelper harnessHelper;
- unique_ptr<RecordStore> rs(harnessHelper.newNonCappedRecordStore("local.oplog.foo"));
- {
- unique_ptr<OperationContext> opCtx(harnessHelper.newOperationContext());
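+// Helper: registers opTime with the oplog and inserts {ts: opTime}, committing only if the
+// insert succeeds.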
+StatusWith<RecordId> insertBSON(unique_ptr<OperationContext>& opCtx,
+ unique_ptr<RecordStore>& rs,
+ const Timestamp& opTime) {
+ BSONObj obj = BSON("ts" << opTime);
+ WriteUnitOfWork wuow(opCtx.get());
+ WiredTigerRecordStore* wrs = checked_cast<WiredTigerRecordStore*>(rs.get());
+ invariant(wrs);
+ Status status = wrs->oplogDiskLocRegister(opCtx.get(), opTime);
+ if (!status.isOK())
+ return StatusWith<RecordId>(status);
+ StatusWith<RecordId> res = rs->insertRecord(opCtx.get(), obj.objdata(), obj.objsize(), false);
+ if (res.isOK())
+ wuow.commit();
+ return res;
+}
- // always illegal
- ASSERT_EQ(insertBSON(opCtx, rs, Timestamp(2,-1)).getStatus(),
- ErrorCodes::BadValue);
+// TODO make generic
+TEST(WiredTigerRecordStoreTest, OplogHack) {
+ WiredTigerHarnessHelper harnessHelper;
+ unique_ptr<RecordStore> rs(harnessHelper.newNonCappedRecordStore("local.oplog.foo"));
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper.newOperationContext());
- {
- BSONObj obj = BSON("not_ts" << Timestamp(2,1));
- ASSERT_EQ(rs->insertRecord(opCtx.get(), obj.objdata(), obj.objsize(),
- false ).getStatus(),
- ErrorCodes::BadValue);
-
- obj = BSON( "ts" << "not a Timestamp" );
- ASSERT_EQ(rs->insertRecord(opCtx.get(), obj.objdata(), obj.objsize(),
- false ).getStatus(),
- ErrorCodes::BadValue);
- }
+ // always illegal
+ ASSERT_EQ(insertBSON(opCtx, rs, Timestamp(2, -1)).getStatus(), ErrorCodes::BadValue);
- // currently dasserts
- // ASSERT_EQ(insertBSON(opCtx, rs, BSON("ts" << Timestamp(-2,1))).getStatus(),
- // ErrorCodes::BadValue);
+ {
+ BSONObj obj = BSON("not_ts" << Timestamp(2, 1));
+ ASSERT_EQ(
+ rs->insertRecord(opCtx.get(), obj.objdata(), obj.objsize(), false).getStatus(),
+ ErrorCodes::BadValue);
- // success cases
- ASSERT_EQ(insertBSON(opCtx, rs, Timestamp(1,1)).getValue(),
- RecordId(1,1));
+ obj = BSON("ts"
+ << "not a Timestamp");
+ ASSERT_EQ(
+ rs->insertRecord(opCtx.get(), obj.objdata(), obj.objsize(), false).getStatus(),
+ ErrorCodes::BadValue);
+ }
- ASSERT_EQ(insertBSON(opCtx, rs, Timestamp(1,2)).getValue(),
- RecordId(1,2));
+ // currently dasserts
+ // ASSERT_EQ(insertBSON(opCtx, rs, BSON("ts" << Timestamp(-2,1))).getStatus(),
+ // ErrorCodes::BadValue);
- ASSERT_EQ(insertBSON(opCtx, rs, Timestamp(2,2)).getValue(),
- RecordId(2,2));
- }
+ // success cases
+ ASSERT_EQ(insertBSON(opCtx, rs, Timestamp(1, 1)).getValue(), RecordId(1, 1));
- {
- unique_ptr<OperationContext> opCtx(harnessHelper.newOperationContext());
- // find start
- ASSERT_EQ(rs->oplogStartHack(opCtx.get(), RecordId(0,1)), RecordId()); // nothing <=
- ASSERT_EQ(rs->oplogStartHack(opCtx.get(), RecordId(2,1)), RecordId(1,2)); // between
- ASSERT_EQ(rs->oplogStartHack(opCtx.get(), RecordId(2,2)), RecordId(2,2)); // ==
- ASSERT_EQ(rs->oplogStartHack(opCtx.get(), RecordId(2,3)), RecordId(2,2)); // > highest
- }
+ ASSERT_EQ(insertBSON(opCtx, rs, Timestamp(1, 2)).getValue(), RecordId(1, 2));
- {
- unique_ptr<OperationContext> opCtx(harnessHelper.newOperationContext());
- rs->temp_cappedTruncateAfter(opCtx.get(), RecordId(2,2), false); // no-op
- }
+ ASSERT_EQ(insertBSON(opCtx, rs, Timestamp(2, 2)).getValue(), RecordId(2, 2));
+ }
- {
- unique_ptr<OperationContext> opCtx(harnessHelper.newOperationContext());
- ASSERT_EQ(rs->oplogStartHack(opCtx.get(), RecordId(2,3)), RecordId(2,2));
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper.newOperationContext());
+ // find start
+ ASSERT_EQ(rs->oplogStartHack(opCtx.get(), RecordId(0, 1)), RecordId()); // nothing <=
+ ASSERT_EQ(rs->oplogStartHack(opCtx.get(), RecordId(2, 1)), RecordId(1, 2)); // between
+ ASSERT_EQ(rs->oplogStartHack(opCtx.get(), RecordId(2, 2)), RecordId(2, 2)); // ==
+ ASSERT_EQ(rs->oplogStartHack(opCtx.get(), RecordId(2, 3)), RecordId(2, 2)); // > highest
+ }
- {
- unique_ptr<OperationContext> opCtx(harnessHelper.newOperationContext());
- rs->temp_cappedTruncateAfter(opCtx.get(), RecordId(1,2), false); // deletes 2,2
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper.newOperationContext());
+ rs->temp_cappedTruncateAfter(opCtx.get(), RecordId(2, 2), false); // no-op
+ }
- {
- unique_ptr<OperationContext> opCtx(harnessHelper.newOperationContext());
- ASSERT_EQ(rs->oplogStartHack(opCtx.get(), RecordId(2,3)), RecordId(1,2));
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper.newOperationContext());
+ ASSERT_EQ(rs->oplogStartHack(opCtx.get(), RecordId(2, 3)), RecordId(2, 2));
+ }
- {
- unique_ptr<OperationContext> opCtx(harnessHelper.newOperationContext());
- rs->temp_cappedTruncateAfter(opCtx.get(), RecordId(1,2), true); // deletes 1,2
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper.newOperationContext());
+ rs->temp_cappedTruncateAfter(opCtx.get(), RecordId(1, 2), false); // deletes 2,2
+ }
- {
- unique_ptr<OperationContext> opCtx(harnessHelper.newOperationContext());
- ASSERT_EQ(rs->oplogStartHack(opCtx.get(), RecordId(2,3)), RecordId(1,1));
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper.newOperationContext());
+ ASSERT_EQ(rs->oplogStartHack(opCtx.get(), RecordId(2, 3)), RecordId(1, 2));
+ }
- {
- unique_ptr<OperationContext> opCtx(harnessHelper.newOperationContext());
- WriteUnitOfWork wuow(opCtx.get());
- ASSERT_OK(rs->truncate(opCtx.get())); // deletes 1,1 and leaves collection empty
- wuow.commit();
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper.newOperationContext());
+ rs->temp_cappedTruncateAfter(opCtx.get(), RecordId(1, 2), true); // deletes 1,2
+ }
- {
- unique_ptr<OperationContext> opCtx(harnessHelper.newOperationContext());
- ASSERT_EQ(rs->oplogStartHack(opCtx.get(), RecordId(2,3)), RecordId());
- }
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper.newOperationContext());
+ ASSERT_EQ(rs->oplogStartHack(opCtx.get(), RecordId(2, 3)), RecordId(1, 1));
}
- TEST(WiredTigerRecordStoreTest, OplogHackOnNonOplog) {
- WiredTigerHarnessHelper harnessHelper;
- unique_ptr<RecordStore> rs(harnessHelper.newNonCappedRecordStore("local.NOT_oplog.foo"));
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper.newOperationContext());
+ WriteUnitOfWork wuow(opCtx.get());
+ ASSERT_OK(rs->truncate(opCtx.get())); // deletes 1,1 and leaves collection empty
+ wuow.commit();
+ }
+ {
unique_ptr<OperationContext> opCtx(harnessHelper.newOperationContext());
+ ASSERT_EQ(rs->oplogStartHack(opCtx.get(), RecordId(2, 3)), RecordId());
+ }
+}
+
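+// oplogStartHack should return boost::none for a collection that is not the oplog.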
+TEST(WiredTigerRecordStoreTest, OplogHackOnNonOplog) {
+ WiredTigerHarnessHelper harnessHelper;
+ unique_ptr<RecordStore> rs(harnessHelper.newNonCappedRecordStore("local.NOT_oplog.foo"));
- BSONObj obj = BSON( "ts" << Timestamp(2,-1) );
+ unique_ptr<OperationContext> opCtx(harnessHelper.newOperationContext());
+
+ BSONObj obj = BSON("ts" << Timestamp(2, -1));
+ {
+ WriteUnitOfWork wuow(opCtx.get());
+ ASSERT_OK(rs->insertRecord(opCtx.get(), obj.objdata(), obj.objsize(), false).getStatus());
+ wuow.commit();
+ }
+ ASSERT_EQ(rs->oplogStartHack(opCtx.get(), RecordId(0, 1)), boost::none);
+}
+
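+// In a capped collection, a record committed after a still-pending earlier insert must stay
+// invisible until that insert commits.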
+TEST(WiredTigerRecordStoreTest, CappedOrder) {
+ unique_ptr<WiredTigerHarnessHelper> harnessHelper(new WiredTigerHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newCappedRecordStore("a.b", 100000, 10000));
+
+ RecordId loc1;
+
+ { // first insert a document
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- WriteUnitOfWork wuow( opCtx.get() );
- ASSERT_OK(rs->insertRecord(opCtx.get(), obj.objdata(),
- obj.objsize(), false ).getStatus());
- wuow.commit();
+ WriteUnitOfWork uow(opCtx.get());
+ StatusWith<RecordId> res = rs->insertRecord(opCtx.get(), "a", 2, false);
+ ASSERT_OK(res.getStatus());
+ loc1 = res.getValue();
+ uow.commit();
}
- ASSERT_EQ(rs->oplogStartHack(opCtx.get(), RecordId(0,1)), boost::none);
}
- TEST(WiredTigerRecordStoreTest, CappedOrder) {
- unique_ptr<WiredTigerHarnessHelper> harnessHelper( new WiredTigerHarnessHelper() );
- unique_ptr<RecordStore> rs(harnessHelper->newCappedRecordStore("a.b", 100000,10000));
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ auto cursor = rs->getCursor(opCtx.get());
+ auto record = cursor->seekExact(loc1);
+ ASSERT_EQ(loc1, record->id);
+ ASSERT(!cursor->next());
+ }
- RecordId loc1;
+ {
+        // now we insert 2 docs, but commit the 2nd one first
+        // we make sure we can't find the 2nd until the first is committed
+ unique_ptr<OperationContext> t1(harnessHelper->newOperationContext());
+ unique_ptr<WriteUnitOfWork> w1(new WriteUnitOfWork(t1.get()));
+ rs->insertRecord(t1.get(), "b", 2, false);
+ // do not commit yet
- { // first insert a document
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
+ { // create 2nd doc
+ unique_ptr<OperationContext> t2(harnessHelper->newOperationContext());
{
- WriteUnitOfWork uow( opCtx.get() );
- StatusWith<RecordId> res = rs->insertRecord( opCtx.get(), "a", 2, false );
- ASSERT_OK( res.getStatus() );
- loc1 = res.getValue();
- uow.commit();
+ WriteUnitOfWork w2(t2.get());
+ rs->insertRecord(t2.get(), "c", 2, false);
+ w2.commit();
}
}
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
+ { // state should be the same
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
auto cursor = rs->getCursor(opCtx.get());
auto record = cursor->seekExact(loc1);
- ASSERT_EQ( loc1, record->id );
+ ASSERT_EQ(loc1, record->id);
ASSERT(!cursor->next());
}
- {
-            // now we insert 2 docs, but commit the 2nd one first
-            // we make sure we can't find the 2nd until the first is committed
- unique_ptr<OperationContext> t1( harnessHelper->newOperationContext() );
- unique_ptr<WriteUnitOfWork> w1( new WriteUnitOfWork( t1.get() ) );
- rs->insertRecord( t1.get(), "b", 2, false );
- // do not commit yet
-
- { // create 2nd doc
- unique_ptr<OperationContext> t2( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork w2( t2.get() );
- rs->insertRecord( t2.get(), "c", 2, false );
- w2.commit();
- }
- }
+ w1->commit();
+ }
- { // state should be the same
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- auto cursor = rs->getCursor(opCtx.get());
- auto record = cursor->seekExact(loc1);
- ASSERT_EQ( loc1, record->id );
- ASSERT(!cursor->next());
- }
+ { // now all 3 docs should be visible
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ auto cursor = rs->getCursor(opCtx.get());
+ auto record = cursor->seekExact(loc1);
+ ASSERT_EQ(loc1, record->id);
+ ASSERT(cursor->next());
+ ASSERT(cursor->next());
+ ASSERT(!cursor->next());
+ }
+}
- w1->commit();
- }
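+// A saved cursor whose document is removed by capped rollover must fail to restore.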
+TEST(WiredTigerRecordStoreTest, CappedCursorRollover) {
+ unique_ptr<WiredTigerHarnessHelper> harnessHelper(new WiredTigerHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newCappedRecordStore("a.b", 10000, 5));
- { // now all 3 docs should be visible
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- auto cursor = rs->getCursor(opCtx.get());
- auto record = cursor->seekExact(loc1);
- ASSERT_EQ( loc1, record->id );
- ASSERT(cursor->next());
- ASSERT(cursor->next());
- ASSERT(!cursor->next());
+ { // first insert 3 documents
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ for (int i = 0; i < 3; ++i) {
+ WriteUnitOfWork uow(opCtx.get());
+ StatusWith<RecordId> res = rs->insertRecord(opCtx.get(), "a", 2, false);
+ ASSERT_OK(res.getStatus());
+ uow.commit();
}
}
- TEST(WiredTigerRecordStoreTest, CappedCursorRollover) {
- unique_ptr<WiredTigerHarnessHelper> harnessHelper( new WiredTigerHarnessHelper() );
- unique_ptr<RecordStore> rs(harnessHelper->newCappedRecordStore("a.b", 10000, 5));
+    // set up our cursor that should roll over
+ unique_ptr<OperationContext> cursorCtx(harnessHelper->newOperationContext());
+ auto cursor = rs->getCursor(cursorCtx.get());
+ ASSERT(cursor->next());
+ cursor->savePositioned();
+ cursorCtx->recoveryUnit()->abandonSnapshot();
- { // first insert 3 documents
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- for ( int i = 0; i < 3; ++i ) {
- WriteUnitOfWork uow( opCtx.get() );
- StatusWith<RecordId> res = rs->insertRecord( opCtx.get(), "a", 2, false );
- ASSERT_OK( res.getStatus() );
- uow.commit();
- }
+ { // insert 100 documents which causes rollover
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ for (int i = 0; i < 100; i++) {
+ WriteUnitOfWork uow(opCtx.get());
+ StatusWith<RecordId> res = rs->insertRecord(opCtx.get(), "a", 2, false);
+ ASSERT_OK(res.getStatus());
+ uow.commit();
}
+ }
-        // set up our cursor that should roll over
- unique_ptr<OperationContext> cursorCtx( harnessHelper->newOperationContext() );
- auto cursor = rs->getCursor(cursorCtx.get());
- ASSERT(cursor->next());
- cursor->savePositioned();
- cursorCtx->recoveryUnit()->abandonSnapshot();
-
- { // insert 100 documents which causes rollover
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- for ( int i = 0; i < 100; i++ ) {
- WriteUnitOfWork uow( opCtx.get() );
- StatusWith<RecordId> res = rs->insertRecord( opCtx.get(), "a", 2, false );
- ASSERT_OK( res.getStatus() );
- uow.commit();
- }
- }
+ // cursor should now be dead
+ ASSERT_FALSE(cursor->restore(cursorCtx.get()));
+ ASSERT(!cursor->next());
+}
- // cursor should now be dead
- ASSERT_FALSE(cursor->restore(cursorCtx.get()));
- ASSERT(!cursor->next());
- }
+RecordId _oplogOrderInsertOplog(OperationContext* txn, unique_ptr<RecordStore>& rs, int inc) {
+ Timestamp opTime = Timestamp(5, inc);
+ WiredTigerRecordStore* wrs = checked_cast<WiredTigerRecordStore*>(rs.get());
+ Status status = wrs->oplogDiskLocRegister(txn, opTime);
+ ASSERT_OK(status);
+ BSONObj obj = BSON("ts" << opTime);
+ StatusWith<RecordId> res = rs->insertRecord(txn, obj.objdata(), obj.objsize(), false);
+ ASSERT_OK(res.getStatus());
+ return res.getValue();
+}
- RecordId _oplogOrderInsertOplog( OperationContext* txn,
- unique_ptr<RecordStore>& rs,
- int inc ) {
- Timestamp opTime = Timestamp(5,inc);
- WiredTigerRecordStore* wrs = checked_cast<WiredTigerRecordStore*>(rs.get());
- Status status = wrs->oplogDiskLocRegister( txn, opTime );
- ASSERT_OK( status );
- BSONObj obj = BSON( "ts" << opTime );
- StatusWith<RecordId> res = rs->insertRecord( txn, obj.objdata(), obj.objsize(), false );
- ASSERT_OK( res.getStatus() );
- return res.getValue();
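+// Same commit-out-of-order scenario as CappedOrder, exercised through the oplog visibility rules.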
+TEST(WiredTigerRecordStoreTest, OplogOrder) {
+ unique_ptr<WiredTigerHarnessHelper> harnessHelper(new WiredTigerHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newCappedRecordStore("local.oplog.foo", 100000, -1));
+
+ {
+ const WiredTigerRecordStore* wrs = checked_cast<WiredTigerRecordStore*>(rs.get());
+ ASSERT(wrs->isOplog());
+ ASSERT(wrs->usingOplogHack());
}
- TEST(WiredTigerRecordStoreTest, OplogOrder) {
- unique_ptr<WiredTigerHarnessHelper> harnessHelper( new WiredTigerHarnessHelper() );
- unique_ptr<RecordStore> rs(harnessHelper->newCappedRecordStore("local.oplog.foo",
- 100000,
- -1));
+ RecordId loc1;
+ { // first insert a document
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
{
- const WiredTigerRecordStore* wrs = checked_cast<WiredTigerRecordStore*>(rs.get());
- ASSERT( wrs->isOplog() );
- ASSERT( wrs->usingOplogHack() );
+ WriteUnitOfWork uow(opCtx.get());
+ loc1 = _oplogOrderInsertOplog(opCtx.get(), rs, 1);
+ uow.commit();
}
+ }
+
+ {
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ auto cursor = rs->getCursor(opCtx.get());
+ auto record = cursor->seekExact(loc1);
+ ASSERT_EQ(loc1, record->id);
+ ASSERT(!cursor->next());
+ }
- RecordId loc1;
+ {
+        // now we insert 2 docs, but commit the 2nd one first
+        // we make sure we can't find the 2nd until the first is committed
+ unique_ptr<OperationContext> t1(harnessHelper->newOperationContext());
+ unique_ptr<WriteUnitOfWork> w1(new WriteUnitOfWork(t1.get()));
+ _oplogOrderInsertOplog(t1.get(), rs, 2);
+ // do not commit yet
- { // first insert a document
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
+ { // create 2nd doc
+ unique_ptr<OperationContext> t2(harnessHelper->newOperationContext());
{
- WriteUnitOfWork uow( opCtx.get() );
- loc1 = _oplogOrderInsertOplog( opCtx.get(), rs, 1 );
- uow.commit();
+ WriteUnitOfWork w2(t2.get());
+ _oplogOrderInsertOplog(t2.get(), rs, 3);
+ w2.commit();
}
}
- {
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
+ { // state should be the same
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
auto cursor = rs->getCursor(opCtx.get());
auto record = cursor->seekExact(loc1);
- ASSERT_EQ( loc1, record->id );
+ ASSERT_EQ(loc1, record->id);
ASSERT(!cursor->next());
}
- {
-            // now we insert 2 docs, but commit the 2nd one first
-            // we make sure we can't find the 2nd until the first is committed
- unique_ptr<OperationContext> t1( harnessHelper->newOperationContext() );
- unique_ptr<WriteUnitOfWork> w1( new WriteUnitOfWork( t1.get() ) );
- _oplogOrderInsertOplog( t1.get(), rs, 2 );
- // do not commit yet
-
- { // create 2nd doc
- unique_ptr<OperationContext> t2( harnessHelper->newOperationContext() );
- {
- WriteUnitOfWork w2( t2.get() );
- _oplogOrderInsertOplog( t2.get(), rs, 3 );
- w2.commit();
- }
- }
-
- { // state should be the same
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- auto cursor = rs->getCursor(opCtx.get());
- auto record = cursor->seekExact(loc1);
- ASSERT_EQ( loc1, record->id );
- ASSERT(!cursor->next());
- }
-
- w1->commit();
- }
-
- { // now all 3 docs should be visible
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- auto cursor = rs->getCursor(opCtx.get());
- auto record = cursor->seekExact(loc1);
- ASSERT_EQ( loc1, record->id );
- ASSERT(cursor->next());
- ASSERT(cursor->next());
- ASSERT(!cursor->next());
- }
+ w1->commit();
}
- TEST(WiredTigerRecordStoreTest, StorageSizeStatisticsDisabled) {
- WiredTigerHarnessHelper harnessHelper("statistics=(none)");
- unique_ptr<RecordStore> rs(harnessHelper.newNonCappedRecordStore("a.b"));
-
- unique_ptr<OperationContext> opCtx(harnessHelper.newOperationContext());
- ASSERT_THROWS(rs->storageSize(opCtx.get()), UserException);
+ { // now all 3 docs should be visible
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ auto cursor = rs->getCursor(opCtx.get());
+ auto record = cursor->seekExact(loc1);
+ ASSERT_EQ(loc1, record->id);
+ ASSERT(cursor->next());
+ ASSERT(cursor->next());
+ ASSERT(!cursor->next());
}
+}
- TEST(WiredTigerRecordStoreTest, AppendCustomStatsMetadata) {
- WiredTigerHarnessHelper harnessHelper;
- unique_ptr<RecordStore> rs(harnessHelper.newNonCappedRecordStore("a.b"));
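+// storageSize() relies on WiredTiger statistics, so it should throw when the engine runs with
+// statistics=(none).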
+TEST(WiredTigerRecordStoreTest, StorageSizeStatisticsDisabled) {
+ WiredTigerHarnessHelper harnessHelper("statistics=(none)");
+ unique_ptr<RecordStore> rs(harnessHelper.newNonCappedRecordStore("a.b"));
- unique_ptr<OperationContext> opCtx(harnessHelper.newOperationContext());
- BSONObjBuilder builder;
- rs->appendCustomStats(opCtx.get(), &builder, 1.0);
- BSONObj customStats = builder.obj();
+ unique_ptr<OperationContext> opCtx(harnessHelper.newOperationContext());
+ ASSERT_THROWS(rs->storageSize(opCtx.get()), UserException);
+}
- BSONElement wiredTigerElement = customStats.getField(kWiredTigerEngineName);
- ASSERT_TRUE(wiredTigerElement.isABSONObj());
- BSONObj wiredTiger = wiredTigerElement.Obj();
+TEST(WiredTigerRecordStoreTest, AppendCustomStatsMetadata) {
+ WiredTigerHarnessHelper harnessHelper;
+ unique_ptr<RecordStore> rs(harnessHelper.newNonCappedRecordStore("a.b"));
- BSONElement metadataElement = wiredTiger.getField("metadata");
- ASSERT_TRUE(metadataElement.isABSONObj());
- BSONObj metadata = metadataElement.Obj();
+ unique_ptr<OperationContext> opCtx(harnessHelper.newOperationContext());
+ BSONObjBuilder builder;
+ rs->appendCustomStats(opCtx.get(), &builder, 1.0);
+ BSONObj customStats = builder.obj();
- BSONElement versionElement = metadata.getField("formatVersion");
- ASSERT_TRUE(versionElement.isNumber());
+ BSONElement wiredTigerElement = customStats.getField(kWiredTigerEngineName);
+ ASSERT_TRUE(wiredTigerElement.isABSONObj());
+ BSONObj wiredTiger = wiredTigerElement.Obj();
- BSONElement creationStringElement = wiredTiger.getField("creationString");
- ASSERT_EQUALS(creationStringElement.type(), String);
- }
+ BSONElement metadataElement = wiredTiger.getField("metadata");
+ ASSERT_TRUE(metadataElement.isABSONObj());
+ BSONObj metadata = metadataElement.Obj();
- TEST(WiredTigerRecordStoreTest, CappedCursorYieldFirst) {
- unique_ptr<WiredTigerHarnessHelper> harnessHelper( new WiredTigerHarnessHelper() );
- unique_ptr<RecordStore> rs(harnessHelper->newCappedRecordStore("a.b", 10000, 50));
+ BSONElement versionElement = metadata.getField("formatVersion");
+ ASSERT_TRUE(versionElement.isNumber());
- RecordId loc1;
+ BSONElement creationStringElement = wiredTiger.getField("creationString");
+ ASSERT_EQUALS(creationStringElement.type(), String);
+}
- { // first insert a document
- unique_ptr<OperationContext> opCtx( harnessHelper->newOperationContext() );
- WriteUnitOfWork uow( opCtx.get() );
- StatusWith<RecordId> res = rs->insertRecord( opCtx.get(), "a", 2, false );
- ASSERT_OK( res.getStatus() );
- loc1 = res.getValue();
- uow.commit();
- }
+TEST(WiredTigerRecordStoreTest, CappedCursorYieldFirst) {
+ unique_ptr<WiredTigerHarnessHelper> harnessHelper(new WiredTigerHarnessHelper());
+ unique_ptr<RecordStore> rs(harnessHelper->newCappedRecordStore("a.b", 10000, 50));
- unique_ptr<OperationContext> cursorCtx( harnessHelper->newOperationContext() );
- auto cursor = rs->getCursor(cursorCtx.get());
+ RecordId loc1;
- // See that things work if you yield before you first call getNext().
- cursor->savePositioned();
- cursorCtx->recoveryUnit()->abandonSnapshot();
- ASSERT_TRUE(cursor->restore(cursorCtx.get()));
- auto record = cursor->next();
- ASSERT_EQ( loc1, record->id );
- ASSERT(!cursor->next());
+ { // first insert a document
+ unique_ptr<OperationContext> opCtx(harnessHelper->newOperationContext());
+ WriteUnitOfWork uow(opCtx.get());
+ StatusWith<RecordId> res = rs->insertRecord(opCtx.get(), "a", 2, false);
+ ASSERT_OK(res.getStatus());
+ loc1 = res.getValue();
+ uow.commit();
}
+ unique_ptr<OperationContext> cursorCtx(harnessHelper->newOperationContext());
+ auto cursor = rs->getCursor(cursorCtx.get());
+
+ // See that things work if you yield before you first call getNext().
+ cursor->savePositioned();
+ cursorCtx->recoveryUnit()->abandonSnapshot();
+ ASSERT_TRUE(cursor->restore(cursorCtx.get()));
+ auto record = cursor->next();
+ ASSERT_EQ(loc1, record->id);
+ ASSERT(!cursor->next());
+}
+
} // namespace mongo
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp
index 4c71b448804..a248085ff36 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp
@@ -47,363 +47,347 @@
namespace mongo {
- namespace {
- struct WaitUntilDurableData {
- WaitUntilDurableData() :
- numWaitingForSync(0),
- lastSyncTime(0) {
- }
-
- void syncHappend() {
- stdx::lock_guard<stdx::mutex> lk( mutex );
- lastSyncTime++;
- condvar.notify_all();
- }
-
-            // returns true if a sync happened while we waited
- bool waitUntilDurable() {
- stdx::unique_lock<stdx::mutex> lk( mutex );
- long long start = lastSyncTime;
- numWaitingForSync.fetchAndAdd(1);
- condvar.timed_wait(lk,boost::posix_time::milliseconds(50));
- numWaitingForSync.fetchAndAdd(-1);
- return lastSyncTime > start;
- }
-
- AtomicUInt32 numWaitingForSync;
-
- stdx::mutex mutex; // this just protects lastSyncTime
- stdx::condition_variable condvar;
- long long lastSyncTime;
- } waitUntilDurableData;
+namespace {
+struct WaitUntilDurableData {
+ WaitUntilDurableData() : numWaitingForSync(0), lastSyncTime(0) {}
+
+ void syncHappend() {
+ stdx::lock_guard<stdx::mutex> lk(mutex);
+ lastSyncTime++;
+ condvar.notify_all();
}
- WiredTigerRecoveryUnit::WiredTigerRecoveryUnit(WiredTigerSessionCache* sc) :
- _sessionCache( sc ),
- _session( NULL ),
- _inUnitOfWork(false),
- _active( false ),
- _myTransactionCount( 1 ),
- _everStartedWrite( false ),
- _currentlySquirreled( false ),
- _syncing( false ),
- _noTicketNeeded( false ) {
+    // returns true if a sync happened while we waited
+ bool waitUntilDurable() {
+ stdx::unique_lock<stdx::mutex> lk(mutex);
+ long long start = lastSyncTime;
+ numWaitingForSync.fetchAndAdd(1);
+ condvar.timed_wait(lk, boost::posix_time::milliseconds(50));
+ numWaitingForSync.fetchAndAdd(-1);
+ return lastSyncTime > start;
}
- WiredTigerRecoveryUnit::~WiredTigerRecoveryUnit() {
- invariant(!_inUnitOfWork);
- _abort();
- if ( _session ) {
- _sessionCache->releaseSession( _session );
- _session = NULL;
- }
- }
+ AtomicUInt32 numWaitingForSync;
- void WiredTigerRecoveryUnit::reportState( BSONObjBuilder* b ) const {
- b->append("wt_inUnitOfWork", _inUnitOfWork);
- b->append("wt_active", _active);
- b->append("wt_everStartedWrite", _everStartedWrite);
- b->append("wt_hasTicket", _ticket.hasTicket());
- b->appendNumber("wt_myTransactionCount", static_cast<long long>(_myTransactionCount));
- if (_active)
- b->append("wt_millisSinceCommit", _timer.millis());
- }
+ stdx::mutex mutex; // this just protects lastSyncTime
+ stdx::condition_variable condvar;
+ long long lastSyncTime;
+} waitUntilDurableData;
+}
- void WiredTigerRecoveryUnit::_commit() {
- try {
- if ( _session && _active ) {
- _txnClose( true );
- }
+WiredTigerRecoveryUnit::WiredTigerRecoveryUnit(WiredTigerSessionCache* sc)
+ : _sessionCache(sc),
+ _session(NULL),
+ _inUnitOfWork(false),
+ _active(false),
+ _myTransactionCount(1),
+ _everStartedWrite(false),
+ _currentlySquirreled(false),
+ _syncing(false),
+ _noTicketNeeded(false) {}
+
+WiredTigerRecoveryUnit::~WiredTigerRecoveryUnit() {
+ invariant(!_inUnitOfWork);
+ _abort();
+ if (_session) {
+ _sessionCache->releaseSession(_session);
+ _session = NULL;
+ }
+}
- for (Changes::const_iterator it = _changes.begin(), end = _changes.end(); it != end;
- ++it) {
- (*it)->commit();
- }
- _changes.clear();
+void WiredTigerRecoveryUnit::reportState(BSONObjBuilder* b) const {
+ b->append("wt_inUnitOfWork", _inUnitOfWork);
+ b->append("wt_active", _active);
+ b->append("wt_everStartedWrite", _everStartedWrite);
+ b->append("wt_hasTicket", _ticket.hasTicket());
+ b->appendNumber("wt_myTransactionCount", static_cast<long long>(_myTransactionCount));
+ if (_active)
+ b->append("wt_millisSinceCommit", _timer.millis());
+}
- invariant(!_active);
+void WiredTigerRecoveryUnit::_commit() {
+ try {
+ if (_session && _active) {
+ _txnClose(true);
}
- catch (...) {
- std::terminate();
+
+ for (Changes::const_iterator it = _changes.begin(), end = _changes.end(); it != end; ++it) {
+ (*it)->commit();
}
+ _changes.clear();
+
+ invariant(!_active);
+ } catch (...) {
+ std::terminate();
}
+}
- void WiredTigerRecoveryUnit::_abort() {
- try {
- if ( _session && _active ) {
- _txnClose( false );
- }
-
- for (Changes::const_reverse_iterator it = _changes.rbegin(), end = _changes.rend();
- it != end; ++it) {
- Change* change = *it;
- LOG(2) << "CUSTOM ROLLBACK " << demangleName(typeid(*change));
- change->rollback();
- }
- _changes.clear();
-
- invariant(!_active);
+void WiredTigerRecoveryUnit::_abort() {
+ try {
+ if (_session && _active) {
+ _txnClose(false);
}
- catch (...) {
- std::terminate();
+
+ for (Changes::const_reverse_iterator it = _changes.rbegin(), end = _changes.rend();
+ it != end;
+ ++it) {
+ Change* change = *it;
+ LOG(2) << "CUSTOM ROLLBACK " << demangleName(typeid(*change));
+ change->rollback();
}
- }
+ _changes.clear();
- void WiredTigerRecoveryUnit::beginUnitOfWork(OperationContext* opCtx) {
- invariant(!_inUnitOfWork);
- invariant(!_currentlySquirreled);
- _inUnitOfWork = true;
- _everStartedWrite = true;
- _getTicket(opCtx);
+ invariant(!_active);
+ } catch (...) {
+ std::terminate();
}
+}
- void WiredTigerRecoveryUnit::commitUnitOfWork() {
- invariant(_inUnitOfWork);
- _inUnitOfWork = false;
- _commit();
- }
+void WiredTigerRecoveryUnit::beginUnitOfWork(OperationContext* opCtx) {
+ invariant(!_inUnitOfWork);
+ invariant(!_currentlySquirreled);
+ _inUnitOfWork = true;
+ _everStartedWrite = true;
+ _getTicket(opCtx);
+}
- void WiredTigerRecoveryUnit::abortUnitOfWork() {
- invariant(_inUnitOfWork);
- _inUnitOfWork = false;
- _abort();
- }
+void WiredTigerRecoveryUnit::commitUnitOfWork() {
+ invariant(_inUnitOfWork);
+ _inUnitOfWork = false;
+ _commit();
+}
- void WiredTigerRecoveryUnit::goingToWaitUntilDurable() {
- if ( _active ) {
- // too late, can't change config
- return;
- }
- // yay, we've configured ourselves for sync
- _syncing = true;
+void WiredTigerRecoveryUnit::abortUnitOfWork() {
+ invariant(_inUnitOfWork);
+ _inUnitOfWork = false;
+ _abort();
+}
+
+void WiredTigerRecoveryUnit::goingToWaitUntilDurable() {
+ if (_active) {
+ // too late, can't change config
+ return;
}
+ // yay, we've configured ourselves for sync
+ _syncing = true;
+}
- bool WiredTigerRecoveryUnit::waitUntilDurable() {
- if ( _syncing && _everStartedWrite ) {
- // we did a sync, so we're good
- return true;
- }
- waitUntilDurableData.waitUntilDurable();
+bool WiredTigerRecoveryUnit::waitUntilDurable() {
+ if (_syncing && _everStartedWrite) {
+ // we did a sync, so we're good
return true;
}
+ waitUntilDurableData.waitUntilDurable();
+ return true;
+}
+
+void WiredTigerRecoveryUnit::registerChange(Change* change) {
+ invariant(_inUnitOfWork);
+ _changes.push_back(change);
+}
+
+WiredTigerRecoveryUnit* WiredTigerRecoveryUnit::get(OperationContext* txn) {
+ invariant(txn);
+ return checked_cast<WiredTigerRecoveryUnit*>(txn->recoveryUnit());
+}
- void WiredTigerRecoveryUnit::registerChange(Change* change) {
- invariant(_inUnitOfWork);
- _changes.push_back(change);
+void WiredTigerRecoveryUnit::assertInActiveTxn() const {
+ fassert(28575, _active);
+}
+
+WiredTigerSession* WiredTigerRecoveryUnit::getSession(OperationContext* opCtx) {
+ if (!_session) {
+ _session = _sessionCache->getSession();
}
- WiredTigerRecoveryUnit* WiredTigerRecoveryUnit::get(OperationContext *txn) {
- invariant( txn );
- return checked_cast<WiredTigerRecoveryUnit*>(txn->recoveryUnit());
+ if (!_active) {
+ _txnOpen(opCtx);
}
+ return _session;
+}
- void WiredTigerRecoveryUnit::assertInActiveTxn() const {
- fassert( 28575, _active );
+void WiredTigerRecoveryUnit::abandonSnapshot() {
+ invariant(!_inUnitOfWork);
+ if (_active) {
+ // Can't be in a WriteUnitOfWork, so safe to rollback
+ _txnClose(false);
}
+}
- WiredTigerSession* WiredTigerRecoveryUnit::getSession(OperationContext* opCtx) {
- if ( !_session ) {
- _session = _sessionCache->getSession();
- }
+void WiredTigerRecoveryUnit::setOplogReadTill(const RecordId& loc) {
+ _oplogReadTill = loc;
+}
- if ( !_active ) {
- _txnOpen(opCtx);
- }
- return _session;
- }
+namespace {
- void WiredTigerRecoveryUnit::abandonSnapshot() {
- invariant(!_inUnitOfWork);
- if (_active) {
- // Can't be in a WriteUnitOfWork, so safe to rollback
- _txnClose(false);
- }
- }
- void WiredTigerRecoveryUnit::setOplogReadTill( const RecordId& loc ) {
- _oplogReadTill = loc;
- }
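+// Exposes a TicketHolder's capacity as a runtime-settable server parameter; instantiated below
+// for the concurrent read and write transaction limits.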
+class TicketServerParameter : public ServerParameter {
+ MONGO_DISALLOW_COPYING(TicketServerParameter);
- namespace {
-
-
- class TicketServerParameter : public ServerParameter {
- MONGO_DISALLOW_COPYING(TicketServerParameter);
- public:
- TicketServerParameter(TicketHolder* holder, const std::string& name)
- : ServerParameter(ServerParameterSet::getGlobal(),
- name,
- true,
- true),
- _holder( holder ) {
- }
-
- virtual void append(OperationContext* txn, BSONObjBuilder& b, const std::string& name) {
- b.append(name, _holder->outof());
- }
-
- virtual Status set( const BSONElement& newValueElement ) {
- if (!newValueElement.isNumber())
- return Status(ErrorCodes::BadValue,
- str::stream() << name() << " has to be a number");
- return _set(newValueElement.numberInt());
- }
-
- virtual Status setFromString( const std::string& str ) {
- int num = 0;
- Status status = parseNumberFromString(str, &num);
- if (!status.isOK())
- return status;
- return _set(num);
- }
-
- Status _set(int newNum) {
- if (newNum <= 0) {
- return Status(ErrorCodes::BadValue,
- str::stream() << name() << " has to be > 0");
- }
-
- return _holder->resize(newNum);
- }
-
- private:
- TicketHolder* _holder;
- };
-
- TicketHolder openWriteTransaction(128);
- TicketServerParameter openWriteTransactionParam(&openWriteTransaction,
- "wiredTigerConcurrentWriteTransactions");
-
- TicketHolder openReadTransaction(128);
- TicketServerParameter openReadTransactionParam(&openReadTransaction,
- "wiredTigerConcurrentReadTransactions");
+public:
+ TicketServerParameter(TicketHolder* holder, const std::string& name)
+ : ServerParameter(ServerParameterSet::getGlobal(), name, true, true), _holder(holder) {}
+ virtual void append(OperationContext* txn, BSONObjBuilder& b, const std::string& name) {
+ b.append(name, _holder->outof());
}
- void WiredTigerRecoveryUnit::appendGlobalStats(BSONObjBuilder& b) {
- BSONObjBuilder bb(b.subobjStart("concurrentTransactions"));
- {
- BSONObjBuilder bbb(bb.subobjStart("write"));
- bbb.append("out", openWriteTransaction.used());
- bbb.append("available", openWriteTransaction.available());
- bbb.append("totalTickets", openWriteTransaction.outof());
- bbb.done();
- }
- {
- BSONObjBuilder bbb(bb.subobjStart("read"));
- bbb.append("out", openReadTransaction.used());
- bbb.append("available", openReadTransaction.available());
- bbb.append("totalTickets", openReadTransaction.outof());
- bbb.done();
- }
- bb.done();
+ virtual Status set(const BSONElement& newValueElement) {
+ if (!newValueElement.isNumber())
+ return Status(ErrorCodes::BadValue, str::stream() << name() << " has to be a number");
+ return _set(newValueElement.numberInt());
}
- void WiredTigerRecoveryUnit::_txnClose( bool commit ) {
- invariant( _active );
- WT_SESSION *s = _session->getSession();
- if ( commit ) {
- invariantWTOK( s->commit_transaction(s, NULL) );
- LOG(2) << "WT commit_transaction";
- if ( _syncing )
- waitUntilDurableData.syncHappend();
- }
- else {
- invariantWTOK( s->rollback_transaction(s, NULL) );
- LOG(2) << "WT rollback_transaction";
+ virtual Status setFromString(const std::string& str) {
+ int num = 0;
+ Status status = parseNumberFromString(str, &num);
+ if (!status.isOK())
+ return status;
+ return _set(num);
+ }
+
+ Status _set(int newNum) {
+ if (newNum <= 0) {
+ return Status(ErrorCodes::BadValue, str::stream() << name() << " has to be > 0");
}
- _active = false;
- _myTransactionCount++;
- _ticket.reset(NULL);
+
+ return _holder->resize(newNum);
}
- SnapshotId WiredTigerRecoveryUnit::getSnapshotId() const {
- // TODO: use actual wiredtiger txn id
- return SnapshotId(_myTransactionCount);
+private:
+ TicketHolder* _holder;
+};
+
+TicketHolder openWriteTransaction(128);
+TicketServerParameter openWriteTransactionParam(&openWriteTransaction,
+ "wiredTigerConcurrentWriteTransactions");
+
+TicketHolder openReadTransaction(128);
+TicketServerParameter openReadTransactionParam(&openReadTransaction,
+ "wiredTigerConcurrentReadTransactions");
+}
+
+void WiredTigerRecoveryUnit::appendGlobalStats(BSONObjBuilder& b) {
+ BSONObjBuilder bb(b.subobjStart("concurrentTransactions"));
+ {
+ BSONObjBuilder bbb(bb.subobjStart("write"));
+ bbb.append("out", openWriteTransaction.used());
+ bbb.append("available", openWriteTransaction.available());
+ bbb.append("totalTickets", openWriteTransaction.outof());
+ bbb.done();
+ }
+ {
+ BSONObjBuilder bbb(bb.subobjStart("read"));
+ bbb.append("out", openReadTransaction.used());
+ bbb.append("available", openReadTransaction.available());
+ bbb.append("totalTickets", openReadTransaction.outof());
+ bbb.done();
}
+ bb.done();
+}
- void WiredTigerRecoveryUnit::markNoTicketRequired() {
- invariant(!_ticket.hasTicket());
- _noTicketNeeded = true;
+void WiredTigerRecoveryUnit::_txnClose(bool commit) {
+ invariant(_active);
+ WT_SESSION* s = _session->getSession();
+ if (commit) {
+ invariantWTOK(s->commit_transaction(s, NULL));
+ LOG(2) << "WT commit_transaction";
+ if (_syncing)
+ waitUntilDurableData.syncHappend();
+ } else {
+ invariantWTOK(s->rollback_transaction(s, NULL));
+ LOG(2) << "WT rollback_transaction";
}
+ _active = false;
+ _myTransactionCount++;
+ _ticket.reset(NULL);
+}
- void WiredTigerRecoveryUnit::_getTicket(OperationContext* opCtx) {
- // already have a ticket
- if (_ticket.hasTicket())
- return;
+SnapshotId WiredTigerRecoveryUnit::getSnapshotId() const {
+ // TODO: use actual wiredtiger txn id
+ return SnapshotId(_myTransactionCount);
+}
- if (_noTicketNeeded)
- return;
+void WiredTigerRecoveryUnit::markNoTicketRequired() {
+ invariant(!_ticket.hasTicket());
+ _noTicketNeeded = true;
+}
- bool writeLocked;
+void WiredTigerRecoveryUnit::_getTicket(OperationContext* opCtx) {
+ // already have a ticket
+ if (_ticket.hasTicket())
+ return;
- // If we have a strong lock, waiting for a ticket can cause a deadlock.
- if (opCtx != NULL &&
- opCtx->lockState() != NULL) {
- if (opCtx->lockState()->hasStrongLocks())
- return;
- writeLocked = opCtx->lockState()->isWriteLocked();
- }
- else {
- writeLocked = _everStartedWrite;
- }
+ if (_noTicketNeeded)
+ return;
- TicketHolder* holder = writeLocked ? &openWriteTransaction : &openReadTransaction;
+ bool writeLocked;
- holder->waitForTicket();
- _ticket.reset(holder);
+ // If we have a strong lock, waiting for a ticket can cause a deadlock.
+ if (opCtx != NULL && opCtx->lockState() != NULL) {
+ if (opCtx->lockState()->hasStrongLocks())
+ return;
+ writeLocked = opCtx->lockState()->isWriteLocked();
+ } else {
+ writeLocked = _everStartedWrite;
}
- void WiredTigerRecoveryUnit::_txnOpen(OperationContext* opCtx) {
- invariant( !_active );
- _getTicket(opCtx);
+ TicketHolder* holder = writeLocked ? &openWriteTransaction : &openReadTransaction;
- WT_SESSION *s = _session->getSession();
- _syncing = _syncing || waitUntilDurableData.numWaitingForSync.load() > 0;
- invariantWTOK( s->begin_transaction(s, _syncing ? "sync=true" : NULL) );
- LOG(2) << "WT begin_transaction";
- _timer.reset();
- _active = true;
- }
+ holder->waitForTicket();
+ _ticket.reset(holder);
+}
- void WiredTigerRecoveryUnit::beingReleasedFromOperationContext() {
- LOG(2) << "WiredTigerRecoveryUnit::beingReleased";
- _currentlySquirreled = true;
- if ( _active == false && !wt_keeptxnopen() ) {
- _commit();
- }
- }
- void WiredTigerRecoveryUnit::beingSetOnOperationContext() {
- LOG(2) << "WiredTigerRecoveryUnit::broughtBack";
- _currentlySquirreled = false;
+void WiredTigerRecoveryUnit::_txnOpen(OperationContext* opCtx) {
+ invariant(!_active);
+ _getTicket(opCtx);
+
+ WT_SESSION* s = _session->getSession();
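+    // If anyone is blocked in waitUntilDurable(), open this transaction with sync=true so its
+    // commit also flushes the journal.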
+ _syncing = _syncing || waitUntilDurableData.numWaitingForSync.load() > 0;
+ invariantWTOK(s->begin_transaction(s, _syncing ? "sync=true" : NULL));
+ LOG(2) << "WT begin_transaction";
+ _timer.reset();
+ _active = true;
+}
+
+void WiredTigerRecoveryUnit::beingReleasedFromOperationContext() {
+ LOG(2) << "WiredTigerRecoveryUnit::beingReleased";
+ _currentlySquirreled = true;
+ if (_active == false && !wt_keeptxnopen()) {
+ _commit();
}
+}
+void WiredTigerRecoveryUnit::beingSetOnOperationContext() {
+ LOG(2) << "WiredTigerRecoveryUnit::broughtBack";
+ _currentlySquirreled = false;
+}
- // ---------------------
+// ---------------------
- WiredTigerCursor::WiredTigerCursor(const std::string& uri,
- uint64_t id,
- bool forRecordStore,
- OperationContext* txn) {
- _uriID = id;
- _ru = WiredTigerRecoveryUnit::get( txn );
- _session = _ru->getSession(txn);
- _cursor = _session->getCursor( uri, id, forRecordStore );
- if ( !_cursor ) {
- error() << "no cursor for uri: " << uri;
- }
+WiredTigerCursor::WiredTigerCursor(const std::string& uri,
+ uint64_t id,
+ bool forRecordStore,
+ OperationContext* txn) {
+ _uriID = id;
+ _ru = WiredTigerRecoveryUnit::get(txn);
+ _session = _ru->getSession(txn);
+ _cursor = _session->getCursor(uri, id, forRecordStore);
+ if (!_cursor) {
+ error() << "no cursor for uri: " << uri;
}
+}
- WiredTigerCursor::~WiredTigerCursor() {
- _session->releaseCursor( _uriID, _cursor );
- _cursor = NULL;
- }
+WiredTigerCursor::~WiredTigerCursor() {
+ _session->releaseCursor(_uriID, _cursor);
+ _cursor = NULL;
+}
- void WiredTigerCursor::reset() {
- invariantWTOK( _cursor->reset( _cursor ) );
- }
+void WiredTigerCursor::reset() {
+ invariantWTOK(_cursor->reset(_cursor));
+}
- WT_SESSION* WiredTigerCursor::getWTSession() {
- return _session->getSession();
- }
+WT_SESSION* WiredTigerCursor::getWTSession() {
+ return _session->getSession();
+}
}
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h
index 4dcb216c060..6979f47af09 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h
@@ -44,116 +44,131 @@
namespace mongo {
- class BSONObjBuilder;
- class WiredTigerSession;
- class WiredTigerSessionCache;
+class BSONObjBuilder;
+class WiredTigerSession;
+class WiredTigerSessionCache;
- class WiredTigerRecoveryUnit : public RecoveryUnit {
- public:
- WiredTigerRecoveryUnit(WiredTigerSessionCache* sc);
+class WiredTigerRecoveryUnit : public RecoveryUnit {
+public:
+ WiredTigerRecoveryUnit(WiredTigerSessionCache* sc);
- virtual ~WiredTigerRecoveryUnit();
+ virtual ~WiredTigerRecoveryUnit();
- virtual void reportState( BSONObjBuilder* b ) const;
+ virtual void reportState(BSONObjBuilder* b) const;
- void beginUnitOfWork(OperationContext* opCtx) final;
- void commitUnitOfWork() final;
- void abortUnitOfWork() final;
+ void beginUnitOfWork(OperationContext* opCtx) final;
+ void commitUnitOfWork() final;
+ void abortUnitOfWork() final;
- virtual bool waitUntilDurable();
- virtual void goingToWaitUntilDurable();
+ virtual bool waitUntilDurable();
+ virtual void goingToWaitUntilDurable();
- virtual void registerChange(Change *);
+ virtual void registerChange(Change*);
- virtual void beingReleasedFromOperationContext();
- virtual void beingSetOnOperationContext();
+ virtual void beingReleasedFromOperationContext();
+ virtual void beingSetOnOperationContext();
- virtual void abandonSnapshot();
+ virtual void abandonSnapshot();
-        // unused API
- virtual void* writingPtr(void* data, size_t len) { invariant(!"don't call writingPtr"); }
+    // unused API
+ virtual void* writingPtr(void* data, size_t len) {
+ invariant(!"don't call writingPtr");
+ }
- virtual void setRollbackWritesDisabled() {}
+ virtual void setRollbackWritesDisabled() {}
- virtual SnapshotId getSnapshotId() const;
+ virtual SnapshotId getSnapshotId() const;
- // ---- WT STUFF
+ // ---- WT STUFF
- WiredTigerSession* getSession(OperationContext* opCtx);
- WiredTigerSessionCache* getSessionCache() { return _sessionCache; }
- bool inActiveTxn() const { return _active; }
- void assertInActiveTxn() const;
+ WiredTigerSession* getSession(OperationContext* opCtx);
+ WiredTigerSessionCache* getSessionCache() {
+ return _sessionCache;
+ }
+ bool inActiveTxn() const {
+ return _active;
+ }
+ void assertInActiveTxn() const;
- bool everStartedWrite() const { return _everStartedWrite; }
+ bool everStartedWrite() const {
+ return _everStartedWrite;
+ }
- void setOplogReadTill( const RecordId& loc );
- RecordId getOplogReadTill() const { return _oplogReadTill; }
+ void setOplogReadTill(const RecordId& loc);
+ RecordId getOplogReadTill() const {
+ return _oplogReadTill;
+ }
- void markNoTicketRequired();
+ void markNoTicketRequired();
- static WiredTigerRecoveryUnit* get(OperationContext *txn);
+ static WiredTigerRecoveryUnit* get(OperationContext* txn);
- static void appendGlobalStats(BSONObjBuilder& b);
- private:
+ static void appendGlobalStats(BSONObjBuilder& b);
- void _abort();
- void _commit();
+private:
+ void _abort();
+ void _commit();
- void _txnClose( bool commit );
- void _txnOpen(OperationContext* opCtx);
+ void _txnClose(bool commit);
+ void _txnOpen(OperationContext* opCtx);
- WiredTigerSessionCache* _sessionCache; // not owned
- WiredTigerSession* _session; // owned, but from pool
- bool _defaultCommit;
- bool _inUnitOfWork;
- bool _active;
- uint64_t _myTransactionCount;
- bool _everStartedWrite;
- Timer _timer;
- bool _currentlySquirreled;
- bool _syncing;
- RecordId _oplogReadTill;
+ WiredTigerSessionCache* _sessionCache; // not owned
+ WiredTigerSession* _session; // owned, but from pool
+ bool _defaultCommit;
+ bool _inUnitOfWork;
+ bool _active;
+ uint64_t _myTransactionCount;
+ bool _everStartedWrite;
+ Timer _timer;
+ bool _currentlySquirreled;
+ bool _syncing;
+ RecordId _oplogReadTill;
- typedef OwnedPointerVector<Change> Changes;
- Changes _changes;
+ typedef OwnedPointerVector<Change> Changes;
+ Changes _changes;
- bool _noTicketNeeded;
- void _getTicket(OperationContext* opCtx);
- TicketHolderReleaser _ticket;
- };
-
- /**
-     * This is a smart pointer that wraps a WT_CURSOR and knows how to obtain one from the session's cursor pool and return it when done.
- */
- class WiredTigerCursor {
- public:
- WiredTigerCursor(const std::string& uri,
- uint64_t uriID,
- bool forRecordStore,
- OperationContext* txn);
-
- ~WiredTigerCursor();
-
-
- WT_CURSOR* get() const {
- // TODO(SERVER-16816): assertInActiveTxn();
- return _cursor;
- }
-
- WT_CURSOR* operator->() const { return get(); }
-
- WiredTigerSession* getSession() { return _session; }
- WT_SESSION* getWTSession();
-
- void reset();
-
- void assertInActiveTxn() const { _ru->assertInActiveTxn(); }
-
- private:
- uint64_t _uriID;
- WiredTigerRecoveryUnit* _ru; // not owned
- WiredTigerSession* _session;
- WT_CURSOR* _cursor; // owned, but pulled
- };
+ bool _noTicketNeeded;
+ void _getTicket(OperationContext* opCtx);
+ TicketHolderReleaser _ticket;
+};
+/**
+ * This is a smart pointer that wraps a WT_CURSOR and knows how to obtain one from the session's cursor pool and return it when done.
+ */
+class WiredTigerCursor {
+public:
+ WiredTigerCursor(const std::string& uri,
+ uint64_t uriID,
+ bool forRecordStore,
+ OperationContext* txn);
+
+ ~WiredTigerCursor();
+
+
+ WT_CURSOR* get() const {
+ // TODO(SERVER-16816): assertInActiveTxn();
+ return _cursor;
+ }
+
+ WT_CURSOR* operator->() const {
+ return get();
+ }
+
+ WiredTigerSession* getSession() {
+ return _session;
+ }
+ WT_SESSION* getWTSession();
+
+ void reset();
+
+ void assertInActiveTxn() const {
+ _ru->assertInActiveTxn();
+ }
+
+private:
+ uint64_t _uriID;
+ WiredTigerRecoveryUnit* _ru; // not owned
+ WiredTigerSession* _session;
+ WT_CURSOR* _cursor; // owned, but pulled
+};
}
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_server_status.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_server_status.cpp
index d546a4997da..eadd39ebde7 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_server_status.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_server_status.cpp
@@ -45,41 +45,36 @@
namespace mongo {
- using std::string;
-
- WiredTigerServerStatusSection::WiredTigerServerStatusSection(WiredTigerKVEngine* engine)
- : ServerStatusSection(kWiredTigerEngineName),
- _engine(engine) { }
-
- bool WiredTigerServerStatusSection::includeByDefault() const {
- return true;
+using std::string;
+
+WiredTigerServerStatusSection::WiredTigerServerStatusSection(WiredTigerKVEngine* engine)
+ : ServerStatusSection(kWiredTigerEngineName), _engine(engine) {}
+
+bool WiredTigerServerStatusSection::includeByDefault() const {
+ return true;
+}
+
+BSONObj WiredTigerServerStatusSection::generateSection(OperationContext* txn,
+ const BSONElement& configElement) const {
+ WiredTigerSession* session =
+ checked_cast<WiredTigerRecoveryUnit*>(txn->recoveryUnit())->getSession(txn);
+ invariant(session);
+
+ WT_SESSION* s = session->getSession();
+ invariant(s);
+ const string uri = "statistics:";
+
+ BSONObjBuilder bob;
+ Status status = WiredTigerUtil::exportTableToBSON(s, uri, "statistics=(fast)", &bob);
+ if (!status.isOK()) {
+ bob.append("error", "unable to retrieve statistics");
+ bob.append("code", static_cast<int>(status.code()));
+ bob.append("reason", status.reason());
}
- BSONObj WiredTigerServerStatusSection::generateSection(
- OperationContext* txn,
- const BSONElement& configElement) const {
-
- WiredTigerSession* session =
- checked_cast<WiredTigerRecoveryUnit*>(txn->recoveryUnit())->getSession(txn);
- invariant(session);
-
- WT_SESSION* s = session->getSession();
- invariant(s);
- const string uri = "statistics:";
+ WiredTigerRecoveryUnit::appendGlobalStats(bob);
- BSONObjBuilder bob;
- Status status = WiredTigerUtil::exportTableToBSON(s, uri,
- "statistics=(fast)", &bob);
- if (!status.isOK()) {
- bob.append("error", "unable to retrieve statistics");
- bob.append("code", static_cast<int>(status.code()));
- bob.append("reason", status.reason());
- }
-
- WiredTigerRecoveryUnit::appendGlobalStats(bob);
-
- return bob.obj();
- }
+ return bob.obj();
+}
} // namespace mongo
-
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_server_status.h b/src/mongo/db/storage/wiredtiger/wiredtiger_server_status.h
index 21f9871a82a..5e7c3b3e8a1 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_server_status.h
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_server_status.h
@@ -34,19 +34,19 @@
namespace mongo {
- class WiredTigerKVEngine;
-
- /**
- * Adds "wiredTiger" to the results of db.serverStatus().
- */
- class WiredTigerServerStatusSection : public ServerStatusSection {
- public:
- WiredTigerServerStatusSection(WiredTigerKVEngine* engine);
- virtual bool includeByDefault() const;
- virtual BSONObj generateSection(OperationContext* txn,
- const BSONElement& configElement) const;
- private:
- WiredTigerKVEngine* _engine;
- };
+class WiredTigerKVEngine;
+
+/**
+ * Adds "wiredTiger" to the results of db.serverStatus().
+ */
+class WiredTigerServerStatusSection : public ServerStatusSection {
+public:
+ WiredTigerServerStatusSection(WiredTigerKVEngine* engine);
+ virtual bool includeByDefault() const;
+ virtual BSONObj generateSection(OperationContext* txn, const BSONElement& configElement) const;
+
+private:
+ WiredTigerKVEngine* _engine;
+};
} // namespace mongo
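
Aside: the section above is instantiated once at engine startup; the
ServerStatusSection base-class constructor registers it globally under its
name, which is why the object is deliberately never freed. A hedged sketch of
that wiring (the free function is illustrative, not part of this patch):

    void registerWiredTigerServerStatus(WiredTigerKVEngine* engine) {
        // Registration happens in the base-class constructor, under
        // kWiredTigerEngineName; the allocation is intentionally leaked.
        new WiredTigerServerStatusSection(engine);
    }
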
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_session_cache.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_session_cache.cpp
index dcdde08de69..9bc0f9687a2 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_session_cache.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_session_cache.cpp
@@ -40,206 +40,194 @@
namespace mongo {
- WiredTigerSession::WiredTigerSession(WT_CONNECTION* conn, int cachePartition, int epoch)
- : _cachePartition(cachePartition),
- _epoch(epoch),
- _session(NULL),
- _cursorsOut(0) {
+WiredTigerSession::WiredTigerSession(WT_CONNECTION* conn, int cachePartition, int epoch)
+ : _cachePartition(cachePartition), _epoch(epoch), _session(NULL), _cursorsOut(0) {
+ int ret = conn->open_session(conn, NULL, "isolation=snapshot", &_session);
+ invariantWTOK(ret);
+}
- int ret = conn->open_session(conn, NULL, "isolation=snapshot", &_session);
+WiredTigerSession::~WiredTigerSession() {
+ if (_session) {
+ int ret = _session->close(_session, NULL);
invariantWTOK(ret);
}
+}
- WiredTigerSession::~WiredTigerSession() {
- if (_session) {
- int ret = _session->close(_session, NULL);
- invariantWTOK(ret);
+WT_CURSOR* WiredTigerSession::getCursor(const std::string& uri, uint64_t id, bool forRecordStore) {
+ {
+ Cursors& cursors = _curmap[id];
+ if (!cursors.empty()) {
+ WT_CURSOR* save = cursors.back();
+ cursors.pop_back();
+ _cursorsOut++;
+ return save;
}
}
+ WT_CURSOR* c = NULL;
+ int ret = _session->open_cursor(
+ _session, uri.c_str(), NULL, forRecordStore ? "" : "overwrite=false", &c);
+ if (ret != ENOENT)
+ invariantWTOK(ret);
+ if (c)
+ _cursorsOut++;
+ return c;
+}
- WT_CURSOR* WiredTigerSession::getCursor(const std::string& uri,
- uint64_t id,
- bool forRecordStore) {
- {
- Cursors& cursors = _curmap[id];
- if ( !cursors.empty() ) {
- WT_CURSOR* save = cursors.back();
- cursors.pop_back();
- _cursorsOut++;
- return save;
- }
- }
- WT_CURSOR* c = NULL;
- int ret = _session->open_cursor(_session,
- uri.c_str(),
- NULL,
- forRecordStore ? "" : "overwrite=false",
- &c);
- if (ret != ENOENT)
- invariantWTOK(ret);
- if ( c ) _cursorsOut++;
- return c;
- }
-
- void WiredTigerSession::releaseCursor(uint64_t id, WT_CURSOR *cursor) {
- invariant( _session );
- invariant( cursor );
- _cursorsOut--;
-
- Cursors& cursors = _curmap[id];
- if ( cursors.size() > 10u ) {
- invariantWTOK( cursor->close(cursor) );
- }
- else {
- invariantWTOK( cursor->reset( cursor ) );
- cursors.push_back( cursor );
- }
+void WiredTigerSession::releaseCursor(uint64_t id, WT_CURSOR* cursor) {
+ invariant(_session);
+ invariant(cursor);
+ _cursorsOut--;
+
+ Cursors& cursors = _curmap[id];
+ if (cursors.size() > 10u) {
+ invariantWTOK(cursor->close(cursor));
+ } else {
+ invariantWTOK(cursor->reset(cursor));
+ cursors.push_back(cursor);
}
+}
- void WiredTigerSession::closeAllCursors() {
- invariant( _session );
- for (CursorMap::iterator i = _curmap.begin(); i != _curmap.end(); ++i ) {
- Cursors& cursors = i->second;
- for ( size_t j = 0; j < cursors.size(); j++ ) {
- WT_CURSOR *cursor = cursors[j];
- if (cursor) {
- int ret = cursor->close(cursor);
- invariantWTOK(ret);
- }
+void WiredTigerSession::closeAllCursors() {
+ invariant(_session);
+ for (CursorMap::iterator i = _curmap.begin(); i != _curmap.end(); ++i) {
+ Cursors& cursors = i->second;
+ for (size_t j = 0; j < cursors.size(); j++) {
+ WT_CURSOR* cursor = cursors[j];
+ if (cursor) {
+ int ret = cursor->close(cursor);
+ invariantWTOK(ret);
}
}
- _curmap.clear();
}
+ _curmap.clear();
+}
- namespace {
- AtomicUInt64 nextCursorId(1);
- AtomicUInt64 cachePartitionGen(0);
- }
- // static
- uint64_t WiredTigerSession::genCursorId() {
- return nextCursorId.fetchAndAdd(1);
- }
+namespace {
+AtomicUInt64 nextCursorId(1);
+AtomicUInt64 cachePartitionGen(0);
+}
+// static
+uint64_t WiredTigerSession::genCursorId() {
+ return nextCursorId.fetchAndAdd(1);
+}
- // -----------------------
+// -----------------------
- WiredTigerSessionCache::WiredTigerSessionCache( WiredTigerKVEngine* engine )
- : _engine( engine ), _conn( engine->getConnection() ), _shuttingDown(0) {
+WiredTigerSessionCache::WiredTigerSessionCache(WiredTigerKVEngine* engine)
+ : _engine(engine), _conn(engine->getConnection()), _shuttingDown(0) {}
- }
+WiredTigerSessionCache::WiredTigerSessionCache(WT_CONNECTION* conn)
+ : _engine(NULL), _conn(conn), _shuttingDown(0) {}
+
+WiredTigerSessionCache::~WiredTigerSessionCache() {
+ shuttingDown();
+}
- WiredTigerSessionCache::WiredTigerSessionCache( WT_CONNECTION* conn )
- : _engine( NULL ), _conn( conn ), _shuttingDown(0) {
+void WiredTigerSessionCache::shuttingDown() {
+ if (_shuttingDown.load())
+ return;
+ _shuttingDown.store(1);
+ {
+ // This ensures that any calls, which are currently inside of getSession/releaseSession
+ // will be able to complete before we start cleaning up the pool. Any others, which are
+ // about to enter will return immediately because of _shuttingDown == true.
+ stdx::lock_guard<boost::shared_mutex> lk(_shutdownLock);
}
- WiredTigerSessionCache::~WiredTigerSessionCache() {
- shuttingDown();
- }
+ closeAll();
+}
- void WiredTigerSessionCache::shuttingDown() {
- if (_shuttingDown.load()) return;
- _shuttingDown.store(1);
+void WiredTigerSessionCache::closeAll() {
+ for (int i = 0; i < NumSessionCachePartitions; i++) {
+ SessionPool swapPool;
{
- // This ensures that any calls, which are currently inside of getSession/releaseSession
- // will be able to complete before we start cleaning up the pool. Any others, which are
- // about to enter will return immediately because of _shuttingDown == true.
- stdx::lock_guard<boost::shared_mutex> lk(_shutdownLock);
+ stdx::unique_lock<SpinLock> scopedLock(_cache[i].lock);
+ _cache[i].pool.swap(swapPool);
+ _cache[i].epoch++;
}
- closeAll();
- }
-
- void WiredTigerSessionCache::closeAll() {
- for (int i = 0; i < NumSessionCachePartitions; i++) {
- SessionPool swapPool;
-
- {
- stdx::unique_lock<SpinLock> scopedLock(_cache[i].lock);
- _cache[i].pool.swap(swapPool);
- _cache[i].epoch++;
- }
-
- // New sessions will be created if need be outside of the lock
- for (size_t i = 0; i < swapPool.size(); i++) {
- delete swapPool[i];
- }
-
- swapPool.clear();
+ // New sessions will be created if need be outside of the lock
+ for (size_t i = 0; i < swapPool.size(); i++) {
+ delete swapPool[i];
}
+
+ swapPool.clear();
}
+}
- WiredTigerSession* WiredTigerSessionCache::getSession() {
- boost::shared_lock<boost::shared_mutex> shutdownLock(_shutdownLock);
+WiredTigerSession* WiredTigerSessionCache::getSession() {
+ boost::shared_lock<boost::shared_mutex> shutdownLock(_shutdownLock);
- // We should never be able to get here after _shuttingDown is set, because no new
- // operations should be allowed to start.
- invariant(!_shuttingDown.loadRelaxed());
+ // We should never be able to get here after _shuttingDown is set, because no new
+ // operations should be allowed to start.
+ invariant(!_shuttingDown.loadRelaxed());
- // Spread sessions uniformly across the cache partitions
- const int cachePartition = cachePartitionGen.addAndFetch(1) % NumSessionCachePartitions;
+ // Spread sessions uniformly across the cache partitions
+ const int cachePartition = cachePartitionGen.addAndFetch(1) % NumSessionCachePartitions;
- int epoch;
+ int epoch;
- {
- stdx::unique_lock<SpinLock> cachePartitionLock(_cache[cachePartition].lock);
- epoch = _cache[cachePartition].epoch;
+ {
+ stdx::unique_lock<SpinLock> cachePartitionLock(_cache[cachePartition].lock);
+ epoch = _cache[cachePartition].epoch;
- if (!_cache[cachePartition].pool.empty()) {
- WiredTigerSession* cachedSession = _cache[cachePartition].pool.back();
- _cache[cachePartition].pool.pop_back();
+ if (!_cache[cachePartition].pool.empty()) {
+ WiredTigerSession* cachedSession = _cache[cachePartition].pool.back();
+ _cache[cachePartition].pool.pop_back();
- return cachedSession;
- }
+ return cachedSession;
}
-
- // Outside of the cache partition lock, but on release will be put back on the cache
- return new WiredTigerSession(_conn, cachePartition, epoch);
}
- void WiredTigerSessionCache::releaseSession( WiredTigerSession* session ) {
- invariant( session );
- invariant(session->cursorsOut() == 0);
-
- boost::shared_lock<boost::shared_mutex> shutdownLock(_shutdownLock);
- if (_shuttingDown.loadRelaxed()) {
- // Leak the session in order to avoid race condition with clean shutdown, where the
- // storage engine is ripped from underneath transactions, which are not "active"
- // (i.e., do not have any locks), but are just about to delete the recovery unit.
- // See SERVER-16031 for more information.
- return;
- }
+ // Outside of the cache partition lock, but on release will be put back on the cache
+ return new WiredTigerSession(_conn, cachePartition, epoch);
+}
- // This checks that we are only caching idle sessions and not something which might hold
- // locks or otherwise prevent truncation.
- {
- WT_SESSION* ss = session->getSession();
- uint64_t range;
- invariantWTOK(ss->transaction_pinned_range(ss, &range));
- invariant(range == 0);
- }
+void WiredTigerSessionCache::releaseSession(WiredTigerSession* session) {
+ invariant(session);
+ invariant(session->cursorsOut() == 0);
+
+ boost::shared_lock<boost::shared_mutex> shutdownLock(_shutdownLock);
+ if (_shuttingDown.loadRelaxed()) {
+ // Leak the session in order to avoid race condition with clean shutdown, where the
+ // storage engine is ripped from underneath transactions, which are not "active"
+ // (i.e., do not have any locks), but are just about to delete the recovery unit.
+ // See SERVER-16031 for more information.
+ return;
+ }
- const int cachePartition = session->_getCachePartition();
- bool returnedToCache = false;
+ // This checks that we are only caching idle sessions and not something which might hold
+ // locks or otherwise prevent truncation.
+ {
+ WT_SESSION* ss = session->getSession();
+ uint64_t range;
+ invariantWTOK(ss->transaction_pinned_range(ss, &range));
+ invariant(range == 0);
+ }
- if (cachePartition >= 0) {
- stdx::unique_lock<SpinLock> cachePartitionLock(_cache[cachePartition].lock);
+ const int cachePartition = session->_getCachePartition();
+ bool returnedToCache = false;
- invariant(session->_getEpoch() <= _cache[cachePartition].epoch);
+ if (cachePartition >= 0) {
+ stdx::unique_lock<SpinLock> cachePartitionLock(_cache[cachePartition].lock);
- if (session->_getEpoch() == _cache[cachePartition].epoch) {
- _cache[cachePartition].pool.push_back(session);
- returnedToCache = true;
- }
- }
+ invariant(session->_getEpoch() <= _cache[cachePartition].epoch);
- // Do all cleanup outside of the cache partition spinlock.
- if (!returnedToCache) {
- delete session;
+ if (session->_getEpoch() == _cache[cachePartition].epoch) {
+ _cache[cachePartition].pool.push_back(session);
+ returnedToCache = true;
}
+ }
- if (_engine && _engine->haveDropsQueued()) {
- _engine->dropAllQueued();
- }
+ // Do all cleanup outside of the cache partition spinlock.
+ if (!returnedToCache) {
+ delete session;
}
+
+ if (_engine && _engine->haveDropsQueued()) {
+ _engine->dropAllQueued();
+ }
+}
}
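
Aside: a sketch of the checkout/return cycle the cache above implements.
releaseSession() asserts that every cursor has been returned first, hence the
balanced getCursor()/releaseCursor() pair below; the table URI is a
hypothetical placeholder:

    void withSessionExample(WiredTigerSessionCache* cache) {
        WiredTigerSession* session = cache->getSession();
        uint64_t id = WiredTigerSession::genCursorId();
        WT_CURSOR* c = session->getCursor("table:example", id, /*forRecordStore=*/true);
        if (c) {
            // ... position and read the cursor ...
            session->releaseCursor(id, c);  // must balance getCursor()
        }
        cache->releaseSession(session);     // re-pooled, or deleted if stale
    }
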
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_session_cache.h b/src/mongo/db/storage/wiredtiger/wiredtiger_session_cache.h
index 2f9e8d64d4d..9fd575232b9 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_session_cache.h
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_session_cache.h
@@ -44,113 +44,118 @@
namespace mongo {
- class WiredTigerKVEngine;
+class WiredTigerKVEngine;
+
+/**
+ * This is a structure that caches 1 cursor for each uri.
+ * The idea is that there is a pool of these somewhere.
+ * NOT THREADSAFE
+ */
+class WiredTigerSession {
+public:
/**
- * This is a structure that caches 1 cursor for each uri.
- * The idea is that there is a pool of these somewhere.
- * NOT THREADSAFE
+ * Creates a new WT session on the specified connection.
+ *
+ * @param conn WT connection
+ * @param cachePartition If the session comes from the session cache, this indicates to
+ * which partition it should be returned. Value of -1 means it doesn't come from
+ * cache and that it should not be cached, but closed directly.
+ * @param epoch In which session cache cleanup epoch was this session instantiated. Value
+ * of -1 means that this value is not necessary since the session will not be
+ * cached.
*/
- class WiredTigerSession {
- public:
-
- /**
- * Creates a new WT session on the specified connection.
- *
- * @param conn WT connection
- * @param cachePartition If the session comes from the session cache, this indicates to
- * which partition it should be returned. Value of -1 means it doesn't come from
- * cache and that it should not be cached, but closed directly.
- * @param epoch In which session cache cleanup epoch was this session instantiated. Value
- * of -1 means that this value is not necessary since the session will not be
- * cached.
- */
- WiredTigerSession(WT_CONNECTION* conn, int cachePartition = -1, int epoch = -1);
- ~WiredTigerSession();
-
- WT_SESSION* getSession() const { return _session; }
-
- WT_CURSOR* getCursor(const std::string& uri,
- uint64_t id,
- bool forRecordStore);
- void releaseCursor(uint64_t id, WT_CURSOR *cursor);
+ WiredTigerSession(WT_CONNECTION* conn, int cachePartition = -1, int epoch = -1);
+ ~WiredTigerSession();
- void closeAllCursors();
+ WT_SESSION* getSession() const {
+ return _session;
+ }
- int cursorsOut() const { return _cursorsOut; }
+ WT_CURSOR* getCursor(const std::string& uri, uint64_t id, bool forRecordStore);
+ void releaseCursor(uint64_t id, WT_CURSOR* cursor);
- static uint64_t genCursorId();
+ void closeAllCursors();
- /**
- * For "metadata:" cursors. Guaranteed never to collide with genCursorId() ids.
- */
- static const uint64_t kMetadataCursorId = 0;
+ int cursorsOut() const {
+ return _cursorsOut;
+ }
- private:
- friend class WiredTigerSessionCache;
+ static uint64_t genCursorId();
- typedef std::vector<WT_CURSOR*> Cursors;
- typedef std::map<uint64_t, Cursors> CursorMap;
+ /**
+ * For "metadata:" cursors. Guaranteed never to collide with genCursorId() ids.
+ */
+    static const uint64_t kMetadataCursorId = 0;
+
+private:
+ friend class WiredTigerSessionCache;
- // Used internally by WiredTigerSessionCache
- int _getEpoch() const { return _epoch; }
- int _getCachePartition() const { return _cachePartition; }
+ typedef std::vector<WT_CURSOR*> Cursors;
+ typedef std::map<uint64_t, Cursors> CursorMap;
- const int _cachePartition;
- const int _epoch;
- WT_SESSION* _session; // owned
- CursorMap _curmap; // owned
- int _cursorsOut;
- };
+ // Used internally by WiredTigerSessionCache
+ int _getEpoch() const {
+ return _epoch;
+ }
+ int _getCachePartition() const {
+ return _cachePartition;
+ }
- class WiredTigerSessionCache {
- public:
- WiredTigerSessionCache( WiredTigerKVEngine* engine );
- WiredTigerSessionCache( WT_CONNECTION* conn );
- ~WiredTigerSessionCache();
+ const int _cachePartition;
+ const int _epoch;
+ WT_SESSION* _session; // owned
+ CursorMap _curmap; // owned
+ int _cursorsOut;
+};
- WiredTigerSession* getSession();
- void releaseSession( WiredTigerSession* session );
+class WiredTigerSessionCache {
+public:
+ WiredTigerSessionCache(WiredTigerKVEngine* engine);
+ WiredTigerSessionCache(WT_CONNECTION* conn);
+ ~WiredTigerSessionCache();
- void closeAll();
+ WiredTigerSession* getSession();
+ void releaseSession(WiredTigerSession* session);
- void shuttingDown();
+ void closeAll();
- WT_CONNECTION* conn() const { return _conn; }
+ void shuttingDown();
- private:
- typedef std::vector<WiredTigerSession*> SessionPool;
+ WT_CONNECTION* conn() const {
+ return _conn;
+ }
- enum { NumSessionCachePartitions = 64 };
+private:
+ typedef std::vector<WiredTigerSession*> SessionPool;
- struct SessionCachePartition {
- SessionCachePartition() : epoch(0) { }
- ~SessionCachePartition() {
- invariant(pool.empty());
- }
+ enum { NumSessionCachePartitions = 64 };
- SpinLock lock;
- int epoch;
- SessionPool pool;
- };
+ struct SessionCachePartition {
+ SessionCachePartition() : epoch(0) {}
+ ~SessionCachePartition() {
+ invariant(pool.empty());
+ }
+ SpinLock lock;
+ int epoch;
+ SessionPool pool;
+ };
- WiredTigerKVEngine* _engine; // not owned, might be NULL
- WT_CONNECTION* _conn; // not owned
- // Partitioned cache of WT sessions. The partition key is not important, but it is
- // important that sessions be returned to the same partition they were taken from in order
- // to have some form of balance between the partitions.
- SessionCachePartition _cache[NumSessionCachePartitions];
+ WiredTigerKVEngine* _engine; // not owned, might be NULL
+ WT_CONNECTION* _conn; // not owned
- // Regular operations take it in shared mode. Shutdown sets the _shuttingDown flag and
- // then takes it in exclusive mode. This ensures that all threads, which would return
- // sessions to the cache would leak them.
- boost::shared_mutex _shutdownLock;
- AtomicUInt32 _shuttingDown; // Used as boolean - 0 = false, 1 = true
- };
+ // Partitioned cache of WT sessions. The partition key is not important, but it is
+ // important that sessions be returned to the same partition they were taken from in order
+ // to have some form of balance between the partitions.
+ SessionCachePartition _cache[NumSessionCachePartitions];
+ // Regular operations take it in shared mode. Shutdown sets the _shuttingDown flag and
+ // then takes it in exclusive mode. This ensures that all threads, which would return
+ // sessions to the cache would leak them.
+ boost::shared_mutex _shutdownLock;
+ AtomicUInt32 _shuttingDown; // Used as boolean - 0 = false, 1 = true
+};
}
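
Aside: the epoch field above is how closeAll() invalidates sessions that are
still checked out: each call bumps the partition's epoch, and releaseSession()
only re-pools a session whose epoch still matches. A condensed restatement of
that check, with the epochs as plain ints (no new behavior):

    bool shouldRepool(int sessionEpoch, int partitionEpoch) {
        invariant(sessionEpoch <= partitionEpoch);  // sessions are never newer
        return sessionEpoch == partitionEpoch;      // stale ones get deleted
    }
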
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_size_storer.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_size_storer.cpp
index bbf12fc075f..89d7438ea9a 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_size_storer.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_size_storer.cpp
@@ -47,196 +47,192 @@
namespace mongo {
- using std::string;
+using std::string;
- namespace {
- int MAGIC = 123123;
+namespace {
+int MAGIC = 123123;
+}
+
+WiredTigerSizeStorer::WiredTigerSizeStorer(WT_CONNECTION* conn, const std::string& storageUri)
+ : _session(conn) {
+ WT_SESSION* session = _session.getSession();
+ int ret = session->open_cursor(session, storageUri.c_str(), NULL, "overwrite=true", &_cursor);
+ if (ret == ENOENT) {
+ // Need to create table.
+ std::string config =
+ WiredTigerCustomizationHooks::get(getGlobalServiceContext())->getOpenConfig(storageUri);
+ invariantWTOK(session->create(session, storageUri.c_str(), config.c_str()));
+ ret = session->open_cursor(session, storageUri.c_str(), NULL, "overwrite=true", &_cursor);
}
+ invariantWTOK(ret);
- WiredTigerSizeStorer::WiredTigerSizeStorer(WT_CONNECTION* conn, const std::string& storageUri)
- : _session(conn)
- {
- WT_SESSION* session = _session.getSession();
- int ret = session->open_cursor(session, storageUri.c_str(), NULL,
- "overwrite=true", &_cursor);
- if (ret == ENOENT) {
- // Need to create table.
- std::string config = WiredTigerCustomizationHooks::get(
- getGlobalServiceContext())->getOpenConfig(storageUri);
- invariantWTOK(session->create(session, storageUri.c_str(), config.c_str()));
- ret = session->open_cursor(session, storageUri.c_str(), NULL,
- "overwrite=true", &_cursor);
- }
- invariantWTOK(ret);
+ _magic = MAGIC;
+}
- _magic = MAGIC;
- }
+WiredTigerSizeStorer::~WiredTigerSizeStorer() {
+ // This shouldn't be necessary, but protects us if we screw up.
+ stdx::lock_guard<stdx::mutex> cursorLock(_cursorMutex);
- WiredTigerSizeStorer::~WiredTigerSizeStorer() {
- // This shouldn't be necessary, but protects us if we screw up.
- stdx::lock_guard<stdx::mutex> cursorLock( _cursorMutex );
+ _magic = 11111;
+ _cursor->close(_cursor);
+}
- _magic = 11111;
- _cursor->close(_cursor);
- }
+void WiredTigerSizeStorer::_checkMagic() const {
+ if (MONGO_likely(_magic == MAGIC))
+ return;
+ log() << "WiredTigerSizeStorer magic wrong: " << _magic;
+ invariant(_magic == MAGIC);
+}
- void WiredTigerSizeStorer::_checkMagic() const {
- if ( MONGO_likely(_magic == MAGIC) )
- return;
- log() << "WiredTigerSizeStorer magic wrong: " << _magic;
- invariant( _magic == MAGIC );
- }
+void WiredTigerSizeStorer::onCreate(WiredTigerRecordStore* rs,
+ long long numRecords,
+ long long dataSize) {
+ _checkMagic();
+ stdx::lock_guard<stdx::mutex> lk(_entriesMutex);
+ Entry& entry = _entries[rs->getURI()];
+ entry.rs = rs;
+ entry.numRecords = numRecords;
+ entry.dataSize = dataSize;
+ entry.dirty = true;
+}
- void WiredTigerSizeStorer::onCreate( WiredTigerRecordStore* rs,
- long long numRecords, long long dataSize ) {
- _checkMagic();
- stdx::lock_guard<stdx::mutex> lk( _entriesMutex );
- Entry& entry = _entries[rs->getURI()];
- entry.rs = rs;
- entry.numRecords = numRecords;
- entry.dataSize = dataSize;
- entry.dirty = true;
- }
+void WiredTigerSizeStorer::onDestroy(WiredTigerRecordStore* rs) {
+ _checkMagic();
+ stdx::lock_guard<stdx::mutex> lk(_entriesMutex);
+ Entry& entry = _entries[rs->getURI()];
+ entry.numRecords = rs->numRecords(NULL);
+ entry.dataSize = rs->dataSize(NULL);
+ entry.dirty = true;
+ entry.rs = NULL;
+}
- void WiredTigerSizeStorer::onDestroy( WiredTigerRecordStore* rs ) {
- _checkMagic();
- stdx::lock_guard<stdx::mutex> lk( _entriesMutex );
- Entry& entry = _entries[rs->getURI()];
- entry.numRecords = rs->numRecords( NULL );
- entry.dataSize = rs->dataSize( NULL );
- entry.dirty = true;
- entry.rs = NULL;
- }
+void WiredTigerSizeStorer::storeToCache(StringData uri, long long numRecords, long long dataSize) {
+ _checkMagic();
+ stdx::lock_guard<stdx::mutex> lk(_entriesMutex);
+ Entry& entry = _entries[uri.toString()];
+ entry.numRecords = numRecords;
+ entry.dataSize = dataSize;
+ entry.dirty = true;
+}
- void WiredTigerSizeStorer::storeToCache( StringData uri,
- long long numRecords, long long dataSize ) {
- _checkMagic();
- stdx::lock_guard<stdx::mutex> lk( _entriesMutex );
- Entry& entry = _entries[uri.toString()];
- entry.numRecords = numRecords;
- entry.dataSize = dataSize;
- entry.dirty = true;
+void WiredTigerSizeStorer::loadFromCache(StringData uri,
+ long long* numRecords,
+ long long* dataSize) const {
+ _checkMagic();
+ stdx::lock_guard<stdx::mutex> lk(_entriesMutex);
+ Map::const_iterator it = _entries.find(uri.toString());
+ if (it == _entries.end()) {
+ *numRecords = 0;
+ *dataSize = 0;
+ return;
}
+ *numRecords = it->second.numRecords;
+ *dataSize = it->second.dataSize;
+}
- void WiredTigerSizeStorer::loadFromCache( StringData uri,
- long long* numRecords, long long* dataSize ) const {
- _checkMagic();
- stdx::lock_guard<stdx::mutex> lk( _entriesMutex );
- Map::const_iterator it = _entries.find( uri.toString() );
- if ( it == _entries.end() ) {
- *numRecords = 0;
- *dataSize = 0;
- return;
- }
- *numRecords = it->second.numRecords;
- *dataSize = it->second.dataSize;
- }
+void WiredTigerSizeStorer::fillCache() {
+ stdx::lock_guard<stdx::mutex> cursorLock(_cursorMutex);
+ _checkMagic();
- void WiredTigerSizeStorer::fillCache() {
- stdx::lock_guard<stdx::mutex> cursorLock( _cursorMutex );
- _checkMagic();
+ Map m;
+ {
+ // Seek to beginning if needed.
+ invariantWTOK(_cursor->reset(_cursor));
- Map m;
- {
- // Seek to beginning if needed.
- invariantWTOK(_cursor->reset(_cursor));
-
- // Intentionally ignoring return value.
- ON_BLOCK_EXIT(_cursor->reset, _cursor);
-
- int cursorNextRet;
- while ((cursorNextRet = _cursor->next(_cursor)) != WT_NOTFOUND) {
- invariantWTOK(cursorNextRet);
-
- WT_ITEM key;
- WT_ITEM value;
- invariantWTOK( _cursor->get_key(_cursor, &key ) );
- invariantWTOK( _cursor->get_value(_cursor, &value ) );
- std::string uriKey( reinterpret_cast<const char*>( key.data ), key.size );
- BSONObj data( reinterpret_cast<const char*>( value.data ) );
-
- LOG(2) << "WiredTigerSizeStorer::loadFrom " << uriKey << " -> " << data;
-
- Entry& e = m[uriKey];
- e.numRecords = data["numRecords"].safeNumberLong();
- e.dataSize = data["dataSize"].safeNumberLong();
- e.dirty = false;
- e.rs = NULL;
- }
- }
+ // Intentionally ignoring return value.
+ ON_BLOCK_EXIT(_cursor->reset, _cursor);
- stdx::lock_guard<stdx::mutex> lk( _entriesMutex );
- _entries.swap(m);
- }
+ int cursorNextRet;
+ while ((cursorNextRet = _cursor->next(_cursor)) != WT_NOTFOUND) {
+ invariantWTOK(cursorNextRet);
- void WiredTigerSizeStorer::syncCache(bool syncToDisk) {
- stdx::lock_guard<stdx::mutex> cursorLock( _cursorMutex );
- _checkMagic();
+ WT_ITEM key;
+ WT_ITEM value;
+ invariantWTOK(_cursor->get_key(_cursor, &key));
+ invariantWTOK(_cursor->get_value(_cursor, &value));
+ std::string uriKey(reinterpret_cast<const char*>(key.data), key.size);
+ BSONObj data(reinterpret_cast<const char*>(value.data));
- Map myMap;
- {
- stdx::lock_guard<stdx::mutex> lk( _entriesMutex );
- for ( Map::iterator it = _entries.begin(); it != _entries.end(); ++it ) {
- std::string uriKey = it->first;
- Entry& entry = it->second;
- if ( entry.rs ) {
- if ( entry.dataSize != entry.rs->dataSize( NULL ) ) {
- entry.dataSize = entry.rs->dataSize( NULL );
- entry.dirty = true;
- }
- if ( entry.numRecords != entry.rs->numRecords( NULL ) ) {
- entry.numRecords = entry.rs->numRecords( NULL );
- entry.dirty = true;
- }
- }
+ LOG(2) << "WiredTigerSizeStorer::loadFrom " << uriKey << " -> " << data;
- if ( !entry.dirty )
- continue;
- myMap[uriKey] = entry;
- }
+ Entry& e = m[uriKey];
+ e.numRecords = data["numRecords"].safeNumberLong();
+ e.dataSize = data["dataSize"].safeNumberLong();
+ e.dirty = false;
+ e.rs = NULL;
}
+ }
- if (myMap.empty())
- return; // Nothing to do.
+ stdx::lock_guard<stdx::mutex> lk(_entriesMutex);
+ _entries.swap(m);
+}
- WT_SESSION* session = _session.getSession();
- invariantWTOK(session->begin_transaction(session, syncToDisk ? "sync=true" : ""));
- ScopeGuard rollbacker = MakeGuard(session->rollback_transaction, session, "");
+void WiredTigerSizeStorer::syncCache(bool syncToDisk) {
+ stdx::lock_guard<stdx::mutex> cursorLock(_cursorMutex);
+ _checkMagic();
- for ( Map::iterator it = myMap.begin(); it != myMap.end(); ++it ) {
- string uriKey = it->first;
+ Map myMap;
+ {
+ stdx::lock_guard<stdx::mutex> lk(_entriesMutex);
+ for (Map::iterator it = _entries.begin(); it != _entries.end(); ++it) {
+ std::string uriKey = it->first;
Entry& entry = it->second;
-
- BSONObj data;
- {
- BSONObjBuilder b;
- b.append( "numRecords", entry.numRecords );
- b.append( "dataSize", entry.dataSize );
- data = b.obj();
+ if (entry.rs) {
+ if (entry.dataSize != entry.rs->dataSize(NULL)) {
+ entry.dataSize = entry.rs->dataSize(NULL);
+ entry.dirty = true;
+ }
+ if (entry.numRecords != entry.rs->numRecords(NULL)) {
+ entry.numRecords = entry.rs->numRecords(NULL);
+ entry.dirty = true;
+ }
}
- LOG(2) << "WiredTigerSizeStorer::storeInto " << uriKey << " -> " << data;
-
- WiredTigerItem key( uriKey.c_str(), uriKey.size() );
- WiredTigerItem value( data.objdata(), data.objsize() );
- _cursor->set_key( _cursor, key.Get() );
- _cursor->set_value( _cursor, value.Get() );
- invariantWTOK( _cursor->insert(_cursor) );
+ if (!entry.dirty)
+ continue;
+ myMap[uriKey] = entry;
}
+ }
- invariantWTOK(_cursor->reset(_cursor));
+ if (myMap.empty())
+ return; // Nothing to do.
+
+ WT_SESSION* session = _session.getSession();
+ invariantWTOK(session->begin_transaction(session, syncToDisk ? "sync=true" : ""));
+ ScopeGuard rollbacker = MakeGuard(session->rollback_transaction, session, "");
- rollbacker.Dismiss();
- invariantWTOK(session->commit_transaction(session, NULL));
+ for (Map::iterator it = myMap.begin(); it != myMap.end(); ++it) {
+ string uriKey = it->first;
+ Entry& entry = it->second;
+ BSONObj data;
{
- stdx::lock_guard<stdx::mutex> lk( _entriesMutex );
- for (Map::iterator it = _entries.begin(); it != _entries.end(); ++it) {
- it->second.dirty = false;
- }
+ BSONObjBuilder b;
+ b.append("numRecords", entry.numRecords);
+ b.append("dataSize", entry.dataSize);
+ data = b.obj();
}
+
+ LOG(2) << "WiredTigerSizeStorer::storeInto " << uriKey << " -> " << data;
+
+ WiredTigerItem key(uriKey.c_str(), uriKey.size());
+ WiredTigerItem value(data.objdata(), data.objsize());
+ _cursor->set_key(_cursor, key.Get());
+ _cursor->set_value(_cursor, value.Get());
+ invariantWTOK(_cursor->insert(_cursor));
}
+ invariantWTOK(_cursor->reset(_cursor));
+ rollbacker.Dismiss();
+ invariantWTOK(session->commit_transaction(session, NULL));
+
+ {
+ stdx::lock_guard<stdx::mutex> lk(_entriesMutex);
+ for (Map::iterator it = _entries.begin(); it != _entries.end(); ++it) {
+ it->second.dirty = false;
+ }
+ }
+}
}
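
Aside: the methods above form a write-behind cache keyed by table URI. A
sketch of one round trip, with a hypothetical URI and counts:

    void sizeStorerExample(WiredTigerSizeStorer* ss) {
        // Update the in-memory entry only; this marks it dirty.
        ss->storeToCache("table:example", /*numRecords=*/10, /*dataSize=*/1024);

        long long numRecords = 0, dataSize = 0;
        ss->loadFromCache("table:example", &numRecords, &dataSize);

        // Flush every dirty entry to the backing table in one transaction.
        ss->syncCache(/*syncToDisk=*/false);
    }
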
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_size_storer.h b/src/mongo/db/storage/wiredtiger/wiredtiger_size_storer.h
index 488696424a0..3dcbc6622a8 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_size_storer.h
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_size_storer.h
@@ -41,53 +41,51 @@
namespace mongo {
- class WiredTigerRecordStore;
- class WiredTigerSession;
+class WiredTigerRecordStore;
+class WiredTigerSession;
- class WiredTigerSizeStorer {
- public:
- WiredTigerSizeStorer(WT_CONNECTION* conn, const std::string& storageUri);
- ~WiredTigerSizeStorer();
+class WiredTigerSizeStorer {
+public:
+ WiredTigerSizeStorer(WT_CONNECTION* conn, const std::string& storageUri);
+ ~WiredTigerSizeStorer();
- void onCreate( WiredTigerRecordStore* rs, long long nr, long long ds );
- void onDestroy( WiredTigerRecordStore* rs );
+ void onCreate(WiredTigerRecordStore* rs, long long nr, long long ds);
+ void onDestroy(WiredTigerRecordStore* rs);
- void storeToCache( StringData uri, long long numRecords, long long dataSize );
+ void storeToCache(StringData uri, long long numRecords, long long dataSize);
- void loadFromCache( StringData uri, long long* numRecords, long long* dataSize ) const;
+ void loadFromCache(StringData uri, long long* numRecords, long long* dataSize) const;
- /**
- * Loads from the underlying table.
- */
- void fillCache();
+ /**
+ * Loads from the underlying table.
+ */
+ void fillCache();
- /**
- * Writes all changes to the underlying table.
- */
- void syncCache(bool syncToDisk);
+ /**
+ * Writes all changes to the underlying table.
+ */
+ void syncCache(bool syncToDisk);
- private:
- void _checkMagic() const;
+private:
+ void _checkMagic() const;
- struct Entry {
- Entry() : numRecords(0), dataSize(0), dirty(false), rs(NULL){}
- long long numRecords;
- long long dataSize;
- bool dirty;
- WiredTigerRecordStore* rs; // not owned
- };
-
- int _magic;
-
- // Guards _cursor. Acquire *before* _entriesMutex.
- mutable stdx::mutex _cursorMutex;
- const WiredTigerSession _session;
- WT_CURSOR* _cursor; // pointer is const after constructor
+ struct Entry {
+ Entry() : numRecords(0), dataSize(0), dirty(false), rs(NULL) {}
+ long long numRecords;
+ long long dataSize;
+ bool dirty;
+ WiredTigerRecordStore* rs; // not owned
+ };
- typedef std::map<std::string,Entry> Map;
- Map _entries;
- mutable stdx::mutex _entriesMutex;
+ int _magic;
- };
+ // Guards _cursor. Acquire *before* _entriesMutex.
+ mutable stdx::mutex _cursorMutex;
+ const WiredTigerSession _session;
+ WT_CURSOR* _cursor; // pointer is const after constructor
+ typedef std::map<std::string, Entry> Map;
+ Map _entries;
+ mutable stdx::mutex _entriesMutex;
+};
}
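
Aside: each cache entry is persisted under its table URI as a two-field BSON
document. This helper restates the value format that syncCache() builds and
fillCache() parses (field names taken from the .cpp diff above):

    BSONObj makeSizeStorerValue(long long numRecords, long long dataSize) {
        BSONObjBuilder b;
        b.append("numRecords", numRecords);
        b.append("dataSize", dataSize);
        return b.obj();
    }
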
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_util.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_util.cpp
index d4cc5109303..ef1231282bd 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_util.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_util.cpp
@@ -49,115 +49,113 @@
namespace mongo {
- using std::string;
+using std::string;
- Status wtRCToStatus_slow(int retCode, const char* prefix ) {
- if (retCode == 0)
- return Status::OK();
-
- if ( retCode == WT_ROLLBACK ) {
- throw WriteConflictException();
- }
+Status wtRCToStatus_slow(int retCode, const char* prefix) {
+ if (retCode == 0)
+ return Status::OK();
- fassert( 28559, retCode != WT_PANIC );
+ if (retCode == WT_ROLLBACK) {
+ throw WriteConflictException();
+ }
- str::stream s;
- if ( prefix )
- s << prefix << " ";
- s << retCode << ": " << wiredtiger_strerror(retCode);
+ fassert(28559, retCode != WT_PANIC);
- if (retCode == EINVAL) {
- return Status(ErrorCodes::BadValue, s);
- }
+ str::stream s;
+ if (prefix)
+ s << prefix << " ";
+ s << retCode << ": " << wiredtiger_strerror(retCode);
- // TODO convert specific codes rather than just using UNKNOWN_ERROR for everything.
- return Status(ErrorCodes::UnknownError, s);
+ if (retCode == EINVAL) {
+ return Status(ErrorCodes::BadValue, s);
}
- void WiredTigerUtil::fetchTypeAndSourceURI(OperationContext* opCtx,
- const std::string& tableUri,
- std::string* type,
- std::string* source) {
- std::string colgroupUri = "colgroup";
- const size_t colon = tableUri.find(':');
- invariant(colon != string::npos);
- colgroupUri += tableUri.substr(colon);
- StatusWith<std::string> colgroupResult = getMetadata(opCtx, colgroupUri);
- invariant(colgroupResult.isOK());
- WiredTigerConfigParser parser(colgroupResult.getValue());
-
- WT_CONFIG_ITEM typeItem;
- invariant(parser.get("type", &typeItem) == 0);
- invariant(typeItem.type == WT_CONFIG_ITEM::WT_CONFIG_ITEM_ID);
- *type = std::string(typeItem.str, typeItem.len);
-
- WT_CONFIG_ITEM sourceItem;
- invariant(parser.get("source", &sourceItem) == 0);
- invariant(sourceItem.type == WT_CONFIG_ITEM::WT_CONFIG_ITEM_STRING);
- *source = std::string(sourceItem.str, sourceItem.len);
+ // TODO convert specific codes rather than just using UNKNOWN_ERROR for everything.
+ return Status(ErrorCodes::UnknownError, s);
+}
+
+void WiredTigerUtil::fetchTypeAndSourceURI(OperationContext* opCtx,
+ const std::string& tableUri,
+ std::string* type,
+ std::string* source) {
+ std::string colgroupUri = "colgroup";
+ const size_t colon = tableUri.find(':');
+ invariant(colon != string::npos);
+ colgroupUri += tableUri.substr(colon);
+ StatusWith<std::string> colgroupResult = getMetadata(opCtx, colgroupUri);
+ invariant(colgroupResult.isOK());
+ WiredTigerConfigParser parser(colgroupResult.getValue());
+
+ WT_CONFIG_ITEM typeItem;
+ invariant(parser.get("type", &typeItem) == 0);
+ invariant(typeItem.type == WT_CONFIG_ITEM::WT_CONFIG_ITEM_ID);
+ *type = std::string(typeItem.str, typeItem.len);
+
+ WT_CONFIG_ITEM sourceItem;
+ invariant(parser.get("source", &sourceItem) == 0);
+ invariant(sourceItem.type == WT_CONFIG_ITEM::WT_CONFIG_ITEM_STRING);
+ *source = std::string(sourceItem.str, sourceItem.len);
+}
+
+StatusWith<std::string> WiredTigerUtil::getMetadata(OperationContext* opCtx, StringData uri) {
+ invariant(opCtx);
+ WiredTigerCursor curwrap("metadata:", WiredTigerSession::kMetadataCursorId, false, opCtx);
+ WT_CURSOR* cursor = curwrap.get();
+ invariant(cursor);
+ std::string strUri = uri.toString();
+ cursor->set_key(cursor, strUri.c_str());
+ int ret = cursor->search(cursor);
+ if (ret == WT_NOTFOUND) {
+ return StatusWith<std::string>(ErrorCodes::NoSuchKey,
+ str::stream() << "Unable to find metadata for " << uri);
+ } else if (ret != 0) {
+ return StatusWith<std::string>(wtRCToStatus(ret));
}
+ const char* metadata = NULL;
+ ret = cursor->get_value(cursor, &metadata);
+ if (ret != 0) {
+ return StatusWith<std::string>(wtRCToStatus(ret));
+ }
+ invariant(metadata);
+ return StatusWith<std::string>(metadata);
+}
- StatusWith<std::string> WiredTigerUtil::getMetadata(OperationContext* opCtx,
- StringData uri) {
- invariant(opCtx);
- WiredTigerCursor curwrap("metadata:", WiredTigerSession::kMetadataCursorId, false, opCtx);
- WT_CURSOR* cursor = curwrap.get();
- invariant(cursor);
- std::string strUri = uri.toString();
- cursor->set_key(cursor, strUri.c_str());
- int ret = cursor->search(cursor);
- if (ret == WT_NOTFOUND) {
- return StatusWith<std::string>(ErrorCodes::NoSuchKey, str::stream()
- << "Unable to find metadata for " << uri);
- }
- else if (ret != 0) {
- return StatusWith<std::string>(wtRCToStatus(ret));
- }
- const char* metadata = NULL;
- ret = cursor->get_value(cursor, &metadata);
- if (ret != 0) {
- return StatusWith<std::string>(wtRCToStatus(ret));
- }
- invariant(metadata);
- return StatusWith<std::string>(metadata);
+Status WiredTigerUtil::getApplicationMetadata(OperationContext* opCtx,
+ StringData uri,
+ BSONObjBuilder* bob) {
+ StatusWith<std::string> metadataResult = getMetadata(opCtx, uri);
+ if (!metadataResult.isOK()) {
+ return metadataResult.getStatus();
+ }
+ WiredTigerConfigParser topParser(metadataResult.getValue());
+ WT_CONFIG_ITEM appMetadata;
+ if (topParser.get("app_metadata", &appMetadata) != 0) {
+ return Status::OK();
+ }
+ if (appMetadata.len == 0) {
+ return Status::OK();
+ }
+ if (appMetadata.type != WT_CONFIG_ITEM::WT_CONFIG_ITEM_STRUCT) {
+ return Status(ErrorCodes::FailedToParse,
+ str::stream() << "app_metadata must be a nested struct. Actual value: "
+ << StringData(appMetadata.str, appMetadata.len));
}
- Status WiredTigerUtil::getApplicationMetadata(OperationContext* opCtx,
- StringData uri,
- BSONObjBuilder* bob) {
- StatusWith<std::string> metadataResult = getMetadata(opCtx, uri);
- if (!metadataResult.isOK()) {
- return metadataResult.getStatus();
- }
- WiredTigerConfigParser topParser(metadataResult.getValue());
- WT_CONFIG_ITEM appMetadata;
- if (topParser.get("app_metadata", &appMetadata) != 0) {
- return Status::OK();
- }
- if (appMetadata.len == 0) {
- return Status::OK();
+ WiredTigerConfigParser parser(appMetadata);
+ WT_CONFIG_ITEM keyItem;
+ WT_CONFIG_ITEM valueItem;
+ int ret;
+ unordered_set<StringData, StringData::Hasher> keysSeen;
+ while ((ret = parser.next(&keyItem, &valueItem)) == 0) {
+ const StringData key(keyItem.str, keyItem.len);
+ if (keysSeen.count(key)) {
+ return Status(ErrorCodes::DuplicateKey,
+ str::stream() << "app_metadata must not contain duplicate keys. "
+ << "Found multiple instances of key '" << key << "'.");
}
- if (appMetadata.type != WT_CONFIG_ITEM::WT_CONFIG_ITEM_STRUCT) {
- return Status(ErrorCodes::FailedToParse, str::stream()
- << "app_metadata must be a nested struct. Actual value: "
- << StringData(appMetadata.str, appMetadata.len));
- }
-
- WiredTigerConfigParser parser(appMetadata);
- WT_CONFIG_ITEM keyItem;
- WT_CONFIG_ITEM valueItem;
- int ret;
- unordered_set<StringData, StringData::Hasher> keysSeen;
- while ((ret = parser.next(&keyItem, &valueItem)) == 0) {
- const StringData key(keyItem.str, keyItem.len);
- if (keysSeen.count(key)) {
- return Status(ErrorCodes::DuplicateKey, str::stream()
- << "app_metadata must not contain duplicate keys. "
- << "Found multiple instances of key '" << key << "'.");
- }
- keysSeen.insert(key);
+ keysSeen.insert(key);
- switch (valueItem.type) {
+ switch (valueItem.type) {
case WT_CONFIG_ITEM::WT_CONFIG_ITEM_BOOL:
bob->appendBool(key, valueItem.val);
break;
@@ -167,285 +165,277 @@ namespace mongo {
default:
bob->append(key, StringData(valueItem.str, valueItem.len));
break;
- }
- }
- if (ret != WT_NOTFOUND) {
- return wtRCToStatus(ret);
}
-
- return Status::OK();
}
-
- StatusWith<BSONObj> WiredTigerUtil::getApplicationMetadata(OperationContext* opCtx,
- StringData uri) {
- BSONObjBuilder bob;
- Status status = getApplicationMetadata(opCtx, uri, &bob);
- if (!status.isOK()) {
- return StatusWith<BSONObj>(status);
- }
- return StatusWith<BSONObj>(bob.obj());
+ if (ret != WT_NOTFOUND) {
+ return wtRCToStatus(ret);
}
- Status WiredTigerUtil::checkApplicationMetadataFormatVersion(OperationContext* opCtx,
- StringData uri,
- int64_t minimumVersion,
- int64_t maximumVersion) {
-
- StatusWith<std::string> result = getMetadata(opCtx, uri);
- if (result.getStatus().code() == ErrorCodes::NoSuchKey) {
- return result.getStatus();
- }
- invariantOK(result.getStatus());
-
- WiredTigerConfigParser topParser(result.getValue());
- WT_CONFIG_ITEM metadata;
- if (topParser.get("app_metadata", &metadata) != 0)
- return Status(ErrorCodes::UnsupportedFormat, str::stream()
- << "application metadata for " << uri
- << " is missing ");
-
- WiredTigerConfigParser parser(metadata);
-
- int64_t version = 0;
- WT_CONFIG_ITEM versionItem;
- if (parser.get("formatVersion", &versionItem) != 0) {
- // If 'formatVersion' is missing, this metadata was introduced by
- // one of the RC versions (where the format version is 1).
- version = 1;
- }
- else if (versionItem.type == WT_CONFIG_ITEM::WT_CONFIG_ITEM_NUM) {
- version = versionItem.val;
- }
- else {
- return Status(ErrorCodes::UnsupportedFormat, str::stream()
- << "'formatVersion' in application metadata for " << uri
- << " must be a number. Current value: "
- << StringData(versionItem.str, versionItem.len));
- }
+ return Status::OK();
+}
- if (version < minimumVersion || version > maximumVersion) {
- return Status(ErrorCodes::UnsupportedFormat, str::stream()
- << "Application metadata for " << uri
- << " has unsupported format version " << version);
- }
+StatusWith<BSONObj> WiredTigerUtil::getApplicationMetadata(OperationContext* opCtx,
+ StringData uri) {
+ BSONObjBuilder bob;
+ Status status = getApplicationMetadata(opCtx, uri, &bob);
+ if (!status.isOK()) {
+ return StatusWith<BSONObj>(status);
+ }
+ return StatusWith<BSONObj>(bob.obj());
+}
- LOG(2) << "WiredTigerUtil::checkApplicationMetadataFormatVersion "
- << " uri: " << uri
- << " ok range " << minimumVersion << " -> " << maximumVersion
- << " current: " << version;
+Status WiredTigerUtil::checkApplicationMetadataFormatVersion(OperationContext* opCtx,
+ StringData uri,
+ int64_t minimumVersion,
+ int64_t maximumVersion) {
+ StatusWith<std::string> result = getMetadata(opCtx, uri);
+ if (result.getStatus().code() == ErrorCodes::NoSuchKey) {
+ return result.getStatus();
+ }
+ invariantOK(result.getStatus());
+
+ WiredTigerConfigParser topParser(result.getValue());
+ WT_CONFIG_ITEM metadata;
+ if (topParser.get("app_metadata", &metadata) != 0)
+ return Status(ErrorCodes::UnsupportedFormat,
+ str::stream() << "application metadata for " << uri << " is missing ");
+
+ WiredTigerConfigParser parser(metadata);
+
+ int64_t version = 0;
+ WT_CONFIG_ITEM versionItem;
+ if (parser.get("formatVersion", &versionItem) != 0) {
+ // If 'formatVersion' is missing, this metadata was introduced by
+ // one of the RC versions (where the format version is 1).
+ version = 1;
+ } else if (versionItem.type == WT_CONFIG_ITEM::WT_CONFIG_ITEM_NUM) {
+ version = versionItem.val;
+ } else {
+ return Status(ErrorCodes::UnsupportedFormat,
+ str::stream() << "'formatVersion' in application metadata for " << uri
+ << " must be a number. Current value: "
+ << StringData(versionItem.str, versionItem.len));
+ }
- return Status::OK();
+ if (version < minimumVersion || version > maximumVersion) {
+ return Status(ErrorCodes::UnsupportedFormat,
+ str::stream() << "Application metadata for " << uri
+ << " has unsupported format version " << version);
}
- // static
- StatusWith<uint64_t> WiredTigerUtil::getStatisticsValue(WT_SESSION* session,
- const std::string& uri,
- const std::string& config,
- int statisticsKey) {
- invariant(session);
- WT_CURSOR* cursor = NULL;
- const char* cursorConfig = config.empty() ? NULL : config.c_str();
- int ret = session->open_cursor(session, uri.c_str(), NULL, cursorConfig, &cursor);
- if (ret != 0) {
- return StatusWith<uint64_t>(ErrorCodes::CursorNotFound, str::stream()
- << "unable to open cursor at URI " << uri
- << ". reason: " << wiredtiger_strerror(ret));
- }
- invariant(cursor);
- ON_BLOCK_EXIT(cursor->close, cursor);
-
- cursor->set_key(cursor, statisticsKey);
- ret = cursor->search(cursor);
- if (ret != 0) {
- return StatusWith<uint64_t>(ErrorCodes::NoSuchKey, str::stream()
- << "unable to find key " << statisticsKey << " at URI " << uri
- << ". reason: " << wiredtiger_strerror(ret));
- }
+ LOG(2) << "WiredTigerUtil::checkApplicationMetadataFormatVersion "
+ << " uri: " << uri << " ok range " << minimumVersion << " -> " << maximumVersion
+ << " current: " << version;
- uint64_t value;
- ret = cursor->get_value(cursor, NULL, NULL, &value);
- if (ret != 0) {
- return StatusWith<uint64_t>(ErrorCodes::BadValue, str::stream()
- << "unable to get value for key " << statisticsKey << " at URI " << uri
- << ". reason: " << wiredtiger_strerror(ret));
- }
+ return Status::OK();
+}
- return StatusWith<uint64_t>(value);
+// static
+StatusWith<uint64_t> WiredTigerUtil::getStatisticsValue(WT_SESSION* session,
+ const std::string& uri,
+ const std::string& config,
+ int statisticsKey) {
+ invariant(session);
+ WT_CURSOR* cursor = NULL;
+ const char* cursorConfig = config.empty() ? NULL : config.c_str();
+ int ret = session->open_cursor(session, uri.c_str(), NULL, cursorConfig, &cursor);
+ if (ret != 0) {
+ return StatusWith<uint64_t>(ErrorCodes::CursorNotFound,
+ str::stream() << "unable to open cursor at URI " << uri
+ << ". reason: " << wiredtiger_strerror(ret));
+ }
+ invariant(cursor);
+ ON_BLOCK_EXIT(cursor->close, cursor);
+
+ cursor->set_key(cursor, statisticsKey);
+ ret = cursor->search(cursor);
+ if (ret != 0) {
+ return StatusWith<uint64_t>(ErrorCodes::NoSuchKey,
+ str::stream() << "unable to find key " << statisticsKey
+ << " at URI " << uri
+ << ". reason: " << wiredtiger_strerror(ret));
}
- int64_t WiredTigerUtil::getIdentSize(WT_SESSION* s,
- const std::string& uri ) {
- StatusWith<int64_t> result = WiredTigerUtil::getStatisticsValueAs<int64_t>(
- s,
- "statistics:" + uri, "statistics=(size)", WT_STAT_DSRC_BLOCK_SIZE);
- const Status& status = result.getStatus();
- if ( !status.isOK() ) {
- if ( status.code() == ErrorCodes::CursorNotFound ) {
-                // ident gone, so it's 0
- return 0;
- }
- uassertStatusOK( status );
- }
- return result.getValue();
+ uint64_t value;
+ ret = cursor->get_value(cursor, NULL, NULL, &value);
+ if (ret != 0) {
+ return StatusWith<uint64_t>(ErrorCodes::BadValue,
+ str::stream() << "unable to get value for key " << statisticsKey
+ << " at URI " << uri
+ << ". reason: " << wiredtiger_strerror(ret));
}
-namespace {
- int mdb_handle_error(WT_EVENT_HANDLER *handler, WT_SESSION *session,
- int errorCode, const char *message) {
- try {
- error() << "WiredTiger (" << errorCode << ") " << message;
- fassert( 28558, errorCode != WT_PANIC );
- }
- catch (...) {
- std::terminate();
+ return StatusWith<uint64_t>(value);
+}
+
+int64_t WiredTigerUtil::getIdentSize(WT_SESSION* s, const std::string& uri) {
+ StatusWith<int64_t> result = WiredTigerUtil::getStatisticsValueAs<int64_t>(
+ s, "statistics:" + uri, "statistics=(size)", WT_STAT_DSRC_BLOCK_SIZE);
+ const Status& status = result.getStatus();
+ if (!status.isOK()) {
+ if (status.code() == ErrorCodes::CursorNotFound) {
+            // ident gone, so it's 0
+ return 0;
}
- return 0;
+ uassertStatusOK(status);
}
+ return result.getValue();
+}
- int mdb_handle_message( WT_EVENT_HANDLER *handler, WT_SESSION *session,
- const char *message) {
- try {
- log() << "WiredTiger " << message;
- }
- catch (...) {
- std::terminate();
- }
- return 0;
+namespace {
+int mdb_handle_error(WT_EVENT_HANDLER* handler,
+ WT_SESSION* session,
+ int errorCode,
+ const char* message) {
+ try {
+ error() << "WiredTiger (" << errorCode << ") " << message;
+ fassert(28558, errorCode != WT_PANIC);
+ } catch (...) {
+ std::terminate();
}
+ return 0;
+}
- int mdb_handle_progress( WT_EVENT_HANDLER *handler, WT_SESSION *session,
- const char *operation, uint64_t progress) {
- try {
- log() << "WiredTiger progress " << operation << " " << progress;
- }
- catch (...) {
- std::terminate();
- }
+int mdb_handle_message(WT_EVENT_HANDLER* handler, WT_SESSION* session, const char* message) {
+ try {
+ log() << "WiredTiger " << message;
+ } catch (...) {
+ std::terminate();
+ }
+ return 0;
+}
- return 0;
+int mdb_handle_progress(WT_EVENT_HANDLER* handler,
+ WT_SESSION* session,
+ const char* operation,
+ uint64_t progress) {
+ try {
+ log() << "WiredTiger progress " << operation << " " << progress;
+ } catch (...) {
+ std::terminate();
}
+ return 0;
+}
}
- WT_EVENT_HANDLER WiredTigerUtil::defaultEventHandlers() {
- WT_EVENT_HANDLER handlers = {};
- handlers.handle_error = mdb_handle_error;
- handlers.handle_message = mdb_handle_message;
- handlers.handle_progress = mdb_handle_progress;
- return handlers;
- }
+WT_EVENT_HANDLER WiredTigerUtil::defaultEventHandlers() {
+ WT_EVENT_HANDLER handlers = {};
+ handlers.handle_error = mdb_handle_error;
+ handlers.handle_message = mdb_handle_message;
+ handlers.handle_progress = mdb_handle_progress;
+ return handlers;
+}
- int WiredTigerUtil::verifyTable(OperationContext* txn, const std::string& uri,
- std::vector<std::string>* errors) {
-
- class MyEventHandlers : public WT_EVENT_HANDLER {
- public:
- MyEventHandlers(std::vector<std::string>* errors)
- : WT_EVENT_HANDLER(defaultEventHandlers())
- , _errors(errors)
- , _defaultErrorHandler(handle_error)
- {
- handle_error = onError;
- }
+int WiredTigerUtil::verifyTable(OperationContext* txn,
+ const std::string& uri,
+ std::vector<std::string>* errors) {
+ class MyEventHandlers : public WT_EVENT_HANDLER {
+ public:
+ MyEventHandlers(std::vector<std::string>* errors)
+ : WT_EVENT_HANDLER(defaultEventHandlers()),
+ _errors(errors),
+ _defaultErrorHandler(handle_error) {
+ handle_error = onError;
+ }
- private:
- static int onError(WT_EVENT_HANDLER* handler, WT_SESSION* session, int error,
- const char* message) {
- try {
- MyEventHandlers* self = static_cast<MyEventHandlers*>(handler);
- self->_errors->push_back(message);
- return self->_defaultErrorHandler(handler, session, error, message);
- }
- catch (...) {
- std::terminate();
- }
+ private:
+ static int onError(WT_EVENT_HANDLER* handler,
+ WT_SESSION* session,
+ int error,
+ const char* message) {
+ try {
+ MyEventHandlers* self = static_cast<MyEventHandlers*>(handler);
+ self->_errors->push_back(message);
+ return self->_defaultErrorHandler(handler, session, error, message);
+ } catch (...) {
+ std::terminate();
}
+ }
- typedef int(*ErrorHandler)(WT_EVENT_HANDLER*, WT_SESSION*, int, const char*);
+ typedef int (*ErrorHandler)(WT_EVENT_HANDLER*, WT_SESSION*, int, const char*);
- std::vector<std::string>* const _errors;
- const ErrorHandler _defaultErrorHandler;
- } eventHandler(errors);
+ std::vector<std::string>* const _errors;
+ const ErrorHandler _defaultErrorHandler;
+ } eventHandler(errors);
- // Try to close as much as possible to avoid EBUSY errors.
- WiredTigerRecoveryUnit::get(txn)->getSession(txn)->closeAllCursors();
- WiredTigerSessionCache* sessionCache = WiredTigerRecoveryUnit::get(txn)->getSessionCache();
- sessionCache->closeAll();
+ // Try to close as much as possible to avoid EBUSY errors.
+ WiredTigerRecoveryUnit::get(txn)->getSession(txn)->closeAllCursors();
+ WiredTigerSessionCache* sessionCache = WiredTigerRecoveryUnit::get(txn)->getSessionCache();
+ sessionCache->closeAll();
- // Open a new session with custom error handlers.
- WT_CONNECTION* conn = WiredTigerRecoveryUnit::get(txn)->getSessionCache()->conn();
- WT_SESSION* session;
- invariantWTOK(conn->open_session(conn, errors ? &eventHandler : NULL, NULL, &session));
- ON_BLOCK_EXIT(session->close, session, "");
+ // Open a new session with custom error handlers.
+ WT_CONNECTION* conn = WiredTigerRecoveryUnit::get(txn)->getSessionCache()->conn();
+ WT_SESSION* session;
+ invariantWTOK(conn->open_session(conn, errors ? &eventHandler : NULL, NULL, &session));
+ ON_BLOCK_EXIT(session->close, session, "");
- // Do the verify. Weird parens prevent treating "verify" as a macro.
- return (session->verify)(session, uri.c_str(), NULL);
- }
+ // Do the verify. Weird parens prevent treating "verify" as a macro.
+ return (session->verify)(session, uri.c_str(), NULL);
+}
- Status WiredTigerUtil::exportTableToBSON(WT_SESSION* session,
- const std::string& uri, const std::string& config,
- BSONObjBuilder* bob) {
- invariant(session);
- invariant(bob);
- WT_CURSOR* c = NULL;
- const char* cursorConfig = config.empty() ? NULL : config.c_str();
- int ret = session->open_cursor(session, uri.c_str(), NULL, cursorConfig, &c);
- if (ret != 0) {
- return Status(ErrorCodes::CursorNotFound, str::stream()
- << "unable to open cursor at URI " << uri
- << ". reason: " << wiredtiger_strerror(ret));
+Status WiredTigerUtil::exportTableToBSON(WT_SESSION* session,
+ const std::string& uri,
+ const std::string& config,
+ BSONObjBuilder* bob) {
+ invariant(session);
+ invariant(bob);
+ WT_CURSOR* c = NULL;
+ const char* cursorConfig = config.empty() ? NULL : config.c_str();
+ int ret = session->open_cursor(session, uri.c_str(), NULL, cursorConfig, &c);
+ if (ret != 0) {
+ return Status(ErrorCodes::CursorNotFound,
+ str::stream() << "unable to open cursor at URI " << uri
+ << ". reason: " << wiredtiger_strerror(ret));
+ }
+ bob->append("uri", uri);
+ invariant(c);
+ ON_BLOCK_EXIT(c->close, c);
+
+ std::map<string, BSONObjBuilder*> subs;
+ const char* desc;
+ uint64_t value;
+ while (c->next(c) == 0 && c->get_value(c, &desc, NULL, &value) == 0) {
+ StringData key(desc);
+
+ StringData prefix;
+ StringData suffix;
+
+ size_t idx = key.find(':');
+ if (idx != string::npos) {
+ prefix = key.substr(0, idx);
+ suffix = key.substr(idx + 1);
+ } else {
+ idx = key.find(' ');
}
- bob->append("uri", uri);
- invariant(c);
- ON_BLOCK_EXIT(c->close, c);
-
- std::map<string,BSONObjBuilder*> subs;
- const char* desc;
- uint64_t value;
- while (c->next(c) == 0 && c->get_value(c, &desc, NULL, &value) == 0) {
- StringData key( desc );
-
- StringData prefix;
- StringData suffix;
-
- size_t idx = key.find( ':' );
- if ( idx != string::npos ) {
- prefix = key.substr( 0, idx );
- suffix = key.substr( idx + 1 );
- }
- else {
- idx = key.find( ' ' );
- }
- if ( idx != string::npos ) {
- prefix = key.substr( 0, idx );
- suffix = key.substr( idx + 1 );
- }
- else {
- prefix = key;
- suffix = "num";
- }
+ if (idx != string::npos) {
+ prefix = key.substr(0, idx);
+ suffix = key.substr(idx + 1);
+ } else {
+ prefix = key;
+ suffix = "num";
+ }
- long long v = _castStatisticsValue<long long>(value);
+ long long v = _castStatisticsValue<long long>(value);
- if ( prefix.size() == 0 ) {
- bob->appendNumber(desc, v);
- }
- else {
- BSONObjBuilder*& sub = subs[prefix.toString()];
- if ( !sub )
- sub = new BSONObjBuilder();
- sub->appendNumber(mongoutils::str::ltrim(suffix.toString()), v);
- }
+ if (prefix.size() == 0) {
+ bob->appendNumber(desc, v);
+ } else {
+ BSONObjBuilder*& sub = subs[prefix.toString()];
+ if (!sub)
+ sub = new BSONObjBuilder();
+ sub->appendNumber(mongoutils::str::ltrim(suffix.toString()), v);
}
+ }
- for ( std::map<string,BSONObjBuilder*>::const_iterator it = subs.begin();
- it != subs.end(); ++it ) {
- const std::string& s = it->first;
- bob->append( s, it->second->obj() );
- delete it->second;
- }
- return Status::OK();
+ for (std::map<string, BSONObjBuilder*>::const_iterator it = subs.begin(); it != subs.end();
+ ++it) {
+ const std::string& s = it->first;
+ bob->append(s, it->second->obj());
+ delete it->second;
}
+ return Status::OK();
+}
} // namespace mongo
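
For orientation, the description-splitting rule in exportTableToBSON() can be
exercised on its own; this standalone sketch uses only the standard library,
and the statistic names in the comments are illustrative rather than output
from a real statistics cursor.

    #include <iostream>
    #include <string>

    // Mirrors the split applied to each statistic description above:
    //   "block-manager: blocks read" -> prefix "block-manager", suffix "blocks read"
    //   "overflow" (no ':' or ' ')   -> prefix "overflow",      suffix "num"
    int main() {
        const std::string key = "block-manager: blocks read";
        std::string prefix, suffix;
        size_t idx = key.find(':');
        if (idx == std::string::npos)
            idx = key.find(' ');
        if (idx != std::string::npos) {
            prefix = key.substr(0, idx);
            suffix = key.substr(idx + 1);
        } else {
            prefix = key;
            suffix = "num";
        }
        suffix.erase(0, suffix.find_first_not_of(' '));  // ltrim, as the real code does
        std::cout << prefix << " / " << suffix << std::endl;
        return 0;
    }
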
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_util.h b/src/mongo/db/storage/wiredtiger/wiredtiger_util.h
index 72ad5e88c15..1d69a382eec 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_util.h
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_util.h
@@ -41,233 +41,239 @@
namespace mongo {
- class BSONObjBuilder;
- class OperationContext;
- class WiredTigerConfigParser;
+class BSONObjBuilder;
+class OperationContext;
+class WiredTigerConfigParser;
- inline bool wt_keeptxnopen() {
- return false;
+inline bool wt_keeptxnopen() {
+ return false;
+}
+
+Status wtRCToStatus_slow(int retCode, const char* prefix);
+
+/**
+ * converts wiredtiger return codes to mongodb statuses.
+ */
+inline Status wtRCToStatus(int retCode, const char* prefix = NULL) {
+ if (MONGO_likely(retCode == 0))
+ return Status::OK();
+
+ return wtRCToStatus_slow(retCode, prefix);
+}
+
+#define invariantWTOK(expression) \
+ do { \
+ int _invariantWTOK_retCode = expression; \
+ if (MONGO_unlikely(_invariantWTOK_retCode != 0)) { \
+ invariantOKFailed( \
+ #expression, wtRCToStatus(_invariantWTOK_retCode), __FILE__, __LINE__); \
+ } \
+ } while (false)
+
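
The wtRCToStatus()/wtRCToStatus_slow() pair above is the classic inline-fast-path
split: the common rc == 0 case stays inline behind a well-predicted branch, and
only failures pay for message formatting. A standalone sketch of the shape, using
a hypothetical Result type and __builtin_expect in place of MONGO_likely
(GCC/Clang only), standard library only:

    #include <string>

    struct Result {
        bool ok;
        std::string reason;
    };

    // Out-of-line slow path: only error returns pay for building the message.
    Result toResult_slow(int rc, const char* prefix) {
        std::string msg = prefix ? std::string(prefix) + ": " : std::string();
        msg += "error code " + std::to_string(rc);
        return Result{false, msg};
    }

    // Inline fast path: success costs a single predicted branch, no call.
    inline Result toResult(int rc, const char* prefix = 0) {
        if (__builtin_expect(rc == 0, 1))
            return Result{true, std::string()};
        return toResult_slow(rc, prefix);
    }
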
+struct WiredTigerItem : public WT_ITEM {
+ WiredTigerItem(const void* d, size_t s) {
+ data = d;
+ size = s;
+ }
+ WiredTigerItem(const std::string& str) {
+ data = str.c_str();
+ size = str.size();
}
+ // NOTE: do not call Get() on a temporary.
+ // The pointer returned by Get() must not be allowed to live longer than *this.
+ WT_ITEM* Get() {
+ return this;
+ }
+ const WT_ITEM* Get() const {
+ return this;
+ }
+};
+
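
The NOTE on Get() deserves a concrete illustration. WiredTigerItem stores only a
pointer/size view, so the WT_ITEM returned by Get() must not outlive the item it
views; the cursor parameter and key bytes in this sketch are hypothetical.

    // Sketch: safe vs. dangling use of Get(), assuming a raw-format cursor.
    void illustrateItemLifetime(WT_CURSOR* cursor, const std::string& key) {
        WiredTigerItem item(key.data(), key.size());
        cursor->set_key(cursor, item.Get());  // fine: 'item' outlives the call
        // WT_ITEM* bad = WiredTigerItem(key).Get();  // would dangle: the
        // temporary WiredTigerItem dies at the semicolon, so 'bad' points at
        // a destroyed object.
    }
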
+class WiredTigerUtil {
+ MONGO_DISALLOW_COPYING(WiredTigerUtil);
- Status wtRCToStatus_slow(int retCode, const char* prefix );
+private:
+ WiredTigerUtil();
+public:
/**
- * converts wiredtiger return codes to mongodb statuses.
+     * Fetch the type and source fields out of the colgroup metadata. 'tableUri' must be a
+     * valid "table:" URI.
*/
- inline Status wtRCToStatus(int retCode, const char* prefix = NULL ) {
- if (MONGO_likely(retCode == 0))
- return Status::OK();
+ static void fetchTypeAndSourceURI(OperationContext* opCtx,
+ const std::string& tableUri,
+ std::string* type,
+ std::string* source);
- return wtRCToStatus_slow(retCode, prefix);
- }
+ /**
+     * Reads the contents of the table at the given URI and exports all statistics keys
+     * to BSON. Additionally, adds a 'uri' field to the output document.
+ */
+ static Status exportTableToBSON(WT_SESSION* s,
+ const std::string& uri,
+ const std::string& config,
+ BSONObjBuilder* bob);
-#define invariantWTOK(expression) do { \
- int _invariantWTOK_retCode = expression; \
- if (MONGO_unlikely(_invariantWTOK_retCode != 0)) { \
- invariantOKFailed(#expression, wtRCToStatus(_invariantWTOK_retCode), \
- __FILE__, __LINE__); \
- } \
- } while (false)
+ /**
+ * Gets entire metadata string for collection/index at URI.
+ */
+ static StatusWith<std::string> getMetadata(OperationContext* opCtx, StringData uri);
+
+ /**
+ * Reads app_metadata for collection/index at URI as a BSON document.
+ */
+ static Status getApplicationMetadata(OperationContext* opCtx,
+ StringData uri,
+ BSONObjBuilder* bob);
+
+ static StatusWith<BSONObj> getApplicationMetadata(OperationContext* opCtx, StringData uri);
+
+ /**
+ * Validates formatVersion in application metadata for 'uri'.
+     * The version must be numeric and fall in the range [minimumVersion, maximumVersion].
+ * URI is used in error messages only.
+ */
+ static Status checkApplicationMetadataFormatVersion(OperationContext* opCtx,
+ StringData uri,
+ int64_t minimumVersion,
+ int64_t maximumVersion);
+ /**
+ * Reads individual statistics using URI.
+ * List of statistics keys WT_STAT_* can be found in wiredtiger.h.
+ */
+ static StatusWith<uint64_t> getStatisticsValue(WT_SESSION* session,
+ const std::string& uri,
+ const std::string& config,
+ int statisticsKey);
- struct WiredTigerItem : public WT_ITEM {
- WiredTigerItem(const void *d, size_t s) {
- data = d;
- size = s;
- }
- WiredTigerItem(const std::string &str) {
- data = str.c_str();
- size = str.size();
- }
- // NOTE: do not call Get() on a temporary.
- // The pointer returned by Get() must not be allowed to live longer than *this.
- WT_ITEM *Get() { return this; }
- const WT_ITEM *Get() const { return this; }
- };
-
- class WiredTigerUtil {
- MONGO_DISALLOW_COPYING(WiredTigerUtil);
- private:
- WiredTigerUtil();
-
- public:
-
- /**
- * Fetch the type and source fields out of the colgroup metadata. 'tableUri' must be a
- * valid table: uri.
- */
- static void fetchTypeAndSourceURI(OperationContext* opCtx,
- const std::string& tableUri,
- std::string* type,
- std::string* source);
-
- /**
- * Reads contents of table using URI and exports all keys to BSON as string elements.
- * Additional, adds 'uri' field to output document.
- */
- static Status exportTableToBSON(WT_SESSION* s,
- const std::string& uri,
- const std::string& config,
- BSONObjBuilder* bob);
-
- /**
- * Gets entire metadata string for collection/index at URI.
- */
- static StatusWith<std::string> getMetadata(OperationContext* opCtx,
- StringData uri);
-
- /**
- * Reads app_metadata for collection/index at URI as a BSON document.
- */
- static Status getApplicationMetadata(OperationContext* opCtx,
- StringData uri,
- BSONObjBuilder* bob);
-
- static StatusWith<BSONObj> getApplicationMetadata(OperationContext* opCtx,
- StringData uri);
-
- /**
- * Validates formatVersion in application metadata for 'uri'.
- * Version must be numeric and be in the range [minimumVersion, maximumVersion].
- * URI is used in error messages only.
- */
- static Status checkApplicationMetadataFormatVersion(OperationContext* opCtx,
- StringData uri,
- int64_t minimumVersion,
- int64_t maximumVersion);
- /**
- * Reads individual statistics using URI.
- * List of statistics keys WT_STAT_* can be found in wiredtiger.h.
- */
- static StatusWith<uint64_t> getStatisticsValue(WT_SESSION* session,
+ /**
+ * Reads individual statistics using URI and casts to type ResultType.
+ * Caps statistics value at max(ResultType) in case of overflow.
+ */
+ template <typename ResultType>
+ static StatusWith<ResultType> getStatisticsValueAs(WT_SESSION* session,
const std::string& uri,
const std::string& config,
int statisticsKey);
- /**
- * Reads individual statistics using URI and casts to type ResultType.
- * Caps statistics value at max(ResultType) in case of overflow.
- */
- template<typename ResultType>
- static StatusWith<ResultType> getStatisticsValueAs(WT_SESSION* session,
- const std::string& uri,
- const std::string& config,
- int statisticsKey);
-
- /**
- * Reads individual statistics using URI and casts to type ResultType.
- * Caps statistics value at 'maximumResultType'.
- */
- template<typename ResultType>
- static StatusWith<ResultType> getStatisticsValueAs(WT_SESSION* session,
- const std::string& uri,
- const std::string& config,
- int statisticsKey,
- ResultType maximumResultType);
-
- static int64_t getIdentSize(WT_SESSION* s, const std::string& uri );
-
- /**
- * Returns a WT_EVENT_HANDER with MongoDB's default handlers.
- * The default handlers just log so it is recommended that you consider calling them even if
- * you are capturing the output.
- *
- * There is no default "close" handler. You only need to provide one if you need to call a
- * destructor.
- */
- static WT_EVENT_HANDLER defaultEventHandlers();
-
- /**
- * Calls WT_SESSION::validate() on a side-session to ensure that your current transaction
- * isn't left in an invalid state.
- *
- * If errors is non-NULL, all error messages will be appended to the array.
- */
- static int verifyTable(OperationContext* txn, const std::string& uri,
- std::vector<std::string>* errors = NULL);
-
- private:
- /**
- * Casts unsigned 64-bit statistics value to T.
- * If original value exceeds maximum value of T, return max(T).
- */
- template<typename T>
- static T _castStatisticsValue(uint64_t statisticsValue);
-
- /**
- * Casts unsigned 64-bit statistics value to T.
- * If original value exceeds 'maximumResultType', return 'maximumResultType'.
- */
- template<typename T>
- static T _castStatisticsValue(uint64_t statisticsValue, T maximumResultType);
- };
-
- class WiredTigerConfigParser {
- MONGO_DISALLOW_COPYING(WiredTigerConfigParser);
- public:
- WiredTigerConfigParser(StringData config) {
- invariantWTOK(wiredtiger_config_parser_open(NULL, config.rawData(), config.size(),
- &_parser));
- }
-
- WiredTigerConfigParser(const WT_CONFIG_ITEM& nested) {
- invariant(nested.type == WT_CONFIG_ITEM::WT_CONFIG_ITEM_STRUCT);
- invariantWTOK(wiredtiger_config_parser_open(NULL, nested.str, nested.len, &_parser));
- }
-
- ~WiredTigerConfigParser() {
- invariantWTOK(_parser->close(_parser));
- }
-
- int next(WT_CONFIG_ITEM* key, WT_CONFIG_ITEM* value) {
- return _parser->next(_parser, key, value);
- }
-
- int get(const char* key, WT_CONFIG_ITEM* value) {
- return _parser->get(_parser, key, value);
- }
-
- private:
- WT_CONFIG_PARSER* _parser;
- };
-
- // static
- template<typename ResultType>
- StatusWith<ResultType> WiredTigerUtil::getStatisticsValueAs(WT_SESSION* session,
- const std::string& uri,
- const std::string& config,
- int statisticsKey) {
- return getStatisticsValueAs<ResultType>(session, uri, config, statisticsKey,
- std::numeric_limits<ResultType>::max());
+ /**
+ * Reads individual statistics using URI and casts to type ResultType.
+ * Caps statistics value at 'maximumResultType'.
+ */
+ template <typename ResultType>
+ static StatusWith<ResultType> getStatisticsValueAs(WT_SESSION* session,
+ const std::string& uri,
+ const std::string& config,
+ int statisticsKey,
+ ResultType maximumResultType);
+
+ static int64_t getIdentSize(WT_SESSION* s, const std::string& uri);
+
+ /**
+     * Returns a WT_EVENT_HANDLER with MongoDB's default handlers.
+     * The default handlers just log, so it is recommended that you call them even if
+     * you are capturing the output.
+ *
+ * There is no default "close" handler. You only need to provide one if you need to call a
+ * destructor.
+ */
+ static WT_EVENT_HANDLER defaultEventHandlers();
+
+ /**
+     * Calls WT_SESSION::verify() on a side-session to ensure that your current transaction
+ * isn't left in an invalid state.
+ *
+ * If errors is non-NULL, all error messages will be appended to the array.
+ */
+ static int verifyTable(OperationContext* txn,
+ const std::string& uri,
+ std::vector<std::string>* errors = NULL);
+
+private:
+ /**
+ * Casts unsigned 64-bit statistics value to T.
+ * If original value exceeds maximum value of T, return max(T).
+ */
+ template <typename T>
+ static T _castStatisticsValue(uint64_t statisticsValue);
+
+ /**
+ * Casts unsigned 64-bit statistics value to T.
+ * If original value exceeds 'maximumResultType', return 'maximumResultType'.
+ */
+ template <typename T>
+ static T _castStatisticsValue(uint64_t statisticsValue, T maximumResultType);
+};
+
+class WiredTigerConfigParser {
+ MONGO_DISALLOW_COPYING(WiredTigerConfigParser);
+
+public:
+ WiredTigerConfigParser(StringData config) {
+ invariantWTOK(
+ wiredtiger_config_parser_open(NULL, config.rawData(), config.size(), &_parser));
+ }
+
+ WiredTigerConfigParser(const WT_CONFIG_ITEM& nested) {
+ invariant(nested.type == WT_CONFIG_ITEM::WT_CONFIG_ITEM_STRUCT);
+ invariantWTOK(wiredtiger_config_parser_open(NULL, nested.str, nested.len, &_parser));
+ }
+
+ ~WiredTigerConfigParser() {
+ invariantWTOK(_parser->close(_parser));
}
- // static
- template<typename ResultType>
- StatusWith<ResultType> WiredTigerUtil::getStatisticsValueAs(WT_SESSION* session,
- const std::string& uri,
- const std::string& config,
- int statisticsKey,
- ResultType maximumResultType) {
- StatusWith<uint64_t> result = getStatisticsValue(session, uri, config, statisticsKey);
- if (!result.isOK()) {
- return StatusWith<ResultType>(result.getStatus());
- }
- return StatusWith<ResultType>(_castStatisticsValue<ResultType>(result.getValue(),
- maximumResultType));
+ int next(WT_CONFIG_ITEM* key, WT_CONFIG_ITEM* value) {
+ return _parser->next(_parser, key, value);
}
- // static
- template<typename ResultType>
- ResultType WiredTigerUtil::_castStatisticsValue(uint64_t statisticsValue) {
- return _castStatisticsValue<ResultType>(statisticsValue,
- std::numeric_limits<ResultType>::max());
+ int get(const char* key, WT_CONFIG_ITEM* value) {
+ return _parser->get(_parser, key, value);
}
- // static
- template<typename ResultType>
- ResultType WiredTigerUtil::_castStatisticsValue(uint64_t statisticsValue,
- ResultType maximumResultType) {
- return statisticsValue > static_cast<uint64_t>(maximumResultType) ?
- maximumResultType : static_cast<ResultType>(statisticsValue);
+private:
+ WT_CONFIG_PARSER* _parser;
+};
+
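
A short usage sketch for the parser: the configuration string is illustrative
and error handling is elided. next() returns non-zero (WT_NOTFOUND) once the
keys are exhausted, and key.str is not NUL-terminated, so it must be copied
before use as a C string.

    // Sketch: walk the top-level keys of a WiredTiger configuration string.
    void walkConfig() {
        WiredTigerConfigParser parser("app_metadata=(formatVersion=2),key_format=u");
        WT_CONFIG_ITEM key;
        WT_CONFIG_ITEM value;
        while (parser.next(&key, &value) == 0) {
            std::string k(key.str, key.len);  // copy: not NUL-terminated
            if (k == "app_metadata" && value.type == WT_CONFIG_ITEM::WT_CONFIG_ITEM_STRUCT) {
                WiredTigerConfigParser nested(value);  // second-level walk via next()/get()
            }
        }
    }
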
+// static
+template <typename ResultType>
+StatusWith<ResultType> WiredTigerUtil::getStatisticsValueAs(WT_SESSION* session,
+ const std::string& uri,
+ const std::string& config,
+ int statisticsKey) {
+ return getStatisticsValueAs<ResultType>(
+ session, uri, config, statisticsKey, std::numeric_limits<ResultType>::max());
+}
+
+// static
+template <typename ResultType>
+StatusWith<ResultType> WiredTigerUtil::getStatisticsValueAs(WT_SESSION* session,
+ const std::string& uri,
+ const std::string& config,
+ int statisticsKey,
+ ResultType maximumResultType) {
+ StatusWith<uint64_t> result = getStatisticsValue(session, uri, config, statisticsKey);
+ if (!result.isOK()) {
+ return StatusWith<ResultType>(result.getStatus());
}
+ return StatusWith<ResultType>(
+ _castStatisticsValue<ResultType>(result.getValue(), maximumResultType));
+}
+
+// static
+template <typename ResultType>
+ResultType WiredTigerUtil::_castStatisticsValue(uint64_t statisticsValue) {
+ return _castStatisticsValue<ResultType>(statisticsValue,
+ std::numeric_limits<ResultType>::max());
+}
+
+// static
+template <typename ResultType>
+ResultType WiredTigerUtil::_castStatisticsValue(uint64_t statisticsValue,
+ ResultType maximumResultType) {
+ return statisticsValue > static_cast<uint64_t>(maximumResultType)
+ ? maximumResultType
+ : static_cast<ResultType>(statisticsValue);
+}
} // namespace mongo
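
The clamping in _castStatisticsValue() is easy to check in isolation; this
standalone program (standard library only, with the hypothetical name
saturatingCast) mirrors the comparison above.

    #include <cstdint>
    #include <iostream>
    #include <limits>

    // Mirror of the saturating cast above: oversized values clamp to the
    // requested maximum instead of truncating modulo 2^N.
    template <typename T>
    T saturatingCast(std::uint64_t v, T maximum = std::numeric_limits<T>::max()) {
        return v > static_cast<std::uint64_t>(maximum) ? maximum : static_cast<T>(v);
    }

    int main() {
        std::cout << int(saturatingCast<uint8_t>(300)) << std::endl;           // 255, not 44
        std::cout << saturatingCast<int16_t>(300, int16_t(100)) << std::endl;  // 100
        return 0;
    }
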
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_util_test.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_util_test.cpp
index d699c3903ba..b9df4acc22e 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_util_test.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_util_test.cpp
@@ -43,292 +43,297 @@
namespace mongo {
- using std::string;
- using std::stringstream;
-
- class WiredTigerConnection {
- public:
- WiredTigerConnection(StringData dbpath, StringData extraStrings) : _conn(NULL) {
- std::stringstream ss;
- ss << "create,";
- ss << extraStrings;
- string config = ss.str();
- int ret = wiredtiger_open(dbpath.toString().c_str(), NULL, config.c_str(), &_conn);
- ASSERT_OK(wtRCToStatus(ret));
- ASSERT(_conn);
- }
- ~WiredTigerConnection() {
- _conn->close(_conn, NULL);
- }
- WT_CONNECTION* getConnection() const { return _conn; }
- private:
- WT_CONNECTION* _conn;
- };
-
- class WiredTigerUtilHarnessHelper {
- public:
- WiredTigerUtilHarnessHelper(StringData extraStrings)
- : _dbpath("wt_test"),
- _connection(_dbpath.path(), extraStrings),
- _sessionCache(_connection.getConnection()) { }
-
-
- WiredTigerSessionCache* getSessionCache() {
- return &_sessionCache;
- }
-
- OperationContext* newOperationContext() {
- return new OperationContextNoop(new WiredTigerRecoveryUnit(getSessionCache()));
- }
-
- private:
- unittest::TempDir _dbpath;
- WiredTigerConnection _connection;
- WiredTigerSessionCache _sessionCache;
- };
-
- class WiredTigerUtilMetadataTest : public mongo::unittest::Test {
- public:
- virtual void setUp() {
- _harnessHelper.reset(new WiredTigerUtilHarnessHelper(""));
- _opCtx.reset(_harnessHelper->newOperationContext());
- }
-
- virtual void tearDown() {
- _opCtx.reset(NULL);
- _harnessHelper.reset(NULL);
- }
-
- protected:
- const char* getURI() const {
- return "table:mytable";
- }
-
- OperationContext* getOperationContext() const {
- ASSERT(_opCtx.get());
- return _opCtx.get();
- }
-
- void createSession(const char* config) {
- WT_SESSION* wtSession =
- WiredTigerRecoveryUnit::get(_opCtx.get())->getSession(_opCtx.get())->getSession();
- ASSERT_OK(wtRCToStatus(wtSession->create(wtSession, getURI(), config)));
- }
- private:
- std::unique_ptr<WiredTigerUtilHarnessHelper> _harnessHelper;
- std::unique_ptr<OperationContext> _opCtx;
- };
-
- TEST_F(WiredTigerUtilMetadataTest, GetConfigurationStringInvalidURI) {
- StatusWith<std::string> result =
- WiredTigerUtil::getMetadata(getOperationContext(), getURI());
- ASSERT_NOT_OK(result.getStatus());
- ASSERT_EQUALS(ErrorCodes::NoSuchKey, result.getStatus().code());
+using std::string;
+using std::stringstream;
+
+class WiredTigerConnection {
+public:
+ WiredTigerConnection(StringData dbpath, StringData extraStrings) : _conn(NULL) {
+ std::stringstream ss;
+ ss << "create,";
+ ss << extraStrings;
+ string config = ss.str();
+ int ret = wiredtiger_open(dbpath.toString().c_str(), NULL, config.c_str(), &_conn);
+ ASSERT_OK(wtRCToStatus(ret));
+ ASSERT(_conn);
}
-
- TEST_F(WiredTigerUtilMetadataTest, GetConfigurationStringNull) {
- const char* config = NULL;
- createSession(config);
- StatusWith<std::string> result =
- WiredTigerUtil::getMetadata(getOperationContext(), getURI());
- ASSERT_OK(result.getStatus());
- ASSERT_FALSE(result.getValue().empty());
- }
-
- TEST_F(WiredTigerUtilMetadataTest, GetConfigurationStringSimple) {
- const char* config = "app_metadata=(abc=123)";
- createSession(config);
- StatusWith<std::string> result =
- WiredTigerUtil::getMetadata(getOperationContext(), getURI());
- ASSERT_OK(result.getStatus());
- ASSERT_STRING_CONTAINS(result.getValue(), config);
- }
-
- TEST_F(WiredTigerUtilMetadataTest, GetApplicationMetadataInvalidURI) {
- StatusWith<BSONObj> result =
- WiredTigerUtil::getApplicationMetadata(getOperationContext(), getURI());
- ASSERT_NOT_OK(result.getStatus());
- ASSERT_EQUALS(ErrorCodes::NoSuchKey, result.getStatus().code());
+ ~WiredTigerConnection() {
+ _conn->close(_conn, NULL);
}
-
- TEST_F(WiredTigerUtilMetadataTest, GetApplicationMetadataNull) {
- const char* config = NULL;
- createSession(config);
- StatusWith<BSONObj> result =
- WiredTigerUtil::getApplicationMetadata(getOperationContext(), getURI());
- ASSERT_OK(result.getStatus());
- ASSERT_TRUE(result.getValue().isEmpty());
+ WT_CONNECTION* getConnection() const {
+ return _conn;
}
- TEST_F(WiredTigerUtilMetadataTest, GetApplicationMetadataString) {
- const char* config = "app_metadata=\"abc\"";
- createSession(config);
- StatusWith<BSONObj> result =
- WiredTigerUtil::getApplicationMetadata(getOperationContext(), getURI());
- ASSERT_NOT_OK(result.getStatus());
- ASSERT_EQUALS(ErrorCodes::FailedToParse, result.getStatus().code());
- }
+private:
+ WT_CONNECTION* _conn;
+};
- TEST_F(WiredTigerUtilMetadataTest, GetApplicationMetadataDuplicateKeys) {
- const char* config = "app_metadata=(abc=123,abc=456)";
- createSession(config);
- StatusWith<BSONObj> result =
- WiredTigerUtil::getApplicationMetadata(getOperationContext(), getURI());
- ASSERT_NOT_OK(result.getStatus());
- ASSERT_EQUALS(ErrorCodes::DuplicateKey, result.getStatus().code());
- }
+class WiredTigerUtilHarnessHelper {
+public:
+ WiredTigerUtilHarnessHelper(StringData extraStrings)
+ : _dbpath("wt_test"),
+ _connection(_dbpath.path(), extraStrings),
+ _sessionCache(_connection.getConnection()) {}
- TEST_F(WiredTigerUtilMetadataTest, GetApplicationMetadataTypes) {
- const char* config = "app_metadata=(stringkey=\"abc\",boolkey1=true,boolkey2=false,"
- "idkey=def,numkey=123,"
- "structkey=(k1=v2,k2=v2))";
- createSession(config);
- StatusWith<BSONObj> result =
- WiredTigerUtil::getApplicationMetadata(getOperationContext(), getURI());
- ASSERT_OK(result.getStatus());
- const BSONObj& obj = result.getValue();
-
- BSONElement stringElement = obj.getField("stringkey");
- ASSERT_EQUALS(mongo::String, stringElement.type());
- ASSERT_EQUALS("abc", stringElement.String());
-
- BSONElement boolElement1 = obj.getField("boolkey1");
- ASSERT_TRUE(boolElement1.isBoolean());
- ASSERT_TRUE(boolElement1.boolean());
-
- BSONElement boolElement2 = obj.getField("boolkey2");
- ASSERT_TRUE(boolElement2.isBoolean());
- ASSERT_FALSE(boolElement2.boolean());
-
- BSONElement identifierElement = obj.getField("idkey");
- ASSERT_EQUALS(mongo::String, identifierElement.type());
- ASSERT_EQUALS("def", identifierElement.String());
-
- BSONElement numberElement = obj.getField("numkey");
- ASSERT_TRUE(numberElement.isNumber());
- ASSERT_EQUALS(123, numberElement.numberInt());
-
- BSONElement structElement = obj.getField("structkey");
- ASSERT_EQUALS(mongo::String, structElement.type());
- ASSERT_EQUALS("(k1=v2,k2=v2)", structElement.String());
- }
- TEST_F(WiredTigerUtilMetadataTest, CheckApplicationMetadataFormatVersionMissingKey) {
- createSession("app_metadata=(abc=123)");
- ASSERT_OK(WiredTigerUtil::checkApplicationMetadataFormatVersion(getOperationContext(),
- getURI(),
- 1,
- 1));
- ASSERT_NOT_OK(WiredTigerUtil::checkApplicationMetadataFormatVersion(getOperationContext(),
- getURI(),
- 2,
- 2));
+ WiredTigerSessionCache* getSessionCache() {
+ return &_sessionCache;
}
- TEST_F(WiredTigerUtilMetadataTest, CheckApplicationMetadataFormatVersionString) {
- createSession("app_metadata=(formatVersion=\"bar\")");
- ASSERT_NOT_OK(WiredTigerUtil::checkApplicationMetadataFormatVersion(getOperationContext(),
- getURI(),
- 1,
- 1));
+ OperationContext* newOperationContext() {
+ return new OperationContextNoop(new WiredTigerRecoveryUnit(getSessionCache()));
}
- TEST_F(WiredTigerUtilMetadataTest, CheckApplicationMetadataFormatVersionNumber) {
- createSession("app_metadata=(formatVersion=2)");
- ASSERT_OK(WiredTigerUtil::checkApplicationMetadataFormatVersion(getOperationContext(),
- getURI(),
- 2,
- 3));
- ASSERT_NOT_OK(WiredTigerUtil::checkApplicationMetadataFormatVersion(getOperationContext(),
- getURI(),
- 1,
- 1));
- ASSERT_NOT_OK(WiredTigerUtil::checkApplicationMetadataFormatVersion(getOperationContext(),
- getURI(),
- 3,
- 3));
+private:
+ unittest::TempDir _dbpath;
+ WiredTigerConnection _connection;
+ WiredTigerSessionCache _sessionCache;
+};
+
+class WiredTigerUtilMetadataTest : public mongo::unittest::Test {
+public:
+ virtual void setUp() {
+ _harnessHelper.reset(new WiredTigerUtilHarnessHelper(""));
+ _opCtx.reset(_harnessHelper->newOperationContext());
}
- TEST(WiredTigerUtilTest, GetStatisticsValueMissingTable) {
- WiredTigerUtilHarnessHelper harnessHelper("statistics=(all)");
- WiredTigerRecoveryUnit recoveryUnit(harnessHelper.getSessionCache());
- WiredTigerSession* session = recoveryUnit.getSession(NULL);
- StatusWith<uint64_t> result = WiredTigerUtil::getStatisticsValue(session->getSession(),
- "statistics:table:no_such_table", "statistics=(fast)", WT_STAT_DSRC_BLOCK_SIZE);
- ASSERT_NOT_OK(result.getStatus());
- ASSERT_EQUALS(ErrorCodes::CursorNotFound, result.getStatus().code());
+ virtual void tearDown() {
+ _opCtx.reset(NULL);
+ _harnessHelper.reset(NULL);
}
- TEST(WiredTigerUtilTest, GetStatisticsValueStatisticsDisabled) {
- WiredTigerUtilHarnessHelper harnessHelper("statistics=(none)");
- WiredTigerRecoveryUnit recoveryUnit(harnessHelper.getSessionCache());
- WiredTigerSession* session = recoveryUnit.getSession(NULL);
- WT_SESSION* wtSession = session->getSession();
- ASSERT_OK(wtRCToStatus(wtSession->create(wtSession, "table:mytable", NULL)));
- StatusWith<uint64_t> result = WiredTigerUtil::getStatisticsValue(session->getSession(),
- "statistics:table:mytable", "statistics=(fast)", WT_STAT_DSRC_BLOCK_SIZE);
- ASSERT_NOT_OK(result.getStatus());
- ASSERT_EQUALS(ErrorCodes::CursorNotFound, result.getStatus().code());
+protected:
+ const char* getURI() const {
+ return "table:mytable";
}
- TEST(WiredTigerUtilTest, GetStatisticsValueInvalidKey) {
- WiredTigerUtilHarnessHelper harnessHelper("statistics=(all)");
- WiredTigerRecoveryUnit recoveryUnit(harnessHelper.getSessionCache());
- WiredTigerSession* session = recoveryUnit.getSession(NULL);
- WT_SESSION* wtSession = session->getSession();
- ASSERT_OK(wtRCToStatus(wtSession->create(wtSession, "table:mytable", NULL)));
- // Use connection statistics key which does not apply to a table.
- StatusWith<uint64_t> result = WiredTigerUtil::getStatisticsValue(session->getSession(),
- "statistics:table:mytable", "statistics=(fast)", WT_STAT_CONN_SESSION_OPEN);
- ASSERT_NOT_OK(result.getStatus());
- ASSERT_EQUALS(ErrorCodes::NoSuchKey, result.getStatus().code());
+ OperationContext* getOperationContext() const {
+ ASSERT(_opCtx.get());
+ return _opCtx.get();
}
- TEST(WiredTigerUtilTest, GetStatisticsValueValidKey) {
- WiredTigerUtilHarnessHelper harnessHelper("statistics=(all)");
- WiredTigerRecoveryUnit recoveryUnit(harnessHelper.getSessionCache());
- WiredTigerSession* session = recoveryUnit.getSession(NULL);
- WT_SESSION* wtSession = session->getSession();
- ASSERT_OK(wtRCToStatus(wtSession->create(wtSession, "table:mytable", NULL)));
- // Use connection statistics key which does not apply to a table.
- StatusWith<uint64_t> result = WiredTigerUtil::getStatisticsValue(session->getSession(),
- "statistics:table:mytable", "statistics=(fast)", WT_STAT_DSRC_LSM_CHUNK_COUNT);
- ASSERT_OK(result.getStatus());
- // Expect statistics value to be zero for a LSM key on a Btree.
- ASSERT_EQUALS(0U, result.getValue());
+ void createSession(const char* config) {
+ WT_SESSION* wtSession =
+ WiredTigerRecoveryUnit::get(_opCtx.get())->getSession(_opCtx.get())->getSession();
+ ASSERT_OK(wtRCToStatus(wtSession->create(wtSession, getURI(), config)));
}
- TEST(WiredTigerUtilTest, GetStatisticsValueAsUInt8) {
- WiredTigerUtilHarnessHelper harnessHelper("statistics=(all)");
- WiredTigerRecoveryUnit recoveryUnit(harnessHelper.getSessionCache());
- WiredTigerSession* session = recoveryUnit.getSession(NULL);
- WT_SESSION* wtSession = session->getSession();
- ASSERT_OK(wtRCToStatus(wtSession->create(wtSession, "table:mytable", NULL)));
-
- // Use data source statistics that has a value > 256 on an empty table.
- StatusWith<uint64_t> resultUInt64 = WiredTigerUtil::getStatisticsValue(
- session->getSession(),
- "statistics:table:mytable", "statistics=(fast)", WT_STAT_DSRC_ALLOCATION_SIZE);
- ASSERT_OK(resultUInt64.getStatus());
- ASSERT_GREATER_THAN(resultUInt64.getValue(),
- static_cast<uint64_t>(std::numeric_limits<uint8_t>::max()));
-
- // Ensure that statistics value retrieved as an 8-bit unsigned value
- // is capped at maximum value for that type.
- StatusWith<uint8_t> resultUInt8 = WiredTigerUtil::getStatisticsValueAs<uint8_t>(
- session->getSession(),
- "statistics:table:mytable", "statistics=(fast)", WT_STAT_DSRC_ALLOCATION_SIZE);
- ASSERT_OK(resultUInt8.getStatus());
- ASSERT_EQUALS(std::numeric_limits<uint8_t>::max(), resultUInt8.getValue());
-
- // Read statistics value as signed 16-bit value with alternative maximum value to
- // std::numeric_limits.
- StatusWith<int16_t> resultInt16 = WiredTigerUtil::getStatisticsValueAs<int16_t>(
- session->getSession(),
- "statistics:table:mytable", "statistics=(fast)", WT_STAT_DSRC_ALLOCATION_SIZE,
- static_cast<int16_t>(100));
- ASSERT_OK(resultInt16.getStatus());
- ASSERT_EQUALS(static_cast<uint8_t>(100), resultInt16.getValue());
- }
+private:
+ std::unique_ptr<WiredTigerUtilHarnessHelper> _harnessHelper;
+ std::unique_ptr<OperationContext> _opCtx;
+};
+
+TEST_F(WiredTigerUtilMetadataTest, GetConfigurationStringInvalidURI) {
+ StatusWith<std::string> result = WiredTigerUtil::getMetadata(getOperationContext(), getURI());
+ ASSERT_NOT_OK(result.getStatus());
+ ASSERT_EQUALS(ErrorCodes::NoSuchKey, result.getStatus().code());
+}
+
+TEST_F(WiredTigerUtilMetadataTest, GetConfigurationStringNull) {
+ const char* config = NULL;
+ createSession(config);
+ StatusWith<std::string> result = WiredTigerUtil::getMetadata(getOperationContext(), getURI());
+ ASSERT_OK(result.getStatus());
+ ASSERT_FALSE(result.getValue().empty());
+}
+
+TEST_F(WiredTigerUtilMetadataTest, GetConfigurationStringSimple) {
+ const char* config = "app_metadata=(abc=123)";
+ createSession(config);
+ StatusWith<std::string> result = WiredTigerUtil::getMetadata(getOperationContext(), getURI());
+ ASSERT_OK(result.getStatus());
+ ASSERT_STRING_CONTAINS(result.getValue(), config);
+}
+
+TEST_F(WiredTigerUtilMetadataTest, GetApplicationMetadataInvalidURI) {
+ StatusWith<BSONObj> result =
+ WiredTigerUtil::getApplicationMetadata(getOperationContext(), getURI());
+ ASSERT_NOT_OK(result.getStatus());
+ ASSERT_EQUALS(ErrorCodes::NoSuchKey, result.getStatus().code());
+}
+
+TEST_F(WiredTigerUtilMetadataTest, GetApplicationMetadataNull) {
+ const char* config = NULL;
+ createSession(config);
+ StatusWith<BSONObj> result =
+ WiredTigerUtil::getApplicationMetadata(getOperationContext(), getURI());
+ ASSERT_OK(result.getStatus());
+ ASSERT_TRUE(result.getValue().isEmpty());
+}
+
+TEST_F(WiredTigerUtilMetadataTest, GetApplicationMetadataString) {
+ const char* config = "app_metadata=\"abc\"";
+ createSession(config);
+ StatusWith<BSONObj> result =
+ WiredTigerUtil::getApplicationMetadata(getOperationContext(), getURI());
+ ASSERT_NOT_OK(result.getStatus());
+ ASSERT_EQUALS(ErrorCodes::FailedToParse, result.getStatus().code());
+}
+
+TEST_F(WiredTigerUtilMetadataTest, GetApplicationMetadataDuplicateKeys) {
+ const char* config = "app_metadata=(abc=123,abc=456)";
+ createSession(config);
+ StatusWith<BSONObj> result =
+ WiredTigerUtil::getApplicationMetadata(getOperationContext(), getURI());
+ ASSERT_NOT_OK(result.getStatus());
+ ASSERT_EQUALS(ErrorCodes::DuplicateKey, result.getStatus().code());
+}
+
+TEST_F(WiredTigerUtilMetadataTest, GetApplicationMetadataTypes) {
+ const char* config =
+ "app_metadata=(stringkey=\"abc\",boolkey1=true,boolkey2=false,"
+ "idkey=def,numkey=123,"
+ "structkey=(k1=v2,k2=v2))";
+ createSession(config);
+ StatusWith<BSONObj> result =
+ WiredTigerUtil::getApplicationMetadata(getOperationContext(), getURI());
+ ASSERT_OK(result.getStatus());
+ const BSONObj& obj = result.getValue();
+
+ BSONElement stringElement = obj.getField("stringkey");
+ ASSERT_EQUALS(mongo::String, stringElement.type());
+ ASSERT_EQUALS("abc", stringElement.String());
+
+ BSONElement boolElement1 = obj.getField("boolkey1");
+ ASSERT_TRUE(boolElement1.isBoolean());
+ ASSERT_TRUE(boolElement1.boolean());
+
+ BSONElement boolElement2 = obj.getField("boolkey2");
+ ASSERT_TRUE(boolElement2.isBoolean());
+ ASSERT_FALSE(boolElement2.boolean());
+
+ BSONElement identifierElement = obj.getField("idkey");
+ ASSERT_EQUALS(mongo::String, identifierElement.type());
+ ASSERT_EQUALS("def", identifierElement.String());
+
+ BSONElement numberElement = obj.getField("numkey");
+ ASSERT_TRUE(numberElement.isNumber());
+ ASSERT_EQUALS(123, numberElement.numberInt());
+
+ BSONElement structElement = obj.getField("structkey");
+ ASSERT_EQUALS(mongo::String, structElement.type());
+ ASSERT_EQUALS("(k1=v2,k2=v2)", structElement.String());
+}
+
+TEST_F(WiredTigerUtilMetadataTest, CheckApplicationMetadataFormatVersionMissingKey) {
+ createSession("app_metadata=(abc=123)");
+ ASSERT_OK(WiredTigerUtil::checkApplicationMetadataFormatVersion(
+ getOperationContext(), getURI(), 1, 1));
+ ASSERT_NOT_OK(WiredTigerUtil::checkApplicationMetadataFormatVersion(
+ getOperationContext(), getURI(), 2, 2));
+}
+
+TEST_F(WiredTigerUtilMetadataTest, CheckApplicationMetadataFormatVersionString) {
+ createSession("app_metadata=(formatVersion=\"bar\")");
+ ASSERT_NOT_OK(WiredTigerUtil::checkApplicationMetadataFormatVersion(
+ getOperationContext(), getURI(), 1, 1));
+}
+
+TEST_F(WiredTigerUtilMetadataTest, CheckApplicationMetadataFormatVersionNumber) {
+ createSession("app_metadata=(formatVersion=2)");
+ ASSERT_OK(WiredTigerUtil::checkApplicationMetadataFormatVersion(
+ getOperationContext(), getURI(), 2, 3));
+ ASSERT_NOT_OK(WiredTigerUtil::checkApplicationMetadataFormatVersion(
+ getOperationContext(), getURI(), 1, 1));
+ ASSERT_NOT_OK(WiredTigerUtil::checkApplicationMetadataFormatVersion(
+ getOperationContext(), getURI(), 3, 3));
+}
+
+TEST(WiredTigerUtilTest, GetStatisticsValueMissingTable) {
+ WiredTigerUtilHarnessHelper harnessHelper("statistics=(all)");
+ WiredTigerRecoveryUnit recoveryUnit(harnessHelper.getSessionCache());
+ WiredTigerSession* session = recoveryUnit.getSession(NULL);
+ StatusWith<uint64_t> result =
+ WiredTigerUtil::getStatisticsValue(session->getSession(),
+ "statistics:table:no_such_table",
+ "statistics=(fast)",
+ WT_STAT_DSRC_BLOCK_SIZE);
+ ASSERT_NOT_OK(result.getStatus());
+ ASSERT_EQUALS(ErrorCodes::CursorNotFound, result.getStatus().code());
+}
+
+TEST(WiredTigerUtilTest, GetStatisticsValueStatisticsDisabled) {
+ WiredTigerUtilHarnessHelper harnessHelper("statistics=(none)");
+ WiredTigerRecoveryUnit recoveryUnit(harnessHelper.getSessionCache());
+ WiredTigerSession* session = recoveryUnit.getSession(NULL);
+ WT_SESSION* wtSession = session->getSession();
+ ASSERT_OK(wtRCToStatus(wtSession->create(wtSession, "table:mytable", NULL)));
+ StatusWith<uint64_t> result = WiredTigerUtil::getStatisticsValue(session->getSession(),
+ "statistics:table:mytable",
+ "statistics=(fast)",
+ WT_STAT_DSRC_BLOCK_SIZE);
+ ASSERT_NOT_OK(result.getStatus());
+ ASSERT_EQUALS(ErrorCodes::CursorNotFound, result.getStatus().code());
+}
+
+TEST(WiredTigerUtilTest, GetStatisticsValueInvalidKey) {
+ WiredTigerUtilHarnessHelper harnessHelper("statistics=(all)");
+ WiredTigerRecoveryUnit recoveryUnit(harnessHelper.getSessionCache());
+ WiredTigerSession* session = recoveryUnit.getSession(NULL);
+ WT_SESSION* wtSession = session->getSession();
+ ASSERT_OK(wtRCToStatus(wtSession->create(wtSession, "table:mytable", NULL)));
+    // Use a connection statistics key, which does not apply to a table.
+ StatusWith<uint64_t> result = WiredTigerUtil::getStatisticsValue(session->getSession(),
+ "statistics:table:mytable",
+ "statistics=(fast)",
+ WT_STAT_CONN_SESSION_OPEN);
+ ASSERT_NOT_OK(result.getStatus());
+ ASSERT_EQUALS(ErrorCodes::NoSuchKey, result.getStatus().code());
+}
+
+TEST(WiredTigerUtilTest, GetStatisticsValueValidKey) {
+ WiredTigerUtilHarnessHelper harnessHelper("statistics=(all)");
+ WiredTigerRecoveryUnit recoveryUnit(harnessHelper.getSessionCache());
+ WiredTigerSession* session = recoveryUnit.getSession(NULL);
+ WT_SESSION* wtSession = session->getSession();
+ ASSERT_OK(wtRCToStatus(wtSession->create(wtSession, "table:mytable", NULL)));
+    // Use a data source statistics key (LSM chunk count) that is valid for a table.
+ StatusWith<uint64_t> result = WiredTigerUtil::getStatisticsValue(session->getSession(),
+ "statistics:table:mytable",
+ "statistics=(fast)",
+ WT_STAT_DSRC_LSM_CHUNK_COUNT);
+ ASSERT_OK(result.getStatus());
+    // Expect the statistics value to be zero for an LSM key on a Btree.
+ ASSERT_EQUALS(0U, result.getValue());
+}
+
+TEST(WiredTigerUtilTest, GetStatisticsValueAsUInt8) {
+ WiredTigerUtilHarnessHelper harnessHelper("statistics=(all)");
+ WiredTigerRecoveryUnit recoveryUnit(harnessHelper.getSessionCache());
+ WiredTigerSession* session = recoveryUnit.getSession(NULL);
+ WT_SESSION* wtSession = session->getSession();
+ ASSERT_OK(wtRCToStatus(wtSession->create(wtSession, "table:mytable", NULL)));
+
+    // Use a data source statistic that has a value > 255 on an empty table.
+ StatusWith<uint64_t> resultUInt64 =
+ WiredTigerUtil::getStatisticsValue(session->getSession(),
+ "statistics:table:mytable",
+ "statistics=(fast)",
+ WT_STAT_DSRC_ALLOCATION_SIZE);
+ ASSERT_OK(resultUInt64.getStatus());
+ ASSERT_GREATER_THAN(resultUInt64.getValue(),
+ static_cast<uint64_t>(std::numeric_limits<uint8_t>::max()));
+
+    // Ensure that a statistics value retrieved as an 8-bit unsigned value
+    // is capped at the maximum value for that type.
+ StatusWith<uint8_t> resultUInt8 =
+ WiredTigerUtil::getStatisticsValueAs<uint8_t>(session->getSession(),
+ "statistics:table:mytable",
+ "statistics=(fast)",
+ WT_STAT_DSRC_ALLOCATION_SIZE);
+ ASSERT_OK(resultUInt8.getStatus());
+ ASSERT_EQUALS(std::numeric_limits<uint8_t>::max(), resultUInt8.getValue());
+
+    // Read the statistics value as a signed 16-bit value, capping it with an explicit
+    // maximum rather than the std::numeric_limits default.
+ StatusWith<int16_t> resultInt16 =
+ WiredTigerUtil::getStatisticsValueAs<int16_t>(session->getSession(),
+ "statistics:table:mytable",
+ "statistics=(fast)",
+ WT_STAT_DSRC_ALLOCATION_SIZE,
+ static_cast<int16_t>(100));
+ ASSERT_OK(resultInt16.getStatus());
+    ASSERT_EQUALS(static_cast<int16_t>(100), resultInt16.getValue());
+}
} // namespace mongo