author     Geert Bosch <geert@mongodb.com>  2018-07-06 15:34:20 -0400
committer  Geert Bosch <geert@mongodb.com>  2018-07-06 15:34:20 -0400
commit     ebe1ae8549dfc7ec7e12f9344c2da17a2ffb9acb (patch)
tree       067c7d8b968b414812d2310764506ba8af98cf85 /src/mongo
parent     fda766f6be1a20fa28ce361511bc62e5c995186b (diff)
download   mongo-ebe1ae8549dfc7ec7e12f9344c2da17a2ffb9acb.tar.gz
SERVER-35112 Remove MMAPv1 code
Diffstat (limited to 'src/mongo')
-rw-r--r--  src/mongo/db/SConscript | 29
-rw-r--r--  src/mongo/db/catalog/SConscript | 2
-rw-r--r--  src/mongo/db/catalog/collection.cpp | 1
-rw-r--r--  src/mongo/db/catalog/collection_impl.cpp | 4
-rw-r--r--  src/mongo/db/commands/SConscript | 2
-rw-r--r--  src/mongo/db/commands/dbcommands.cpp | 13
-rw-r--r--  src/mongo/db/commands/fsync.cpp | 22
-rw-r--r--  src/mongo/db/db.cpp | 10
-rw-r--r--  src/mongo/db/index/SConscript | 1
-rw-r--r--  src/mongo/db/index/index_access_method.cpp | 19
-rw-r--r--  src/mongo/db/mongod_options.cpp | 150
-rw-r--r--  src/mongo/db/prefetch.cpp | 274
-rw-r--r--  src/mongo/db/prefetch.h | 46
-rw-r--r--  src/mongo/db/repair_database.cpp | 25
-rw-r--r--  src/mongo/db/repair_database.h | 6
-rw-r--r--  src/mongo/db/repair_database_and_check_version.cpp | 11
-rw-r--r--  src/mongo/db/repl/SConscript | 1
-rw-r--r--  src/mongo/db/repl/replication_coordinator_external_state.h | 6
-rw-r--r--  src/mongo/db/repl/replication_coordinator_external_state_impl.cpp | 20
-rw-r--r--  src/mongo/db/repl/replication_coordinator_external_state_impl.h | 1
-rw-r--r--  src/mongo/db/repl/replication_coordinator_external_state_mock.cpp | 4
-rw-r--r--  src/mongo/db/repl/replication_coordinator_external_state_mock.h | 1
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl.cpp | 4
-rw-r--r--  src/mongo/db/repl/sync_tail.cpp | 38
-rw-r--r--  src/mongo/db/storage/SConscript | 4
-rw-r--r--  src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_recovery_unit.h | 6
-rw-r--r--  src/mongo/db/storage/mmap_v1/SConscript | 310
-rw-r--r--  src/mongo/db/storage/mmap_v1/aligned_builder.cpp | 175
-rw-r--r--  src/mongo/db/storage/mmap_v1/aligned_builder.h | 149
-rw-r--r--  src/mongo/db/storage/mmap_v1/btree/btree_interface.cpp | 437
-rw-r--r--  src/mongo/db/storage/mmap_v1/btree/btree_interface.h | 52
-rw-r--r--  src/mongo/db/storage/mmap_v1/btree/btree_interface_test.cpp | 81
-rw-r--r--  src/mongo/db/storage/mmap_v1/btree/btree_logic.cpp | 2440
-rw-r--r--  src/mongo/db/storage/mmap_v1/btree/btree_logic.h | 587
-rw-r--r--  src/mongo/db/storage/mmap_v1/btree/btree_logic_test.cpp | 2500
-rw-r--r--  src/mongo/db/storage/mmap_v1/btree/btree_ondisk.cpp | 59
-rw-r--r--  src/mongo/db/storage/mmap_v1/btree/btree_ondisk.h | 377
-rw-r--r--  src/mongo/db/storage/mmap_v1/btree/btree_test_help.cpp | 244
-rw-r--r--  src/mongo/db/storage/mmap_v1/btree/btree_test_help.h | 150
-rw-r--r--  src/mongo/db/storage/mmap_v1/btree/key.cpp | 734
-rw-r--r--  src/mongo/db/storage/mmap_v1/btree/key.h | 167
-rw-r--r--  src/mongo/db/storage/mmap_v1/catalog/hashtab.cpp | 85
-rw-r--r--  src/mongo/db/storage/mmap_v1/catalog/hashtab.h | 138
-rw-r--r--  src/mongo/db/storage/mmap_v1/catalog/index_details.cpp | 39
-rw-r--r--  src/mongo/db/storage/mmap_v1/catalog/index_details.h | 70
-rw-r--r--  src/mongo/db/storage/mmap_v1/catalog/namespace.h | 168
-rw-r--r--  src/mongo/db/storage/mmap_v1/catalog/namespace_details.cpp | 242
-rw-r--r--  src/mongo/db/storage/mmap_v1/catalog/namespace_details.h | 254
-rw-r--r--  src/mongo/db/storage/mmap_v1/catalog/namespace_details_collection_entry.cpp | 488
-rw-r--r--  src/mongo/db/storage/mmap_v1/catalog/namespace_details_collection_entry.h | 147
-rw-r--r--  src/mongo/db/storage/mmap_v1/catalog/namespace_details_rsv1_metadata.cpp | 200
-rw-r--r--  src/mongo/db/storage/mmap_v1/catalog/namespace_details_rsv1_metadata.h | 102
-rw-r--r--  src/mongo/db/storage/mmap_v1/catalog/namespace_index.cpp | 249
-rw-r--r--  src/mongo/db/storage/mmap_v1/catalog/namespace_index.h | 100
-rw-r--r--  src/mongo/db/storage/mmap_v1/catalog/namespace_test.cpp | 68
-rw-r--r--  src/mongo/db/storage/mmap_v1/commit_notifier.cpp | 72
-rw-r--r--  src/mongo/db/storage/mmap_v1/commit_notifier.h | 83
-rw-r--r--  src/mongo/db/storage/mmap_v1/compress.cpp | 57
-rw-r--r--  src/mongo/db/storage/mmap_v1/compress.h | 46
-rw-r--r--  src/mongo/db/storage/mmap_v1/data_file.cpp | 253
-rw-r--r--  src/mongo/db/storage/mmap_v1/data_file.h | 264
-rw-r--r--  src/mongo/db/storage/mmap_v1/data_file_sync.cpp | 137
-rw-r--r--  src/mongo/db/storage/mmap_v1/data_file_sync.h | 65
-rw-r--r--  src/mongo/db/storage/mmap_v1/data_file_version_test.cpp | 123
-rw-r--r--  src/mongo/db/storage/mmap_v1/diskloc.h | 222
-rw-r--r--  src/mongo/db/storage/mmap_v1/dur.cpp | 917
-rw-r--r--  src/mongo/db/storage/mmap_v1/dur.h | 171
-rw-r--r--  src/mongo/db/storage/mmap_v1/dur_commitjob.cpp | 119
-rw-r--r--  src/mongo/db/storage/mmap_v1/dur_commitjob.h | 224
-rw-r--r--  src/mongo/db/storage/mmap_v1/dur_journal.cpp | 826
-rw-r--r--  src/mongo/db/storage/mmap_v1/dur_journal.h | 100
-rw-r--r--  src/mongo/db/storage/mmap_v1/dur_journal_writer.cpp | 307
-rw-r--r--  src/mongo/db/storage/mmap_v1/dur_journal_writer.h | 200
-rw-r--r--  src/mongo/db/storage/mmap_v1/dur_journalformat.h | 219
-rw-r--r--  src/mongo/db/storage/mmap_v1/dur_journalimpl.h | 130
-rw-r--r--  src/mongo/db/storage/mmap_v1/dur_preplogbuffer.cpp | 209
-rw-r--r--  src/mongo/db/storage/mmap_v1/dur_recover.cpp | 682
-rw-r--r--  src/mongo/db/storage/mmap_v1/dur_recover.h | 119
-rw-r--r--  src/mongo/db/storage/mmap_v1/dur_recovery_unit.cpp | 316
-rw-r--r--  src/mongo/db/storage/mmap_v1/dur_recovery_unit.h | 171
-rw-r--r--  src/mongo/db/storage/mmap_v1/dur_stats.h | 96
-rw-r--r--  src/mongo/db/storage/mmap_v1/durable_mapped_file.cpp | 315
-rw-r--r--  src/mongo/db/storage/mmap_v1/durable_mapped_file.h | 289
-rw-r--r--  src/mongo/db/storage/mmap_v1/durop.cpp | 179
-rw-r--r--  src/mongo/db/storage/mmap_v1/durop.h | 129
-rw-r--r--  src/mongo/db/storage/mmap_v1/extent.cpp | 112
-rw-r--r--  src/mongo/db/storage/mmap_v1/extent.h | 89
-rw-r--r--  src/mongo/db/storage/mmap_v1/extent_manager.cpp | 97
-rw-r--r--  src/mongo/db/storage/mmap_v1/extent_manager.h | 197
-rw-r--r--  src/mongo/db/storage/mmap_v1/file_allocator.cpp | 492
-rw-r--r--  src/mongo/db/storage/mmap_v1/file_allocator.h | 105
-rw-r--r--  src/mongo/db/storage/mmap_v1/heap_record_store_btree.cpp | 164
-rw-r--r--  src/mongo/db/storage/mmap_v1/heap_record_store_btree.h | 237
-rw-r--r--  src/mongo/db/storage/mmap_v1/journal_latency_test_cmd.cpp | 155
-rw-r--r--  src/mongo/db/storage/mmap_v1/logfile.cpp | 272
-rw-r--r--  src/mongo/db/storage/mmap_v1/logfile.h | 83
-rw-r--r--  src/mongo/db/storage/mmap_v1/mmap.cpp | 252
-rw-r--r--  src/mongo/db/storage/mmap_v1/mmap.h | 325
-rw-r--r--  src/mongo/db/storage/mmap_v1/mmap_posix.cpp | 333
-rw-r--r--  src/mongo/db/storage/mmap_v1/mmap_v1_database_catalog_entry.cpp | 915
-rw-r--r--  src/mongo/db/storage/mmap_v1/mmap_v1_database_catalog_entry.h | 208
-rw-r--r--  src/mongo/db/storage/mmap_v1/mmap_v1_engine.cpp | 420
-rw-r--r--  src/mongo/db/storage/mmap_v1/mmap_v1_engine.h | 130
-rw-r--r--  src/mongo/db/storage/mmap_v1/mmap_v1_extent_manager.cpp | 675
-rw-r--r--  src/mongo/db/storage/mmap_v1/mmap_v1_extent_manager.h | 258
-rw-r--r--  src/mongo/db/storage/mmap_v1/mmap_v1_init.cpp | 86
-rw-r--r--  src/mongo/db/storage/mmap_v1/mmap_v1_init_test.cpp | 120
-rw-r--r--  src/mongo/db/storage/mmap_v1/mmap_v1_noinit.cpp | 31
-rw-r--r--  src/mongo/db/storage/mmap_v1/mmap_v1_options.cpp | 31
-rw-r--r--  src/mongo/db/storage/mmap_v1/mmap_v1_options.h | 88
-rw-r--r--  src/mongo/db/storage/mmap_v1/mmap_v1_record_store_test.cpp | 105
-rw-r--r--  src/mongo/db/storage/mmap_v1/mmap_windows.cpp | 487
-rw-r--r--  src/mongo/db/storage/mmap_v1/paths.cpp | 113
-rw-r--r--  src/mongo/db/storage/mmap_v1/paths.h | 100
-rw-r--r--  src/mongo/db/storage/mmap_v1/record.h | 181
-rw-r--r--  src/mongo/db/storage/mmap_v1/record_access_tracker.cpp | 338
-rw-r--r--  src/mongo/db/storage/mmap_v1/record_access_tracker.h | 165
-rw-r--r--  src/mongo/db/storage/mmap_v1/record_access_tracker_test.cpp | 150
-rw-r--r--  src/mongo/db/storage/mmap_v1/record_store_v1_base.cpp | 962
-rw-r--r--  src/mongo/db/storage/mmap_v1/record_store_v1_base.h | 364
-rw-r--r--  src/mongo/db/storage/mmap_v1/record_store_v1_capped.cpp | 696
-rw-r--r--  src/mongo/db/storage/mmap_v1/record_store_v1_capped.h | 129
-rw-r--r--  src/mongo/db/storage/mmap_v1/record_store_v1_capped_iterator.cpp | 215
-rw-r--r--  src/mongo/db/storage/mmap_v1/record_store_v1_capped_iterator.h | 98
-rw-r--r--  src/mongo/db/storage/mmap_v1/record_store_v1_capped_test.cpp | 797
-rw-r--r--  src/mongo/db/storage/mmap_v1/record_store_v1_repair_iterator.cpp | 210
-rw-r--r--  src/mongo/db/storage/mmap_v1/record_store_v1_repair_iterator.h | 96
-rw-r--r--  src/mongo/db/storage/mmap_v1/record_store_v1_simple.cpp | 486
-rw-r--r--  src/mongo/db/storage/mmap_v1/record_store_v1_simple.h | 106
-rw-r--r--  src/mongo/db/storage/mmap_v1/record_store_v1_simple_iterator.cpp | 129
-rw-r--r--  src/mongo/db/storage/mmap_v1/record_store_v1_simple_iterator.h | 79
-rw-r--r--  src/mongo/db/storage/mmap_v1/record_store_v1_simple_test.cpp | 468
-rw-r--r--  src/mongo/db/storage/mmap_v1/record_store_v1_test_help.cpp | 668
-rw-r--r--  src/mongo/db/storage/mmap_v1/record_store_v1_test_help.h | 211
-rw-r--r--  src/mongo/db/storage/mmap_v1/repair_database.cpp | 499
-rw-r--r--  src/mongo/db/storage/mmap_v1/repair_database_interface.cpp | 51
-rw-r--r--  src/mongo/db/storage/mmap_v1/repair_database_interface.h | 46
-rw-r--r--  src/mongo/db/storage/mmap_v1/touch_pages.cpp | 42
-rw-r--r--  src/mongo/db/storage/mmap_v1/touch_pages.h | 38
-rw-r--r--  src/mongo/db/storage/mobile/mobile_recovery_unit.h | 6
-rw-r--r--  src/mongo/db/storage/recovery_unit.h | 44
-rw-r--r--  src/mongo/db/storage/recovery_unit_noop.h | 5
-rw-r--r--  src/mongo/db/storage/storage_engine_init.cpp | 6
-rw-r--r--  src/mongo/db/storage/storage_engine_metadata.cpp | 67
-rw-r--r--  src/mongo/db/storage/storage_options.h | 6
-rw-r--r--  src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp | 5
-rw-r--r--  src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h | 4
-rw-r--r--  src/mongo/dbtests/SConscript | 4
-rw-r--r--  src/mongo/dbtests/basictests.cpp | 35
-rw-r--r--  src/mongo/dbtests/framework_options.cpp | 45
-rw-r--r--  src/mongo/dbtests/jsobjtests.cpp | 287
-rw-r--r--  src/mongo/dbtests/mmaptests.cpp | 312
-rw-r--r--  src/mongo/dbtests/namespacetests.cpp | 664
-rw-r--r--  src/mongo/embedded/embedded.cpp | 6
-rw-r--r--  src/mongo/embedded/embedded_options.cpp | 22
155 files changed, 79 insertions, 33895 deletions
diff --git a/src/mongo/db/SConscript b/src/mongo/db/SConscript
index 67d31359f66..b28085d4de5 100644
--- a/src/mongo/db/SConscript
+++ b/src/mongo/db/SConscript
@@ -471,7 +471,6 @@ env.Library(
"mongod_options.cpp",
],
LIBDEPS=[
- '$BUILD_DIR/mongo/db/storage/mmap_v1/mmap_v1_options',
'repl/repl_settings',
'repl/replica_set_messages',
'server_options_servers',
@@ -695,6 +694,9 @@ env.Library(
'index/index_access_method',
'write_ops',
],
+ LIBDEPS_PRIVATE=[
+ '$BUILD_DIR/mongo/db/commands/server_status_core',
+ ],
)
env.Library(
@@ -858,19 +860,6 @@ env.Library(
)
env.Library(
- target='prefetch',
- source=[
- 'prefetch.cpp',
- ],
- LIBDEPS=[
- 'dbhelpers',
- 'index/index_access_method',
- '$BUILD_DIR/mongo/db/stats/timer_stats',
- '$BUILD_DIR/mongo/db/storage/mmap_v1/mmap',
- ],
-)
-
-env.Library(
target="rw_concern_d",
source=[
"read_concern.cpp",
@@ -886,6 +875,9 @@ env.Library(
"storage/storage_options",
"s/sharding",
],
+ LIBDEPS_PRIVATE=[
+ "commands/server_status_core",
+ ],
)
env.Library(
@@ -896,7 +888,6 @@ env.Library(
LIBDEPS=[
'$BUILD_DIR/mongo/db/catalog/collection',
'$BUILD_DIR/mongo/db/catalog/database',
- '$BUILD_DIR/mongo/db/storage/mmap_v1/repair_database_interface',
'background',
'logical_clock',
],
@@ -920,6 +911,7 @@ env.Library(
],
LIBDEPS_PRIVATE=[
'$BUILD_DIR/mongo/db/commands/fsync_locked',
+ 'commands/server_status_core',
'write_ops',
]
)
@@ -1035,6 +1027,9 @@ env.Library(
'storage/storage_options',
'update/update_driver',
],
+ LIBDEPS_PRIVATE=[
+ "commands/server_status_core",
+ ],
)
env.Library(
@@ -1047,7 +1042,6 @@ env.Library(
LIBDEPS=[
"$BUILD_DIR/mongo/db/bson/dotted_path_support",
"$BUILD_DIR/mongo/db/logical_time_metadata_hook",
- "$BUILD_DIR/mongo/db/storage/mmap_v1/file_allocator",
"$BUILD_DIR/mongo/db/ttl_collection_cache",
"$BUILD_DIR/mongo/executor/network_interface_factory",
"$BUILD_DIR/mongo/s/catalog/sharding_catalog_client_impl",
@@ -1085,7 +1079,6 @@ env.Library(
"op_observer_d",
"ops/write_ops_parsers",
"pipeline/aggregation",
- "prefetch",
"query_exec",
"repair_database",
"repl/bgsync",
@@ -1111,8 +1104,6 @@ env.Library(
"stats/top",
"storage/devnull/storage_devnull",
"storage/ephemeral_for_test/storage_ephemeral_for_test",
- "storage/mmap_v1/mmap",
- "storage/mmap_v1/storage_mmapv1",
"storage/storage_engine_lock_file",
"storage/storage_engine_metadata",
"storage/storage_init_d",
diff --git a/src/mongo/db/catalog/SConscript b/src/mongo/db/catalog/SConscript
index 13287b8f11d..97f4402ed92 100644
--- a/src/mongo/db/catalog/SConscript
+++ b/src/mongo/db/catalog/SConscript
@@ -294,9 +294,9 @@ env.Library(
'$BUILD_DIR/mongo/db/views/views_mongod',
],
LIBDEPS_PRIVATE=[
+ "$BUILD_DIR/mongo/db/commands/server_status_core",
'$BUILD_DIR/mongo/db/logical_clock',
'$BUILD_DIR/mongo/db/repl/repl_settings',
- '$BUILD_DIR/mongo/db/storage/mmap_v1/mmap_v1_options',
'$BUILD_DIR/mongo/db/storage/storage_engine_common',
],
)
diff --git a/src/mongo/db/catalog/collection.cpp b/src/mongo/db/catalog/collection.cpp
index 915980a34d3..d5a05ec5e02 100644
--- a/src/mongo/db/catalog/collection.cpp
+++ b/src/mongo/db/catalog/collection.cpp
@@ -56,7 +56,6 @@
#include "mongo/db/server_parameters.h"
#include "mongo/db/service_context.h"
#include "mongo/db/storage/key_string.h"
-#include "mongo/db/storage/mmap_v1/mmap_v1_options.h"
#include "mongo/db/storage/record_fetcher.h"
#include "mongo/db/storage/record_store.h"
#include "mongo/db/update/update_driver.h"
diff --git a/src/mongo/db/catalog/collection_impl.cpp b/src/mongo/db/catalog/collection_impl.cpp
index 8ef6a71b469..f74fbabba89 100644
--- a/src/mongo/db/catalog/collection_impl.cpp
+++ b/src/mongo/db/catalog/collection_impl.cpp
@@ -64,7 +64,6 @@
#include "mongo/db/server_parameters.h"
#include "mongo/db/service_context.h"
#include "mongo/db/storage/key_string.h"
-#include "mongo/db/storage/mmap_v1/mmap_v1_options.h"
#include "mongo/db/storage/record_fetcher.h"
#include "mongo/db/storage/record_store.h"
#include "mongo/db/update/update_driver.h"
@@ -799,9 +798,6 @@ bool CollectionImpl::_enforceQuota(bool userEnforeQuota) const {
if (!userEnforeQuota)
return false;
- if (!mmapv1GlobalOptions.quota)
- return false;
-
if (_ns.db() == "local")
return false;
diff --git a/src/mongo/db/commands/SConscript b/src/mongo/db/commands/SConscript
index 908214a6e2f..80078126aa1 100644
--- a/src/mongo/db/commands/SConscript
+++ b/src/mongo/db/commands/SConscript
@@ -167,10 +167,10 @@ env.Library(
"fsync.cpp",
],
LIBDEPS_PRIVATE=[
+ '$BUILD_DIR/mongo/db/auth/authprivilege',
'$BUILD_DIR/mongo/db/commands',
'$BUILD_DIR/mongo/db/concurrency/write_conflict_exception',
'$BUILD_DIR/mongo/db/curop',
- '$BUILD_DIR/mongo/db/storage/mmap_v1/storage_mmapv1',
'fsync_locked',
]
)
diff --git a/src/mongo/db/commands/dbcommands.cpp b/src/mongo/db/commands/dbcommands.cpp
index e95e188ece6..90d2cd2d5f0 100644
--- a/src/mongo/db/commands/dbcommands.cpp
+++ b/src/mongo/db/commands/dbcommands.cpp
@@ -240,15 +240,16 @@ public:
log() << "repairDatabase " << dbname;
BackgroundOperation::assertNoBgOpInProgForDb(dbname);
- e = cmdObj.getField("preserveClonedFilesOnFailure");
- bool preserveClonedFilesOnFailure = e.isBoolean() && e.boolean();
- e = cmdObj.getField("backupOriginalFiles");
- bool backupOriginalFiles = e.isBoolean() && e.boolean();
+ uassert(ErrorCodes::BadValue,
+ "preserveClonedFilesOnFailure not supported",
+ !cmdObj.getField("preserveClonedFilesOnFailure").trueValue());
+ uassert(ErrorCodes::BadValue,
+ "backupOriginalFiles not supported",
+ !cmdObj.getField("backupOriginalFiles").trueValue());
StorageEngine* engine = getGlobalServiceContext()->getStorageEngine();
repl::UnreplicatedWritesBlock uwb(opCtx);
- Status status = repairDatabase(
- opCtx, engine, dbname, preserveClonedFilesOnFailure, backupOriginalFiles);
+ Status status = repairDatabase(opCtx, engine, dbname);
// Open database before returning
DatabaseHolder::getDatabaseHolder().openDb(opCtx, dbname);
diff --git a/src/mongo/db/commands/fsync.cpp b/src/mongo/db/commands/fsync.cpp
index 1af900475e7..a72454ab115 100644
--- a/src/mongo/db/commands/fsync.cpp
+++ b/src/mongo/db/commands/fsync.cpp
@@ -48,7 +48,6 @@
#include "mongo/db/concurrency/write_conflict_exception.h"
#include "mongo/db/db.h"
#include "mongo/db/service_context.h"
-#include "mongo/db/storage/mmap_v1/dur.h"
#include "mongo/db/storage/storage_engine.h"
#include "mongo/stdx/condition_variable.h"
#include "mongo/util/assert_util.h"
@@ -135,17 +134,6 @@ public:
log() << "CMD fsync: sync:" << sync << " lock:" << lock;
if (!lock) {
- // the simple fsync command case
- if (sync) {
- // can this be GlobalRead? and if it can, it should be nongreedy.
- Lock::GlobalWrite w(opCtx);
- // TODO SERVER-26822: Replace MMAPv1 specific calls with ones that are storage
- // engine agnostic.
- getDur().commitNow(opCtx);
-
- // No WriteUnitOfWork needed, as this does no writes of its own.
- }
-
// Take a global IS lock to ensure the storage engine is not shutdown
Lock::GlobalLock global(opCtx, MODE_IS);
StorageEngine* storageEngine = getGlobalServiceContext()->getStorageEngine();
@@ -345,16 +333,6 @@ void FSyncLockThread::run() {
OperationContext& opCtx = *opCtxPtr;
Lock::GlobalWrite global(&opCtx); // No WriteUnitOfWork needed
- try {
- // TODO SERVER-26822: Replace MMAPv1 specific calls with ones that are storage engine
- // agnostic.
- getDur().syncDataAndTruncateJournal(&opCtx);
- } catch (const std::exception& e) {
- error() << "error doing syncDataAndTruncateJournal: " << e.what();
- fsyncCmd.threadStatus = Status(ErrorCodes::CommandFailed, e.what());
- fsyncCmd.acquireFsyncLockSyncCV.notify_one();
- return;
- }
opCtx.lockState()->downgradeGlobalXtoSForMMAPV1();
StorageEngine* storageEngine = getGlobalServiceContext()->getStorageEngine();
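
With the dur layer gone, both fsync code paths end at the storage engine's generic flush hook. A minimal sketch of the surviving no-lock branch; that StorageEngine::flushAllFiles(opCtx, sync) is the next call is an assumption here, since the hunk above is truncated before it:

    // Sketch, not from the diff: the engine-agnostic fsync path that remains.
    // flushAllFiles as the flush entry point is an assumption; the hunk above
    // cuts off right after the lock acquisition.
    Lock::GlobalLock global(opCtx, MODE_IS);  // IS lock keeps the engine from shutting down
    StorageEngine* storageEngine = getGlobalServiceContext()->getStorageEngine();
    storageEngine->flushAllFiles(opCtx, sync);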
diff --git a/src/mongo/db/db.cpp b/src/mongo/db/db.cpp
index a7e7a7e9d0a..851175c1a8f 100644
--- a/src/mongo/db/db.cpp
+++ b/src/mongo/db/db.cpp
@@ -126,7 +126,6 @@
#include "mongo/db/startup_warnings_mongod.h"
#include "mongo/db/stats/counters.h"
#include "mongo/db/storage/encryption_hooks.h"
-#include "mongo/db/storage/mmap_v1/mmap_v1_options.h"
#include "mongo/db/storage/storage_engine.h"
#include "mongo/db/storage/storage_engine_init.h"
#include "mongo/db/storage/storage_options.h"
@@ -405,21 +404,12 @@ ExitCode _initAndListen(int listenPort) {
uassert(10296, ss.str().c_str(), boost::filesystem::exists(storageGlobalParams.dbpath));
}
- {
- std::stringstream ss;
- ss << "repairpath (" << storageGlobalParams.repairpath << ") does not exist";
- uassert(12590, ss.str().c_str(), boost::filesystem::exists(storageGlobalParams.repairpath));
- }
-
initializeSNMP();
if (!storageGlobalParams.readOnly) {
boost::filesystem::remove_all(storageGlobalParams.dbpath + "/_tmp/");
}
- if (mmapv1GlobalOptions.journalOptions & MMAPV1Options::JournalRecoverOnly)
- return EXIT_NET_ERROR;
-
if (mongodGlobalParams.scriptingEnabled) {
ScriptEngine::setup();
}
diff --git a/src/mongo/db/index/SConscript b/src/mongo/db/index/SConscript
index 6b0b9d12793..3ec4e5a7d90 100644
--- a/src/mongo/db/index/SConscript
+++ b/src/mongo/db/index/SConscript
@@ -85,7 +85,6 @@ serveronlyEnv.Library(
'$BUILD_DIR/mongo/db/concurrency/write_conflict_exception',
'$BUILD_DIR/mongo/db/repl/repl_coordinator_interface',
'$BUILD_DIR/mongo/db/storage/encryption_hooks',
- '$BUILD_DIR/mongo/db/storage/mmap_v1/btree',
'$BUILD_DIR/mongo/db/storage/storage_options',
'$BUILD_DIR/third_party/shim_snappy',
'index_descriptor',
diff --git a/src/mongo/db/index/index_access_method.cpp b/src/mongo/db/index/index_access_method.cpp
index f31dde359cb..7ab26cbb9c2 100644
--- a/src/mongo/db/index/index_access_method.cpp
+++ b/src/mongo/db/index/index_access_method.cpp
@@ -80,14 +80,6 @@ bool isMultikeyFromPaths(const MultikeyPaths& multikeyPaths) {
} // namespace
MONGO_EXPORT_SERVER_PARAMETER(failIndexKeyTooLong, bool, true);
-//
-// Comparison for external sorter interface
-//
-
-// Defined in db/structure/btree/key.cpp
-// XXX TODO: rename to something more descriptive, etc. etc.
-int oldCompare(const BSONObj& l, const BSONObj& r, const Ordering& o);
-
class BtreeExternalSortComparison {
public:
BtreeExternalSortComparison(const BSONObj& ordering, IndexVersion version)
@@ -98,12 +90,8 @@ public:
typedef std::pair<BSONObj, RecordId> Data;
int operator()(const Data& l, const Data& r) const {
- int x = (_version == IndexVersion::kV0
- ? oldCompare(l.first, r.first, _ordering)
- : l.first.woCompare(r.first, _ordering, /*considerfieldname*/ false));
- if (x) {
+ if (int x = l.first.woCompare(r.first, _ordering, /*considerfieldname*/ false))
return x;
- }
return l.second.compare(r.second);
}
@@ -497,11 +485,6 @@ Status IndexAccessMethod::commitBulk(OperationContext* opCtx,
}
WriteUnitOfWork wunit(opCtx);
- // Improve performance in the btree-building phase by disabling rollback tracking.
- // This avoids copying all the written bytes to a buffer that is only used to roll back.
- // Note that this is safe to do, as this entire index-build-in-progress will be cleaned
- // up by the index system.
- opCtx->recoveryUnit()->setRollbackWritesDisabled();
// Get the next datum and add it to the builder.
BulkBuilder::Sorter::Data data = it->next();
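
The comparator above is now a single code path: order by the BSON key, then break ties on RecordId so the sort stays total. A self-contained sketch of the same idiom with standard-library stand-ins (std::string for BSONObj, std::int64_t for RecordId; all names illustrative, not from the tree):

    #include <cstdint>
    #include <string>
    #include <utility>

    // std::string stands in for BSONObj, std::int64_t for RecordId.
    using Data = std::pair<std::string, std::int64_t>;

    int compareData(const Data& l, const Data& r) {
        // Primary ordering on the key; any nonzero result decides.
        if (int x = l.first.compare(r.first))
            return x;
        // Equal keys: fall back to the record id to keep the order total.
        return l.second < r.second ? -1 : (l.second > r.second ? 1 : 0);
    }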
diff --git a/src/mongo/db/mongod_options.cpp b/src/mongo/db/mongod_options.cpp
index 510b62dc354..85d50af141f 100644
--- a/src/mongo/db/mongod_options.cpp
+++ b/src/mongo/db/mongod_options.cpp
@@ -44,7 +44,6 @@
#include "mongo/db/repl/repl_settings.h"
#include "mongo/db/server_options.h"
#include "mongo/db/server_options_server_helpers.h"
-#include "mongo/db/storage/mmap_v1/mmap_v1_options.h"
#include "mongo/util/log.h"
#include "mongo/util/mongoutils/str.h"
#include "mongo/util/net/ssl_options.h"
@@ -222,42 +221,6 @@ Status addMongodOptions(moe::OptionSection* options) {
.setSources(moe::SourceAllLegacy);
storage_options
- .addOptionChaining("storage.mmapv1.preallocDataFiles",
- "",
- moe::Bool,
- "disable data file preallocation - will often hurt performance",
- {"storage.preallocDataFiles"})
- .setSources(moe::SourceYAMLConfig);
-
- storage_options
- .addOptionChaining("storage.mmapv1.nsSize",
- "nssize",
- moe::Int,
- ".ns file size (in MB) for new databases",
- {"storage.nsSize"})
- .setDefault(moe::Value(16));
-
- storage_options
- .addOptionChaining("storage.mmapv1.quota.enforced",
- "quota",
- moe::Switch,
- "limits each database to a certain number of files (8 default)",
- {"storage.quota.enforced"})
- .incompatibleWith("keyFile");
-
- storage_options.addOptionChaining("storage.mmapv1.quota.maxFilesPerDB",
- "quotaFiles",
- moe::Int,
- "number of files allowed per db, implies --quota",
- {"storage.quota.maxFilesPerDB"});
-
- storage_options.addOptionChaining("storage.mmapv1.smallFiles",
- "smallfiles",
- moe::Switch,
- "use a smaller default file size",
- {"storage.smallFiles"});
-
- storage_options
.addOptionChaining("storage.syncPeriodSecs",
"syncdelay",
moe::Double,
@@ -272,11 +235,6 @@ Status addMongodOptions(moe::OptionSection* options) {
storage_options.addOptionChaining("repair", "repair", moe::Switch, "run repair on all dbs")
.setSources(moe::SourceAllLegacy);
- storage_options.addOptionChaining("storage.repairPath",
- "repairpath",
- moe::String,
- "root directory for repair files - defaults to dbpath");
-
// Javascript Options
general_options
@@ -319,36 +277,6 @@ Status addMongodOptions(moe::OptionSection* options) {
general_options.addOptionChaining("storage.journal.enabled", "", moe::Bool, "enable journaling")
.setSources(moe::SourceYAMLConfig);
- // Two ways to set durability diagnostic options. durOptions is deprecated
- storage_options
- .addOptionChaining("storage.mmapv1.journal.debugFlags",
- "journalOptions",
- moe::Int,
- "journal diagnostic options",
- {"storage.journal.debugFlags"})
- .incompatibleWith("durOptions");
-
- storage_options
- .addOptionChaining("durOptions", "durOptions", moe::Int, "durability diagnostic options")
- .hidden()
- .setSources(moe::SourceAllLegacy)
- .incompatibleWith("storage.mmapv1.journal.debugFlags");
-
- storage_options.addOptionChaining("storage.journal.commitIntervalMs",
- "journalCommitInterval",
- moe::Int,
- "how often to group/batch commit (ms)",
- {"storage.mmapv1.journal.commitIntervalMs"});
-
- // Deprecated option that we don't want people to use for performance reasons
- storage_options
- .addOptionChaining("storage.mmapv1.journal.nopreallocj",
- "nopreallocj",
- moe::Switch,
- "don't preallocate journal files")
- .hidden()
- .setSources(moe::SourceAll);
-
#if defined(__linux__)
general_options.addOptionChaining(
"shutdown", "shutdown", moe::Switch, "kill a running server (for init scripts)");
@@ -670,24 +598,6 @@ Status canonicalizeMongodOptions(moe::Environment* params) {
}
}
- // "storage.mmapv1.journal.durOptions" comes from the config file, so override it
- // if "durOptions" is set since that comes from the command line.
- if (params->count("durOptions")) {
- int durOptions;
- Status ret = params->get("durOptions", &durOptions);
- if (!ret.isOK()) {
- return ret;
- }
- ret = params->remove("durOptions");
- if (!ret.isOK()) {
- return ret;
- }
- ret = params->set("storage.mmapv1.journal.debugFlags", moe::Value(durOptions));
- if (!ret.isOK()) {
- return ret;
- }
- }
-
// "security.authorization" comes from the config file, so override it if "auth" is
// set since those come from the command line.
if (params->count("auth")) {
@@ -704,20 +614,6 @@ Status canonicalizeMongodOptions(moe::Environment* params) {
}
}
- // "storage.mmapv1.preallocDataFiles" comes from the config file, so override it if "noprealloc"
- // is set since that comes from the command line.
- if (params->count("noprealloc")) {
- Status ret = params->set("storage.mmapv1.preallocDataFiles",
- moe::Value(!(*params)["noprealloc"].as<bool>()));
- if (!ret.isOK()) {
- return ret;
- }
- ret = params->remove("noprealloc");
- if (!ret.isOK()) {
- return ret;
- }
- }
-
// "sharding.archiveMovedChunks" comes from the config file, so override it if
// "noMoveParanoia" or "moveParanoia" are set since those come from the command line.
if (params->count("noMoveParanoia")) {
@@ -935,13 +831,6 @@ Status storeMongodOptions(const moe::Environment& params) {
if (params.count("cpu")) {
serverGlobalParams.cpu = params["cpu"].as<bool>();
}
- if (params.count("storage.mmapv1.quota.enforced")) {
- mmapv1GlobalOptions.quota = params["storage.mmapv1.quota.enforced"].as<bool>();
- }
- if (params.count("storage.mmapv1.quota.maxFilesPerDB")) {
- mmapv1GlobalOptions.quota = true;
- mmapv1GlobalOptions.quotaFiles = params["storage.mmapv1.quota.maxFilesPerDB"].as<int>() - 1;
- }
if (params.count("storage.journal.enabled")) {
storageGlobalParams.dur = params["storage.journal.enabled"].as<bool>();
@@ -961,12 +850,6 @@ Status storeMongodOptions(const moe::Environment& params) {
<< "ms)");
}
}
- if (params.count("storage.mmapv1.journal.debugFlags")) {
- mmapv1GlobalOptions.journalOptions = params["storage.mmapv1.journal.debugFlags"].as<int>();
- }
- if (params.count("storage.mmapv1.journal.nopreallocj")) {
- mmapv1GlobalOptions.preallocj = !params["storage.mmapv1.journal.nopreallocj"].as<bool>();
- }
if (params.count("security.javascriptEnabled")) {
mongodGlobalParams.scriptingEnabled = params["security.javascriptEnabled"].as<bool>();
@@ -984,14 +867,6 @@ Status storeMongodOptions(const moe::Environment& params) {
}
}
- if (params.count("storage.mmapv1.preallocDataFiles")) {
- mmapv1GlobalOptions.prealloc = params["storage.mmapv1.preallocDataFiles"].as<bool>();
- log() << "note: noprealloc may hurt performance in many applications" << endl;
- }
- if (params.count("storage.mmapv1.smallFiles")) {
- mmapv1GlobalOptions.smallfiles = params["storage.mmapv1.smallFiles"].as<bool>();
- }
-
if (params.count("repair") && params["repair"].as<bool>() == true) {
storageGlobalParams.upgrade = 1; // --repair implies --upgrade
storageGlobalParams.repair = 1;
@@ -1028,14 +903,6 @@ Status storeMongodOptions(const moe::Environment& params) {
serverGlobalParams.indexBuildRetry = params["storage.indexBuildRetry"].as<bool>();
}
- if (params.count("storage.mmapv1.nsSize")) {
- int x = params["storage.mmapv1.nsSize"].as<int>();
- if (x <= 0 || x > (0x7fffffff / 1024 / 1024)) {
- return Status(ErrorCodes::BadValue, "bad --nssize arg");
- }
- mmapv1GlobalOptions.lenForNewNsFiles = x * 1024 * 1024;
- verify(mmapv1GlobalOptions.lenForNewNsFiles > 0);
- }
if (params.count("replication.oplogSizeMB")) {
long long x = params["replication.oplogSizeMB"].as<int>();
if (x <= 0) {
@@ -1134,23 +1001,6 @@ Status storeMongodOptions(const moe::Environment& params) {
}
#endif
- // needs to be after things like --configsvr parsing, thus here.
- if (params.count("storage.repairPath")) {
- storageGlobalParams.repairpath = params["storage.repairPath"].as<std::string>();
- if (!storageGlobalParams.repairpath.size()) {
- return Status(ErrorCodes::BadValue, "repairpath is empty");
- }
-
- if (storageGlobalParams.dur &&
- !str::startsWith(storageGlobalParams.repairpath, storageGlobalParams.dbpath)) {
- return Status(ErrorCodes::BadValue,
- "You must use a --repairpath that is a subdirectory of --dbpath when "
- "using journaling");
- }
- } else {
- storageGlobalParams.repairpath = storageGlobalParams.dbpath;
- }
-
// Check if we are 32 bit and have not explicitly specified any journaling options
if (sizeof(void*) == 4 && !params.count("storage.journal.enabled")) {
// trying to make this stand out more like startup warnings
diff --git a/src/mongo/db/prefetch.cpp b/src/mongo/db/prefetch.cpp
deleted file mode 100644
index a55993037bc..00000000000
--- a/src/mongo/db/prefetch.cpp
+++ /dev/null
@@ -1,274 +0,0 @@
-/**
- * Copyright (C) 2008-2014 MongoDB Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kReplication
-
-#include "mongo/platform/basic.h"
-
-#include "mongo/db/prefetch.h"
-
-#include "mongo/db/catalog/collection.h"
-#include "mongo/db/catalog/database.h"
-#include "mongo/db/catalog/index_catalog.h"
-#include "mongo/db/commands/server_status_metric.h"
-#include "mongo/db/concurrency/d_concurrency.h"
-#include "mongo/db/dbhelpers.h"
-#include "mongo/db/index/index_access_method.h"
-#include "mongo/db/jsobj.h"
-#include "mongo/db/repl/oplog_entry.h"
-#include "mongo/db/repl/repl_settings.h"
-#include "mongo/db/repl/replication_coordinator.h"
-#include "mongo/db/server_parameters.h"
-#include "mongo/db/stats/timer_stats.h"
-#include "mongo/db/storage/mmap_v1/mmap.h"
-#include "mongo/util/log.h"
-
-namespace mongo {
-
-using std::endl;
-using std::string;
-
-namespace repl {
-namespace {
-// todo / idea: the prefetcher, when it fetches _id, on an upsert, will see if the record exists. if
-// it does not, at write time, we can just do an insert, which will be faster.
-
-// The count (of batches) and time spent fetching pages before application
-// -- meaning depends on the prefetch behavior: all, _id index, none, etc.)
-TimerStats prefetchIndexStats;
-ServerStatusMetricField<TimerStats> displayPrefetchIndexPages("repl.preload.indexes",
- &prefetchIndexStats);
-TimerStats prefetchDocStats;
-ServerStatusMetricField<TimerStats> displayPrefetchDocPages("repl.preload.docs", &prefetchDocStats);
-
-// page in pages needed for all index lookups on a given object
-void prefetchIndexPages(OperationContext* opCtx,
- Collection* collection,
- const ReplSettings::IndexPrefetchConfig& prefetchConfig,
- const BSONObj& obj) {
- // do we want prefetchConfig to be (1) as-is, (2) for update ops only, or (3) configured per op
- // type? One might want PREFETCH_NONE for updates, but it's more rare that it is a bad idea for
- // inserts. #3 (per op), a big issue would be "too many knobs".
- switch (prefetchConfig) {
- case ReplSettings::IndexPrefetchConfig::PREFETCH_NONE:
- return;
- case ReplSettings::IndexPrefetchConfig::PREFETCH_ID_ONLY: {
- TimerHolder timer(&prefetchIndexStats);
- // on the update op case, the call to prefetchRecordPages will touch the _id index.
- // thus perhaps this option isn't very useful?
- try {
- IndexDescriptor* desc = collection->getIndexCatalog()->findIdIndex(opCtx);
- if (!desc)
- return;
- IndexAccessMethod* iam = collection->getIndexCatalog()->getIndex(desc);
- invariant(iam);
- iam->touch(opCtx, obj).transitional_ignore();
- } catch (const DBException& e) {
- LOG(2) << "ignoring exception in prefetchIndexPages(): " << redact(e);
- }
- break;
- }
- case ReplSettings::IndexPrefetchConfig::PREFETCH_ALL: {
- // indexCount includes all indexes, including ones
- // in the process of being built
- IndexCatalog::IndexIterator ii =
- collection->getIndexCatalog()->getIndexIterator(opCtx, true);
- while (ii.more()) {
- TimerHolder timer(&prefetchIndexStats);
- // This will page in all index pages for the given object.
- try {
- IndexDescriptor* desc = ii.next();
- IndexAccessMethod* iam = collection->getIndexCatalog()->getIndex(desc);
- verify(iam);
- iam->touch(opCtx, obj).transitional_ignore();
- } catch (const DBException& e) {
- LOG(2) << "ignoring exception in prefetchIndexPages(): " << redact(e);
- }
- }
- break;
- }
- default:
- fassertFailed(16427);
- }
-}
-
-// page in the data pages for a record associated with an object
-void prefetchRecordPages(OperationContext* opCtx,
- Database* db,
- const char* ns,
- const BSONObj& obj) {
- BSONElement _id;
- if (obj.getObjectID(_id)) {
- TimerHolder timer(&prefetchDocStats);
- BSONObjBuilder builder;
- builder.append(_id);
- BSONObj result;
- try {
- if (Helpers::findById(opCtx, db, ns, builder.done(), result)) {
- // do we want to use Record::touch() here? it's pretty similar.
- // volatile - avoid compiler optimizations for touching a mmap page
- volatile char _dummy_char = '\0'; // NOLINT
-
- // Touch the first word on every page in order to fault it into memory
- for (int i = 0; i < result.objsize(); i += getMinOSPageSizeBytes()) {
- _dummy_char += *(result.objdata() + i);
- }
- // hit the last page, in case we missed it above
- _dummy_char += *(result.objdata() + result.objsize() - 1);
- }
- } catch (const DBException& e) {
- LOG(2) << "ignoring exception in prefetchRecordPages(): " << redact(e);
- }
- }
-}
-} // namespace
-
-// prefetch for an oplog operation
-void prefetchPagesForReplicatedOp(OperationContext* opCtx,
- Database* db,
- const OplogEntry& oplogEntry) {
- invariant(db);
- const ReplSettings::IndexPrefetchConfig prefetchConfig =
- ReplicationCoordinator::get(opCtx)->getIndexPrefetchConfig();
-
- // Prefetch ignores non-CRUD operations.
- if (!oplogEntry.isCrudOpType()) {
- return;
- }
-
- // This will have to change for engines other than MMAP V1, because they might not have
- // means for directly prefetching pages from the collection. For this purpose, acquire S
- // lock on the database, instead of optimizing with IS.
- const auto& nss = oplogEntry.getNamespace();
- Lock::CollectionLock collLock(opCtx->lockState(), nss.ns(), MODE_S);
-
- Collection* collection = db->getCollection(opCtx, nss);
- if (!collection) {
- return;
- }
-
- auto opType = oplogEntry.getOpType();
- LOG(4) << "index prefetch for op " << OpType_serializer(opType);
-
- // should we prefetch index pages on updates? if the update is in-place and doesn't change
- // indexed values, it is actually slower - a lot slower if there are a dozen indexes or
- // lots of multikeys. possible variations (not all mutually exclusive):
- // 1) current behavior: full prefetch
- // 2) don't do it for updates
- // 3) don't do multikey indexes for updates
- // 4) don't prefetchIndexPages on some heuristic; e.g., if it's an $inc.
- // 5) if not prefetching index pages (#2), we should do it if we are upsertings and it
- // will be an insert. to do that we could do the prefetchRecordPage first and if DNE
- // then we do #1.
- //
- // note that on deletes 'obj' does not have all the keys we would want to prefetch on.
- // a way to achieve that would be to prefetch the record first, and then afterwards do
- // this part.
- //
- auto obj = oplogEntry.getOperationToApply();
- invariant(!obj.isEmpty());
- prefetchIndexPages(opCtx, collection, prefetchConfig, obj);
-
- // do not prefetch the data for inserts; it doesn't exist yet
- //
- // we should consider doing the record prefetch for the delete op case as we hit the record
- // when we delete. note if done we only want to touch the first page.
- //
- // update: do record prefetch.
- if ((opType == OpTypeEnum::kUpdate) &&
- // do not prefetch the data for capped collections because
- // they typically do not have an _id index for findById() to use.
- !collection->isCapped()) {
- prefetchRecordPages(opCtx, db, nss.ns().c_str(), obj);
- }
-}
-
-class ReplIndexPrefetch : public ServerParameter {
-public:
- ReplIndexPrefetch() : ServerParameter(ServerParameterSet::getGlobal(), "replIndexPrefetch") {}
-
- virtual ~ReplIndexPrefetch() {}
-
- const char* _value() {
- if (ReplicationCoordinator::get(getGlobalServiceContext())->getReplicationMode() !=
- ReplicationCoordinator::modeReplSet) {
- return "uninitialized";
- }
- ReplSettings::IndexPrefetchConfig ip =
- ReplicationCoordinator::get(getGlobalServiceContext())->getIndexPrefetchConfig();
- switch (ip) {
- case ReplSettings::IndexPrefetchConfig::PREFETCH_NONE:
- return "none";
- case ReplSettings::IndexPrefetchConfig::PREFETCH_ID_ONLY:
- return "_id_only";
- case ReplSettings::IndexPrefetchConfig::PREFETCH_ALL:
- return "all";
- default:
- return "invalid";
- }
- }
-
- virtual void append(OperationContext* opCtx, BSONObjBuilder& b, const string& name) {
- b.append(name, _value());
- }
-
- virtual Status set(const BSONElement& newValueElement) {
- if (ReplicationCoordinator::get(getGlobalServiceContext())->getReplicationMode() !=
- ReplicationCoordinator::modeReplSet) {
- return Status(ErrorCodes::BadValue, "replication is not enabled");
- }
-
- std::string prefetch = newValueElement.valuestrsafe();
- return setFromString(prefetch);
- }
-
- virtual Status setFromString(const string& prefetch) {
- log() << "changing replication index prefetch behavior to " << prefetch;
-
- ReplSettings::IndexPrefetchConfig prefetchConfig;
-
- if (prefetch == "none")
- prefetchConfig = ReplSettings::IndexPrefetchConfig::PREFETCH_NONE;
- else if (prefetch == "_id_only")
- prefetchConfig = ReplSettings::IndexPrefetchConfig::PREFETCH_ID_ONLY;
- else if (prefetch == "all")
- prefetchConfig = ReplSettings::IndexPrefetchConfig::PREFETCH_ALL;
- else {
- return Status(ErrorCodes::BadValue,
- str::stream() << "unrecognized indexPrefetch setting: " << prefetch);
- }
-
- ReplicationCoordinator::get(getGlobalServiceContext())
- ->setIndexPrefetchConfig(prefetchConfig);
- return Status::OK();
- }
-
-} replIndexPrefetch;
-
-} // namespace repl
-} // namespace mongo
diff --git a/src/mongo/db/prefetch.h b/src/mongo/db/prefetch.h
deleted file mode 100644
index 1f5576e31e7..00000000000
--- a/src/mongo/db/prefetch.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/**
-* Copyright (C) 2008 10gen Inc.
-*
-* This program is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Affero General Public License, version 3,
-* as published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU Affero General Public License for more details.
-*
-* You should have received a copy of the GNU Affero General Public License
-* along with this program. If not, see <http://www.gnu.org/licenses/>.
-*
-* As a special exception, the copyright holders give permission to link the
-* code of portions of this program with the OpenSSL library under certain
-* conditions as described in each individual source file and distribute
-* linked combinations including the program with the OpenSSL library. You
-* must comply with the GNU Affero General Public License in all respects for
-* all of the code used other than as permitted herein. If you modify file(s)
-* with this exception, you may extend this exception to your version of the
-* file(s), but you are not obligated to do so. If you do not wish to do so,
-* delete this exception statement from your version. If you delete this
-* exception statement from all source files in the program, then also delete
-* it in the license file.
-*/
-#pragma once
-
-namespace mongo {
-
-class BSONObj;
-class Database;
-class OperationContext;
-
-namespace repl {
-
-class OplogEntry;
-
-// page in possible index and/or data pages for an op from the oplog
-void prefetchPagesForReplicatedOp(OperationContext* opCtx,
- Database* db,
- const OplogEntry& oplogEntry);
-
-} // namespace repl
-} // namespace mongo
diff --git a/src/mongo/db/repair_database.cpp b/src/mongo/db/repair_database.cpp
index df89ce310c6..eafca15fe15 100644
--- a/src/mongo/db/repair_database.cpp
+++ b/src/mongo/db/repair_database.cpp
@@ -51,7 +51,6 @@
#include "mongo/db/catalog/uuid_catalog.h"
#include "mongo/db/index/index_descriptor.h"
#include "mongo/db/logical_clock.h"
-#include "mongo/db/storage/mmap_v1/repair_database_interface.h"
#include "mongo/db/storage/storage_engine.h"
#include "mongo/util/log.h"
#include "mongo/util/scopeguard.h"
@@ -230,11 +229,7 @@ Status rebuildIndexesOnCollection(OperationContext* opCtx,
return Status::OK();
}
-Status repairDatabase(OperationContext* opCtx,
- StorageEngine* engine,
- const std::string& dbName,
- bool preserveClonedFilesOnFailure,
- bool backupOriginalFiles) {
+Status repairDatabase(OperationContext* opCtx, StorageEngine* engine, const std::string& dbName) {
DisableDocumentValidation validationDisabler(opCtx);
// We must hold some form of lock here
@@ -247,24 +242,6 @@ Status repairDatabase(OperationContext* opCtx,
opCtx->checkForInterrupt();
- if (engine->isMmapV1()) {
- // MMAPv1 is a layering violation so it implements its own repairDatabase. Call through a
- // shimmed interface, so the symbol can exist independent of mmapv1.
- auto status = repairDatabaseMmapv1(
- engine, opCtx, dbName, preserveClonedFilesOnFailure, backupOriginalFiles);
- // Restore oplog Collection pointer cache.
- repl::acquireOplogCollectionForLogging(opCtx);
- return status;
- }
-
- // These are MMAPv1 specific
- if (preserveClonedFilesOnFailure) {
- return Status(ErrorCodes::BadValue, "preserveClonedFilesOnFailure not supported");
- }
- if (backupOriginalFiles) {
- return Status(ErrorCodes::BadValue, "backupOriginalFiles not supported");
- }
-
// Close the db and invalidate all current users and caches.
DatabaseHolder::getDatabaseHolder().close(opCtx, dbName, "database closed for repair");
ON_BLOCK_EXIT([&dbName, &opCtx] {
diff --git a/src/mongo/db/repair_database.h b/src/mongo/db/repair_database.h
index 1aa3d4bb911..55dbc05b52b 100644
--- a/src/mongo/db/repair_database.h
+++ b/src/mongo/db/repair_database.h
@@ -73,9 +73,5 @@ Status rebuildIndexesOnCollection(OperationContext* opCtx,
* Some data may be lost or modified in the process but the output will
* be structurally valid on successful return.
*/
-Status repairDatabase(OperationContext* opCtx,
- StorageEngine* engine,
- const std::string& dbName,
- bool preserveClonedFilesOnFailure = false,
- bool backupOriginalFiles = false);
+Status repairDatabase(OperationContext* opCtx, StorageEngine* engine, const std::string& dbName);
}
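
Call sites shrink to match; the dbcommands.cpp hunk earlier in this diff shows the pattern that remains:

    // Sketch of a caller after the signature change (mirrors dbcommands.cpp above).
    StorageEngine* engine = getGlobalServiceContext()->getStorageEngine();
    Status status = repairDatabase(opCtx, engine, dbname);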
diff --git a/src/mongo/db/repair_database_and_check_version.cpp b/src/mongo/db/repair_database_and_check_version.cpp
index 6b9d121f9dd..54be809d90e 100644
--- a/src/mongo/db/repair_database_and_check_version.cpp
+++ b/src/mongo/db/repair_database_and_check_version.cpp
@@ -50,7 +50,6 @@
#include "mongo/db/repl/drop_pending_collection_reaper.h"
#include "mongo/db/repl/replication_coordinator.h"
#include "mongo/db/server_options.h"
-#include "mongo/db/storage/mmap_v1/mmap_v1_options.h"
#include "mongo/util/exit.h"
#include "mongo/util/log.h"
#include "mongo/util/quick_exit.h"
@@ -501,7 +500,7 @@ StatusWith<bool> repairDatabasesAndCheckVersion(OperationContext* opCtx) {
if (replSettings.usingReplSets()) {
// We only care about _id indexes and drop-pending collections if we are in a replset.
checkForIdIndexesAndDropPendingCollections(opCtx, db);
- // Ensure oplog is capped (mmap does not guarantee order of inserts on noncapped
+ // Ensure oplog is capped (mongodb does not guarantee order of inserts on noncapped
// collections)
if (db->name() == "local") {
checkForCappedOplog(opCtx, db);
@@ -519,13 +518,7 @@ StatusWith<bool> repairDatabasesAndCheckVersion(OperationContext* opCtx) {
if (!fcvDocumentExists && nonLocalDatabases) {
severe()
<< "Unable to start up mongod due to missing featureCompatibilityVersion document.";
- if (opCtx->getServiceContext()->getStorageEngine()->isMmapV1()) {
- severe() << "Please run with --journalOptions "
- << static_cast<int>(MMAPV1Options::JournalRecoverOnly)
- << " to recover the journal. Then run with --repair to restore the document.";
- } else {
- severe() << "Please run with --repair to restore the document.";
- }
+ severe() << "Please run with --repair to restore the document.";
fassertFailedNoTrace(40652);
}
diff --git a/src/mongo/db/repl/SConscript b/src/mongo/db/repl/SConscript
index e9a4998db59..e413e6d8323 100644
--- a/src/mongo/db/repl/SConscript
+++ b/src/mongo/db/repl/SConscript
@@ -659,7 +659,6 @@ env.Library(
'$BUILD_DIR/mongo/db/concurrency/lock_manager',
'$BUILD_DIR/mongo/db/concurrency/write_conflict_exception',
'$BUILD_DIR/mongo/db/curop',
- '$BUILD_DIR/mongo/db/prefetch',
'$BUILD_DIR/mongo/db/query_exec',
'$BUILD_DIR/mongo/db/s/sharding_runtime_d',
'$BUILD_DIR/mongo/db/stats/timer_stats',
diff --git a/src/mongo/db/repl/replication_coordinator_external_state.h b/src/mongo/db/repl/replication_coordinator_external_state.h
index c9c6d4192b4..e70f68f47a8 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state.h
+++ b/src/mongo/db/repl/replication_coordinator_external_state.h
@@ -110,12 +110,6 @@ public:
virtual ThreadPool* getDbWorkThreadPool() const = 0;
/**
- * Runs the repair database command on the "local" db, if the storage engine is MMapV1.
- * Note: Used after initial sync to compact the database files.
- */
- virtual Status runRepairOnLocalDB(OperationContext* opCtx) = 0;
-
- /**
* Creates the oplog, writes the first entry and stores the replica set config document.
*/
virtual Status initializeReplSetStorage(OperationContext* opCtx, const BSONObj& config) = 0;
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
index 809cb5140b5..a17d3506ca7 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
@@ -407,26 +407,6 @@ ThreadPool* ReplicationCoordinatorExternalStateImpl::getDbWorkThreadPool() const
return _writerPool.get();
}
-Status ReplicationCoordinatorExternalStateImpl::runRepairOnLocalDB(OperationContext* opCtx) {
- try {
- Lock::GlobalWrite globalWrite(opCtx);
- StorageEngine* engine = getGlobalServiceContext()->getStorageEngine();
-
- if (!engine->isMmapV1()) {
- return Status::OK();
- }
-
- UnreplicatedWritesBlock uwb(opCtx);
- Status status = repairDatabase(opCtx, engine, localDbName, false, false);
-
- // Open database before returning
- DatabaseHolder::getDatabaseHolder().openDb(opCtx, localDbName);
- } catch (const DBException& ex) {
- return ex.toStatus();
- }
- return Status::OK();
-}
-
Status ReplicationCoordinatorExternalStateImpl::initializeReplSetStorage(OperationContext* opCtx,
const BSONObj& config) {
try {
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_impl.h b/src/mongo/db/repl/replication_coordinator_external_state_impl.h
index 8607be1af20..797175b8111 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_impl.h
+++ b/src/mongo/db/repl/replication_coordinator_external_state_impl.h
@@ -76,7 +76,6 @@ public:
virtual void shutdown(OperationContext* opCtx);
virtual executor::TaskExecutor* getTaskExecutor() const override;
virtual ThreadPool* getDbWorkThreadPool() const override;
- virtual Status runRepairOnLocalDB(OperationContext* opCtx) override;
virtual Status initializeReplSetStorage(OperationContext* opCtx, const BSONObj& config);
virtual void waitForAllEarlierOplogWritesToBeVisible(OperationContext* opCtx);
void onDrainComplete(OperationContext* opCtx) override;
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp b/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp
index 5a4ab25d6e4..30568571626 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp
+++ b/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp
@@ -72,10 +72,6 @@ void ReplicationCoordinatorExternalStateMock::startSteadyStateReplication(Operat
void ReplicationCoordinatorExternalStateMock::stopDataReplication(OperationContext*) {}
-Status ReplicationCoordinatorExternalStateMock::runRepairOnLocalDB(OperationContext* opCtx) {
- return Status::OK();
-}
-
Status ReplicationCoordinatorExternalStateMock::initializeReplSetStorage(OperationContext* opCtx,
const BSONObj& config) {
return storeLocalConfigDocument(opCtx, config);
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_mock.h b/src/mongo/db/repl/replication_coordinator_external_state_mock.h
index e8169616079..05d08eb0f6d 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_mock.h
+++ b/src/mongo/db/repl/replication_coordinator_external_state_mock.h
@@ -65,7 +65,6 @@ public:
virtual void shutdown(OperationContext* opCtx);
virtual executor::TaskExecutor* getTaskExecutor() const override;
virtual ThreadPool* getDbWorkThreadPool() const override;
- virtual Status runRepairOnLocalDB(OperationContext* opCtx) override;
virtual Status initializeReplSetStorage(OperationContext* opCtx, const BSONObj& config);
virtual void waitForAllEarlierOplogWritesToBeVisible(OperationContext* opCtx);
void onDrainComplete(OperationContext* opCtx) override;
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index b6d02ba07c0..478713735b5 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -698,13 +698,11 @@ void ReplicationCoordinatorImpl::_startDataReplication(OperationContext* opCtx,
if (startCompleted) {
startCompleted();
}
- // Repair local db (to compact it).
- auto opCtxHolder = cc().makeOperationContext();
- uassertStatusOK(_externalState->runRepairOnLocalDB(opCtxHolder.get()));
// Because initial sync completed, we can only be in STARTUP2, not REMOVED.
// Transition from STARTUP2 to RECOVERING and start the producer and the applier.
invariant(getMemberState().startup2());
invariant(setFollowerMode(MemberState::RS_RECOVERING));
+ auto opCtxHolder = cc().makeOperationContext();
_externalState->startSteadyStateReplication(opCtxHolder.get(), this);
};
diff --git a/src/mongo/db/repl/sync_tail.cpp b/src/mongo/db/repl/sync_tail.cpp
index 300b2c67913..94fbca8fa6f 100644
--- a/src/mongo/db/repl/sync_tail.cpp
+++ b/src/mongo/db/repl/sync_tail.cpp
@@ -55,7 +55,6 @@
#include "mongo/db/logical_session_id.h"
#include "mongo/db/multi_key_path_tracker.h"
#include "mongo/db/namespace_string.h"
-#include "mongo/db/prefetch.h"
#include "mongo/db/query/query_knobs.h"
#include "mongo/db/repl/applier_helpers.h"
#include "mongo/db/repl/apply_ops.h"
@@ -349,38 +348,6 @@ const OplogApplier::Options& SyncTail::getOptions() const {
namespace {
-// The pool threads call this to prefetch each op
-void prefetchOp(const OplogEntry& oplogEntry) {
- const auto& nss = oplogEntry.getNamespace();
- if (!nss.isEmpty()) {
- try {
- // one possible tweak here would be to stay in the read lock for this database
- // for multiple prefetches if they are for the same database.
- const ServiceContext::UniqueOperationContext opCtxPtr = cc().makeOperationContext();
- OperationContext& opCtx = *opCtxPtr;
- AutoGetCollectionForReadCommand ctx(&opCtx, nss);
- Database* db = ctx.getDb();
- if (db) {
- prefetchPagesForReplicatedOp(&opCtx, db, oplogEntry);
- }
- } catch (const DBException& e) {
- LOG(2) << "ignoring exception in prefetchOp(): " << redact(e) << endl;
- } catch (const std::exception& e) {
- log() << "Unhandled std::exception in prefetchOp(): " << redact(e.what()) << endl;
- fassertFailed(16397);
- }
- }
-}
-
-// Doles out all the work to the reader pool threads and waits for them to complete
-void prefetchOps(const MultiApplier::Operations& ops, ThreadPool* prefetcherPool) {
- invariant(prefetcherPool);
- for (auto&& op : ops) {
- invariant(prefetcherPool->schedule([&] { prefetchOp(op); }));
- }
- prefetcherPool->waitForIdle();
-}
-
// Doles out all the work to the writer pool threads.
// Does not modify writerVectors, but passes non-const pointers to inner vectors into func.
void applyOps(std::vector<MultiApplier::OperationPtrs>& writerVectors,
@@ -1255,11 +1222,6 @@ Status multiSyncApply(OperationContext* opCtx,
StatusWith<OpTime> SyncTail::multiApply(OperationContext* opCtx, MultiApplier::Operations ops) {
invariant(!ops.empty());
- if (isMMAPV1()) {
- // Use a ThreadPool to prefetch all the operations in a batch.
- prefetchOps(ops, _writerPool);
- }
-
LOG(2) << "replication batch size is " << ops.size();
// Stop all readers until we're done. This also prevents doc-locking engines from deleting old
// entries from the oplog until we finish writing.
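
With the prefetch pass deleted, multiApply runs the same preamble on every engine. What remains, per the surviving context lines above:

    // multiApply after this change: no isMMAPV1() branch, no prefetch fan-out.
    invariant(!ops.empty());
    LOG(2) << "replication batch size is " << ops.size();
    // ...then readers are stopped and the batch goes to the writer pool.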
diff --git a/src/mongo/db/storage/SConscript b/src/mongo/db/storage/SConscript
index 0a2e629e9b6..988e5ffc40c 100644
--- a/src/mongo/db/storage/SConscript
+++ b/src/mongo/db/storage/SConscript
@@ -8,7 +8,6 @@ env.SConscript(
'devnull',
'ephemeral_for_test',
'kv',
- 'mmap_v1',
'wiredtiger',
'mobile',
],
@@ -246,9 +245,6 @@ env.Library(
'$BUILD_DIR/mongo/base',
'$BUILD_DIR/mongo/db/bson/dotted_path_support',
],
- LIBDEPS_PRIVATE=[
- '$BUILD_DIR/mongo/db/storage/mmap_v1/paths',
- ],
)
env.CppUnitTest(
diff --git a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_recovery_unit.h b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_recovery_unit.h
index b5e99506bf8..b64ec36da99 100644
--- a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_recovery_unit.h
+++ b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_recovery_unit.h
@@ -64,12 +64,6 @@ public:
_changes.push_back(ChangePtr(change));
}
- virtual void* writingPtr(void* data, size_t len) {
- MONGO_UNREACHABLE;
- }
-
- virtual void setRollbackWritesDisabled() {}
-
virtual SnapshotId getSnapshotId() const {
return SnapshotId();
}
diff --git a/src/mongo/db/storage/mmap_v1/SConscript b/src/mongo/db/storage/mmap_v1/SConscript
deleted file mode 100644
index 857bcfac99f..00000000000
--- a/src/mongo/db/storage/mmap_v1/SConscript
+++ /dev/null
@@ -1,310 +0,0 @@
-# -*- mode: python -*-
-
-Import("env")
-Import("mmapv1")
-
-env = env.Clone()
-
-env.Library(
- target='paths',
- source=[
- 'paths.cpp',
- ],
- LIBDEPS=[
- '$BUILD_DIR/mongo/base',
- ],
-)
-
-env.Library(
- target='mmap_v1_options',
- source=[
- 'mmap_v1_options.cpp',
- ],
-)
-
-env.Library(
- target='storage_mmapv1',
- source=[
- "aligned_builder.cpp",
- "catalog/hashtab.cpp",
- "catalog/index_details.cpp",
- "catalog/namespace_details.cpp",
- "catalog/namespace_details_collection_entry.cpp",
- "catalog/namespace_details_rsv1_metadata.cpp",
- "catalog/namespace_index.cpp",
- "commit_notifier.cpp",
- "data_file.cpp",
- "data_file_sync.cpp",
- "durable_mapped_file.cpp",
- "dur.cpp",
- "durop.cpp",
- "dur_preplogbuffer.cpp",
- "dur_commitjob.cpp",
- "dur_recover.cpp",
- "dur_journal.cpp",
- "dur_journal_writer.cpp",
- "dur_recovery_unit.cpp",
- "journal_latency_test_cmd.cpp",
- "mmap_v1_database_catalog_entry.cpp",
- "mmap_v1_engine.cpp",
- "mmap_v1_extent_manager.cpp",
- "mmap_v1_init.cpp" if mmapv1 else "mmap_v1_noinit.cpp",
- "repair_database.cpp",
- ],
- LIBDEPS=[
- 'record_store_v1',
- 'record_access_tracker',
- 'repair_database_interface',
- 'btree',
- 'file_allocator',
- 'logfile',
- 'compress',
- 'paths',
- 'mmap_v1_options',
- '$BUILD_DIR/mongo/db/catalog/collection_options',
- '$BUILD_DIR/mongo/db/catalog/database',
- '$BUILD_DIR/mongo/db/catalog/database_holder',
- '$BUILD_DIR/mongo/db/catalog/index_catalog',
- '$BUILD_DIR/mongo/db/catalog/index_create',
- '$BUILD_DIR/mongo/db/commands',
- '$BUILD_DIR/mongo/db/concurrency/lock_manager',
- '$BUILD_DIR/mongo/db/index_names',
- '$BUILD_DIR/mongo/db/index/index_descriptor',
- '$BUILD_DIR/mongo/db/storage/journal_listener',
- '$BUILD_DIR/mongo/db/storage/kv/kv_prefix',
- '$BUILD_DIR/mongo/db/storage/storage_engine_lock_file',
- '$BUILD_DIR/mongo/db/storage/storage_engine_metadata',
- '$BUILD_DIR/mongo/db/index/index_access_methods',
- '$BUILD_DIR/mongo/db/write_ops',
- ],
- LIBDEPS_PRIVATE=[
- '$BUILD_DIR/mongo/db/commands/server_status',
- '$BUILD_DIR/mongo/db/commands/test_commands_enabled',
- '$BUILD_DIR/mongo/db/storage/storage_engine_common',
- ],
-)
-
-env.Library(
- target = 'repair_database_interface',
- source = [
- "repair_database_interface.cpp",
- ],
- LIBDEPS = [
- ],
-)
-
-compressEnv = env.Clone()
-compressEnv.InjectThirdPartyIncludePaths(libraries=['snappy'])
-compressEnv.Library(
- target='compress',
- source=[
- 'compress.cpp',
- ],
- LIBDEPS=[
- 'paths',
- '$BUILD_DIR/third_party/shim_snappy',
- ],
-)
-
-env.Library(
- target= 'extent',
- source= [
- 'extent.cpp',
- 'extent_manager.cpp',
- ],
- LIBDEPS= [
- '$BUILD_DIR/mongo/base',
- ]
- )
-
-env.Library(
- target='file_allocator',
- source=[
- 'file_allocator.cpp',
- ],
- LIBDEPS=[
- 'paths',
- '$BUILD_DIR/mongo/util/fail_point',
- '$BUILD_DIR/mongo/util/processinfo',
- ],
-)
-
-env.Library(
- target='logfile',
- source=[
- 'logfile.cpp',
- ],
- LIBDEPS=[
- 'mmap',
- 'paths',
- ],
-)
-
-env.Library(
- target='mmap',
- source=[
- 'mmap.cpp',
- 'mmap_${TARGET_OS_FAMILY}.cpp',
- ],
- LIBDEPS=[
- '$BUILD_DIR/mongo/db/concurrency/lock_manager',
- '$BUILD_DIR/mongo/db/service_context',
- '$BUILD_DIR/mongo/db/storage/storage_options',
- '$BUILD_DIR/mongo/util/progress_meter',
- 'file_allocator',
- ],
-)
-
-env.Library(
- target= 'record_store_v1',
- source= [
- 'record_store_v1_base.cpp',
- 'record_store_v1_capped.cpp',
- 'record_store_v1_capped_iterator.cpp',
- 'record_store_v1_repair_iterator.cpp',
- 'record_store_v1_simple.cpp',
- 'record_store_v1_simple_iterator.cpp',
- 'touch_pages.cpp',
- ],
- LIBDEPS= [
- '$BUILD_DIR/mongo/db/commands/server_status_core',
- '$BUILD_DIR/mongo/db/curop',
- '$BUILD_DIR/mongo/db/service_context',
- '$BUILD_DIR/mongo/db/storage/storage_options',
- '$BUILD_DIR/mongo/util/concurrency/spin_lock',
- '$BUILD_DIR/mongo/util/progress_meter',
- 'extent',
- ]
- )
-
-env.Library(
- target='record_store_v1_test_help',
- source=['record_store_v1_test_help.cpp',
- ],
- LIBDEPS=[
- '$BUILD_DIR/mongo/unittest/unittest',
- 'record_store_v1'
- ]
- )
-
-env.Library(
- target='record_access_tracker',
- source=['record_access_tracker.cpp',
- ],
- LIBDEPS=[
- '$BUILD_DIR/mongo/base',
- '$BUILD_DIR/mongo/util/net/network',
- '$BUILD_DIR/mongo/util/processinfo',
- ]
- )
-
-env.Library(
- target= 'btree',
- source= [
- 'btree/btree_logic.cpp',
- 'btree/btree_interface.cpp',
- 'btree/btree_ondisk.cpp',
- 'btree/key.cpp'
- ],
- LIBDEPS= [
- '$BUILD_DIR/mongo/base',
- '$BUILD_DIR/mongo/db/service_context',
- 'record_store_v1',
- ]
- )
-
-if mmapv1:
- env.CppUnitTest(
- target='storage_engine_mmap_v1_init_test',
- source=['mmap_v1_init_test.cpp',
- ],
- LIBDEPS=[
- '$BUILD_DIR/mongo/db/auth/authmocks',
- '$BUILD_DIR/mongo/db/serveronly',
- '$BUILD_DIR/mongo/db/service_context',
- '$BUILD_DIR/mongo/db/service_context_d',
- '$BUILD_DIR/mongo/db/storage/storage_engine_metadata',
- '$BUILD_DIR/mongo/db/storage/storage_options',
- ],
- )
-
- env.CppUnitTest(target = 'record_access_tracker_test',
- source = ['record_access_tracker_test.cpp'],
- LIBDEPS = ['record_access_tracker',
- '$BUILD_DIR/mongo/util/clock_source_mock',
- '$BUILD_DIR/mongo/util/processinfo',
- '$BUILD_DIR/mongo/util/net/network'])
-
- env.CppUnitTest(target = 'namespace_test',
- source = ['catalog/namespace_test.cpp'],
- LIBDEPS = ['$BUILD_DIR/mongo/base'])
-
- env.CppUnitTest(
- target='record_store_v1_simple_test',
- source=['record_store_v1_simple_test.cpp',
- ],
- LIBDEPS=[
- 'record_store_v1_test_help'
- ]
- )
-
- env.CppUnitTest(
- target='record_store_v1_capped_test',
- source=['record_store_v1_capped_test.cpp',
- ],
- LIBDEPS=[
- 'record_store_v1_test_help'
- ]
- )
-
-
- env.CppUnitTest(
- target='record_store_v1_test',
- source=['mmap_v1_record_store_test.cpp',
- ],
- LIBDEPS=[
- 'record_store_v1_test_help',
- '$BUILD_DIR/mongo/db/storage/record_store_test_harness'
- ]
- )
-
- env.Library(
- target= 'btree_test_help',
- source= [
- 'btree/btree_test_help.cpp',
- 'heap_record_store_btree.cpp'
- ],
- LIBDEPS= [
- 'btree',
- 'record_store_v1_test_help',
- ]
- )
-
- env.CppUnitTest(
- target='btree_logic_test',
- source=['btree/btree_logic_test.cpp'
- ],
- LIBDEPS=[
- 'btree_test_help'
- ]
- )
-
- env.CppUnitTest(
- target='btree_interface_test',
- source=['btree/btree_interface_test.cpp'
- ],
- LIBDEPS=[
- 'btree_test_help',
- '$BUILD_DIR/mongo/db/storage/sorted_data_interface_test_harness'
- ]
- )
-
- env.CppUnitTest(
- target='data_file_version_test',
- source=[
- 'data_file_version_test.cpp',
- ],
- LIBDEPS=[
- ],
- )
diff --git a/src/mongo/db/storage/mmap_v1/aligned_builder.cpp b/src/mongo/db/storage/mmap_v1/aligned_builder.cpp
deleted file mode 100644
index 96e7ddd936e..00000000000
--- a/src/mongo/db/storage/mmap_v1/aligned_builder.cpp
+++ /dev/null
@@ -1,175 +0,0 @@
-/**
- * Copyright (C) 2009-2014 MongoDB Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kDefault
-
-#include "mongo/platform/basic.h"
-
-#include "mongo/db/storage/mmap_v1/aligned_builder.h"
-
-#include "mongo/base/static_assert.h"
-#include "mongo/util/debug_util.h"
-#include "mongo/util/log.h"
-
-namespace mongo {
-
-using std::endl;
-
-AlignedBuilder::AlignedBuilder(unsigned initSize) {
- _len = 0;
- _malloc(initSize);
- uassert(13584, "out of memory AlignedBuilder", _p._allocationAddress);
-}
-
-MONGO_STATIC_ASSERT(sizeof(void*) == sizeof(size_t));
-
-/** reset for a re-use. shrinks if > 128MB */
-void AlignedBuilder::reset() {
- _len = 0;
- RARELY {
- const unsigned sizeCap = 128 * 1024 * 1024;
- if (_p._size > sizeCap)
- _realloc(sizeCap, _len);
- }
-}
-
-/** reset with a hint as to the upcoming needed size specified */
-void AlignedBuilder::reset(unsigned sz) {
- _len = 0;
- unsigned Q = 32 * 1024 * 1024 - 1;
- unsigned want = (sz + Q) & (~Q);
- if (_p._size == want) {
- return;
- }
- if (_p._size > want) {
- if (_p._size <= 64 * 1024 * 1024)
- return;
- bool downsize = false;
- RARELY {
- downsize = true;
- }
- if (!downsize)
- return;
- }
- _realloc(want, _len);
-}
-
-void AlignedBuilder::mallocSelfAligned(unsigned sz) {
- verify(sz == _p._size);
- void* p = malloc(sz + Alignment - 1);
- _p._allocationAddress = p;
- size_t s = (size_t)p;
- size_t sold = s;
- s += Alignment - 1;
- s = (s / Alignment) * Alignment;
- verify(s >= sold); // beginning
- verify((s + sz) <= (sold + sz + Alignment - 1)); // end
- _p._data = (char*)s;
-}
-
-/* "slow"/infrequent portion of 'grow()' */
-void NOINLINE_DECL AlignedBuilder::growReallocate(unsigned oldLen) {
- const unsigned MB = 1024 * 1024;
- const unsigned kMaxSize = (sizeof(int*) == 4) ? 512 * MB : 2000 * MB;
- const unsigned kWarnSize = (sizeof(int*) == 4) ? 256 * MB : 512 * MB;
-
- const unsigned oldSize = _p._size;
-
- // Warn for unexpectedly large buffer
- if (_len > kWarnSize) {
- warning() << "large amount of uncommitted data (" << _len << " bytes)";
- }
-
- // Check validity of requested size
- invariant(_len > oldSize);
- if (_len > kMaxSize) {
- error() << "error writing journal: too much uncommitted data (" << _len << " bytes)";
- error() << "shutting down immediately to avoid corruption";
- fassert(28614, _len <= kMaxSize);
- }
-
-    // Use smaller maximum for debug builds, as we should never be close to the maximum
- dassert(_len <= 1000 * MB);
-
- // Compute newSize by doubling the existing maximum size until the maximum is reached
- invariant(oldSize > 0);
- uint64_t newSize = oldSize; // use 64 bits to defend against accidental overflow
- while (newSize < _len) {
- newSize *= 2;
- }
-
- if (newSize > kMaxSize) {
- newSize = kMaxSize;
- }
-
- _realloc(newSize, oldLen);
-}
-
-void AlignedBuilder::_malloc(unsigned sz) {
- _p._size = sz;
-#if defined(_WIN32)
- void* p = VirtualAlloc(0, sz, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
- _p._allocationAddress = p;
- _p._data = (char*)p;
-#elif defined(__linux__)
- // in theory #ifdef _POSIX_VERSION should work, but it doesn't on OS X 10.4, and needs to be
- // tested on solaris. so for now, linux only for this.
- void* p = 0;
- int res = posix_memalign(&p, Alignment, sz);
- massert(13524, "out of memory AlignedBuilder", res == 0);
- _p._allocationAddress = p;
- _p._data = (char*)p;
-#else
- mallocSelfAligned(sz);
- verify(((size_t)_p._data) % Alignment == 0);
-#endif
-}
-
-void AlignedBuilder::_realloc(unsigned newSize, unsigned oldLen) {
- // posix_memalign alignment is not maintained on reallocs, so we can't use realloc().
- AllocationInfo old = _p;
- _malloc(newSize);
- verify(oldLen <= _len);
- memcpy(_p._data, old._data, oldLen);
- _free(old._allocationAddress);
-}
-
-void AlignedBuilder::_free(void* p) {
-#if defined(_WIN32)
- VirtualFree(p, 0, MEM_RELEASE);
-#else
- free(p);
-#endif
-}
-
-void AlignedBuilder::kill() {
- _free(_p._allocationAddress);
- _p._allocationAddress = 0;
- _p._data = 0;
-}
-}
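
Two techniques from the file above are worth noting: page-aligned allocation via posix_memalign (since realloc() does not preserve alignment) and capacity doubling on growth. A POSIX-only sketch combining both; the deleted code also handled Windows via VirtualAlloc and a generic self-aligning fallback:

    #include <cstdlib>
    #include <cstring>
    #include <new>

    class AlignedBuffer {
        static constexpr std::size_t kAlignment = 8192;  // one page, as in AlignedBuilder

        char* _data;
        std::size_t _size;
        std::size_t _len = 0;

        static char* allocAligned(std::size_t sz) {
            void* p = nullptr;
            if (posix_memalign(&p, kAlignment, sz) != 0)
                throw std::bad_alloc();
            return static_cast<char*>(p);
        }

    public:
        explicit AlignedBuffer(std::size_t init) : _data(allocAligned(init)), _size(init) {}
        ~AlignedBuffer() { free(_data); }
        AlignedBuffer(const AlignedBuffer&) = delete;
        AlignedBuffer& operator=(const AlignedBuffer&) = delete;

        void append(const void* src, std::size_t n) {
            if (_len + n > _size) {
                std::size_t newSize = _size;
                while (newSize < _len + n)             // double until the data fits
                    newSize *= 2;
                char* bigger = allocAligned(newSize);  // realloc() would lose alignment
                std::memcpy(bigger, _data, _len);
                free(_data);
                _data = bigger;
                _size = newSize;
            }
            std::memcpy(_data + _len, src, n);
            _len += n;
        }
    };
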
diff --git a/src/mongo/db/storage/mmap_v1/aligned_builder.h b/src/mongo/db/storage/mmap_v1/aligned_builder.h
deleted file mode 100644
index f43cbee7d5d..00000000000
--- a/src/mongo/db/storage/mmap_v1/aligned_builder.h
+++ /dev/null
@@ -1,149 +0,0 @@
-/**
- * Copyright (C) 2009-2014 MongoDB Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#pragma once
-
-#include "mongo/base/string_data.h"
-#include "mongo/bson/util/builder.h"
-
-namespace mongo {
-
-/** a page-aligned BufBuilder. */
-class AlignedBuilder {
-public:
- AlignedBuilder(unsigned init_size);
- ~AlignedBuilder() {
- kill();
- }
-
- /** reset with a hint as to the upcoming needed size specified */
- void reset(unsigned sz);
-
- /** reset for a re-use. shrinks if > 128MB */
- void reset();
-
- /** note this may be deallocated (realloced) if you keep writing or reset(). */
- const char* buf() const {
- return _p._data;
- }
-
- /** leave room for some stuff later
- @return offset in the buffer that was our current position
- */
- size_t skip(unsigned n) {
- unsigned l = len();
- grow(n);
- return l;
- }
-
- /** if buffer grows pointer no longer valid */
- char* atOfs(unsigned ofs) {
- return _p._data + ofs;
- }
-
- /** if buffer grows pointer no longer valid */
- char* cur() {
- return _p._data + _len;
- }
-
- void appendChar(char j) {
- *((char*)grow(sizeof(char))) = j;
- }
- void appendNum(char j) {
- *((char*)grow(sizeof(char))) = j;
- }
- void appendNum(short j) {
- *((short*)grow(sizeof(short))) = j;
- }
- void appendNum(int j) {
- *((int*)grow(sizeof(int))) = j;
- }
- void appendNum(unsigned j) {
- *((unsigned*)grow(sizeof(unsigned))) = j;
- }
- void appendNum(bool j) {
- *((bool*)grow(sizeof(bool))) = j;
- }
- void appendNum(double j) {
- *((double*)grow(sizeof(double))) = j;
- }
- void appendNum(long long j) {
- *((long long*)grow(sizeof(long long))) = j;
- }
- void appendNum(unsigned long long j) {
- *((unsigned long long*)grow(sizeof(unsigned long long))) = j;
- }
-
- void appendBuf(const void* src, size_t len) {
- memcpy(grow((unsigned)len), src, len);
- }
-
- template <class T>
- void appendStruct(const T& s) {
- appendBuf(&s, sizeof(T));
- }
-
- void appendStr(StringData str, bool includeEOO = true) {
- const unsigned len = str.size() + (includeEOO ? 1 : 0);
- verify(len < (unsigned)BSONObjMaxUserSize);
- str.copyTo(grow(len), includeEOO);
- }
-
- /** @return the in-use length */
- unsigned len() const {
- return _len;
- }
-
-private:
- static const unsigned Alignment = 8192;
-
- /** returns the pre-grow write position */
- inline char* grow(unsigned by) {
- unsigned oldlen = _len;
- _len += by;
- if (MONGO_unlikely(_len > _p._size)) {
- growReallocate(oldlen);
- }
- return _p._data + oldlen;
- }
-
- void growReallocate(unsigned oldLenInUse);
- void kill();
- void mallocSelfAligned(unsigned sz);
- void _malloc(unsigned sz);
- void _realloc(unsigned newSize, unsigned oldLenInUse);
- void _free(void*);
-
- struct AllocationInfo {
- char* _data;
- void* _allocationAddress;
- unsigned _size;
- } _p;
- unsigned _len; // bytes in use
-};
-}
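
The skip()/atOfs() pair above supported a length-prefixed layout: reserve header space first, append the payload, then patch the header once the final length is known. An illustrative use of the now-deleted API (hypothetical caller, assuming the declarations above):

    void buildSection(AlignedBuilder& ab, const char* payload, unsigned payloadLen) {
        size_t hdrOfs = ab.skip(sizeof(unsigned));  // hole for the length prefix
        ab.appendBuf(payload, payloadLen);
        unsigned total = ab.len();
        // Fetch the pointer only after the last append, since growth may
        // have moved the buffer.
        memcpy(ab.atOfs(static_cast<unsigned>(hdrOfs)), &total, sizeof(unsigned));
    }
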
diff --git a/src/mongo/db/storage/mmap_v1/btree/btree_interface.cpp b/src/mongo/db/storage/mmap_v1/btree/btree_interface.cpp
deleted file mode 100644
index 14a3e57503b..00000000000
--- a/src/mongo/db/storage/mmap_v1/btree/btree_interface.cpp
+++ /dev/null
@@ -1,437 +0,0 @@
-/**
- * Copyright (C) 2014 MongoDB Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#include "mongo/platform/basic.h"
-
-#include <string>
-
-#include "mongo/db/storage/mmap_v1/btree/btree_interface.h"
-
-#include "mongo/db/operation_context.h"
-#include "mongo/db/storage/mmap_v1/btree/btree_logic.h"
-#include "mongo/db/storage/mmap_v1/record_store_v1_base.h"
-#include "mongo/db/storage/sorted_data_interface.h"
-#include "mongo/stdx/memory.h"
-#include "mongo/util/assert_util.h"
-
-namespace mongo {
-namespace {
-
-using std::unique_ptr;
-using std::string;
-using std::vector;
-
-using IndexVersion = IndexDescriptor::IndexVersion;
-
-template <class OnDiskFormat>
-class BtreeBuilderInterfaceImpl final : public SortedDataBuilderInterface {
-public:
- BtreeBuilderInterfaceImpl(OperationContext* trans,
- typename BtreeLogic<OnDiskFormat>::Builder* builder)
- : _builder(builder), _trans(trans) {}
-
- Status addKey(const BSONObj& key, const RecordId& loc) {
- return _builder->addKey(key, DiskLoc::fromRecordId(loc));
- }
-
-private:
- std::unique_ptr<typename BtreeLogic<OnDiskFormat>::Builder> _builder;
-
- // Not owned here.
- OperationContext* _trans;
-};
-
-template <class OnDiskFormat>
-class BtreeInterfaceImpl final : public SortedDataInterface {
-public:
- BtreeInterfaceImpl(HeadManager* headManager,
- RecordStore* recordStore,
- SavedCursorRegistry* cursorRegistry,
- const Ordering& ordering,
- const string& indexName,
- bool isUnique) {
- _btree.reset(new BtreeLogic<OnDiskFormat>(
- headManager, recordStore, cursorRegistry, ordering, indexName, isUnique));
- }
-
- virtual ~BtreeInterfaceImpl() {}
-
- virtual SortedDataBuilderInterface* getBulkBuilder(OperationContext* opCtx, bool dupsAllowed) {
- return new BtreeBuilderInterfaceImpl<OnDiskFormat>(opCtx,
- _btree->newBuilder(opCtx, dupsAllowed));
- }
-
- virtual Status insert(OperationContext* opCtx,
- const BSONObj& key,
- const RecordId& loc,
- bool dupsAllowed) {
- return _btree->insert(opCtx, key, DiskLoc::fromRecordId(loc), dupsAllowed);
- }
-
- virtual void unindex(OperationContext* opCtx,
- const BSONObj& key,
- const RecordId& loc,
- bool dupsAllowed) {
- _btree->unindex(opCtx, key, DiskLoc::fromRecordId(loc));
- }
-
- virtual void fullValidate(OperationContext* opCtx,
- long long* numKeysOut,
- ValidateResults* fullResults) const {
- *numKeysOut = _btree->fullValidate(opCtx, NULL, false, false, 0);
- }
-
- virtual bool appendCustomStats(OperationContext* opCtx,
- BSONObjBuilder* output,
- double scale) const {
- return false;
- }
-
- virtual long long getSpaceUsedBytes(OperationContext* opCtx) const {
- return _btree->getRecordStore()->dataSize(opCtx);
- }
-
- virtual Status dupKeyCheck(OperationContext* opCtx, const BSONObj& key, const RecordId& loc) {
- return _btree->dupKeyCheck(opCtx, key, DiskLoc::fromRecordId(loc));
- }
-
- virtual bool isEmpty(OperationContext* opCtx) {
- return _btree->isEmpty(opCtx);
- }
-
- virtual Status touch(OperationContext* opCtx) const {
- return _btree->touch(opCtx);
- }
-
- class Cursor final : public SortedDataInterface::Cursor {
- public:
- Cursor(OperationContext* opCtx, const BtreeLogic<OnDiskFormat>* btree, bool forward)
- : _opCtx(opCtx), _btree(btree), _direction(forward ? 1 : -1), _ofs(0) {}
-
- boost::optional<IndexKeyEntry> next(RequestedInfo parts) override {
- if (isEOF())
- return {};
- if (_lastMoveWasRestore) {
- // Return current position rather than advancing.
- _lastMoveWasRestore = false;
- } else {
- _btree->advance(_opCtx, &_bucket, &_ofs, _direction);
- }
-
- if (atEndPoint())
- markEOF();
- return curr(parts);
- }
-
- void setEndPosition(const BSONObj& key, bool inclusive) override {
- if (key.isEmpty()) {
- // This means scan to end of index.
- _endState = boost::none;
- return;
- }
-
- _endState = {{key, inclusive}};
- seekEndCursor(); // Completes initialization of _endState.
- }
-
- boost::optional<IndexKeyEntry> seek(const BSONObj& key,
- bool inclusive,
- RequestedInfo parts) override {
- locate(key, inclusive == forward() ? RecordId::min() : RecordId::max());
- _lastMoveWasRestore = false;
-
- if (isEOF())
- return {};
- dassert(inclusive ? compareKeys(getKey(), key) >= 0 : compareKeys(getKey(), key) > 0);
- return curr(parts);
- }
-
-
- boost::optional<IndexKeyEntry> seek(const IndexSeekPoint& seekPoint,
- RequestedInfo parts) override {
- bool canUseAdvanceTo = false;
- if (!isEOF()) {
- int cmp = _btree->customBSONCmp(getKey(), seekPoint, _direction);
-
- // advanceTo requires that we are positioned "earlier" in the index than the
- // seek point, in scan order.
- canUseAdvanceTo = forward() ? cmp < 0 : cmp > 0;
- }
-
-
- if (canUseAdvanceTo) {
- // This takes advantage of current location.
- _btree->advanceTo(_opCtx, &_bucket, &_ofs, seekPoint, _direction);
- } else {
- // Start at root.
- _bucket = _btree->getHead(_opCtx);
- _ofs = 0;
- _btree->customLocate(_opCtx, &_bucket, &_ofs, seekPoint, _direction);
- }
-
- _lastMoveWasRestore = false;
-
- if (atOrPastEndPointAfterSeeking())
- markEOF();
- return curr(parts);
- }
-
- void save() override {
- if (!_lastMoveWasRestore)
- _savedEOF = isEOF();
-
- if (!isEOF()) {
- _saved.bucket = _bucket;
- _btree->savedCursors()->registerCursor(&_saved);
- // Don't want to change saved position if we only moved during restore.
- if (!_lastMoveWasRestore) {
- _saved.key = getKey().getOwned();
- _saved.loc = getDiskLoc();
- }
- }
- // Doing nothing with end cursor since it will do full reseek on restore.
- }
-
- void saveUnpositioned() override {
- // Don't leak our registration if save() was previously called.
- if (!_saved.bucket.isNull())
- _btree->savedCursors()->unregisterCursor(&_saved);
-
- _saved.bucket = DiskLoc();
- _savedEOF = true;
- }
-
- void restore() override {
- // Always do a full seek on restore. We cannot use our last position since index
- // entries may have been inserted closer to our endpoint and we would need to move
- // over them.
- seekEndCursor();
-
- if (_savedEOF) {
- markEOF();
- return;
- }
-
- if (_btree->savedCursors()->unregisterCursor(&_saved)) {
- // We can use the fast restore mechanism.
- _btree->restorePosition(
- _opCtx, _saved.key, _saved.loc, _direction, &_bucket, &_ofs);
- } else {
- // Need to find our position from the root.
- locate(_saved.key, _saved.loc.toRecordId());
- }
-
- _lastMoveWasRestore = isEOF() // We weren't EOF but now are.
- || (!_btree->isUnique() && getDiskLoc() != _saved.loc) ||
- compareKeys(getKey(), _saved.key) != 0;
- }
-
- void detachFromOperationContext() final {
- _opCtx = nullptr;
- }
-
- void reattachToOperationContext(OperationContext* opCtx) final {
- _opCtx = opCtx;
- }
-
- private:
- bool isEOF() const {
- return _bucket.isNull();
- }
- void markEOF() {
- _bucket = DiskLoc();
- }
-
- boost::optional<IndexKeyEntry> curr(RequestedInfo parts) {
- if (isEOF())
- return {};
- return {{(parts & kWantKey) ? getKey() : BSONObj(),
- (parts & kWantLoc) ? getDiskLoc().toRecordId() : RecordId()}};
- }
-
- bool atEndPoint() const {
- return _endState && _bucket == _endState->bucket && (isEOF() || _ofs == _endState->ofs);
- }
-
- bool atOrPastEndPointAfterSeeking() const {
- if (!_endState)
- return false;
- if (isEOF())
- return true;
-
- int cmp = compareKeys(getKey(), _endState->key);
- return _endState->inclusive ? cmp > 0 : cmp >= 0;
- }
-
- void locate(const BSONObj& key, const RecordId& loc) {
- _btree->locate(_opCtx, key, DiskLoc::fromRecordId(loc), _direction, &_ofs, &_bucket);
- if (atOrPastEndPointAfterSeeking())
- markEOF();
- }
-
- // Returns comparison relative to direction of scan. If rhs would be seen later, returns
- // a positive value.
- int compareKeys(const BSONObj& lhs, const BSONObj& rhs) const {
- int cmp = lhs.woCompare(rhs, _btree->ordering(), /*considerFieldName*/ false);
- return forward() ? cmp : -cmp;
- }
-
- BSONObj getKey() const {
- return _btree->getKey(_opCtx, _bucket, _ofs);
- }
- DiskLoc getDiskLoc() const {
- return _btree->getDiskLoc(_opCtx, _bucket, _ofs);
- }
-
- void seekEndCursor() {
- if (!_endState)
- return;
- _btree->locate(_opCtx,
- _endState->key,
- forward() == _endState->inclusive ? DiskLoc::max() : DiskLoc::min(),
- _direction,
- &_endState->ofs,
- &_endState->bucket); // pure out params.
- }
-
- bool forward() const {
- return _direction == 1;
- }
-
- OperationContext* _opCtx; // not owned
- const BtreeLogic<OnDiskFormat>* const _btree;
- const int _direction;
-
- DiskLoc _bucket;
- int _ofs;
-
- struct EndState {
- BSONObj key;
- bool inclusive;
- DiskLoc bucket;
- int ofs;
- };
- boost::optional<EndState> _endState;
-
- // Used by next to decide to return current position rather than moving. Should be reset
- // to false by any operation that moves the cursor, other than subsequent save/restore
- // pairs.
- bool _lastMoveWasRestore = false;
-
- // Only used by save/restore() if _bucket is non-Null.
- bool _savedEOF = false;
- SavedCursorRegistry::SavedCursor _saved;
- };
-
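
The _lastMoveWasRestore flag above encodes a subtle contract: if restore() could not reposition on the exact saved entry, the next call to next() must yield the current entry rather than advance past it. A toy model of the same rule over a std::map (hypothetical, for illustration only):

    #include <map>

    class MapCursor {
        std::map<int, int>& _index;
        std::map<int, int>::iterator _it;
        int _savedKey = 0;
        bool _lastMoveWasRestore = false;

    public:
        explicit MapCursor(std::map<int, int>& idx) : _index(idx), _it(idx.begin()) {}

        void save() {
            if (_it != _index.end())
                _savedKey = _it->first;
        }

        void restore() {
            // Reseek from scratch; concurrent inserts or erases may move us.
            _it = _index.lower_bound(_savedKey);
            _lastMoveWasRestore = (_it == _index.end() || _it->first != _savedKey);
        }

        const std::pair<const int, int>* next() {
            if (_it == _index.end())
                return nullptr;
            if (_lastMoveWasRestore)
                _lastMoveWasRestore = false;  // return current position, do not advance
            else
                ++_it;
            return _it == _index.end() ? nullptr : &*_it;
        }
    };
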
- virtual std::unique_ptr<SortedDataInterface::Cursor> newCursor(OperationContext* opCtx,
- bool isForward = true) const {
- return stdx::make_unique<Cursor>(opCtx, _btree.get(), isForward);
- }
-
- class RandomCursor final : public SortedDataInterface::Cursor {
- public:
- RandomCursor(OperationContext* opCtx, const BtreeLogic<OnDiskFormat>* btree)
- : _opCtx(opCtx), _btree(btree) {}
-
- boost::optional<IndexKeyEntry> next(RequestedInfo parts) override {
- if (_btree->isEmpty(_opCtx)) {
- return {};
- }
- return _btree->getRandomEntry(_opCtx);
- }
-
- void detachFromOperationContext() final {
- _opCtx = nullptr;
- }
-
- void reattachToOperationContext(OperationContext* opCtx) final {
- _opCtx = opCtx;
- }
-
- //
- // Should never be called.
- //
- void setEndPosition(const BSONObj& key, bool inclusive) override {
- MONGO_UNREACHABLE;
- }
- boost::optional<IndexKeyEntry> seek(const BSONObj& key,
- bool inclusive,
- RequestedInfo parts) override {
- MONGO_UNREACHABLE;
- }
- boost::optional<IndexKeyEntry> seek(const IndexSeekPoint& seekPoint,
- RequestedInfo parts) override {
- MONGO_UNREACHABLE;
- }
-
- //
- // May be called, but are no-ops.
- //
- void save() override {}
- void saveUnpositioned() override {}
- void restore() override {}
-
- private:
- OperationContext* _opCtx;
- const BtreeLogic<OnDiskFormat>* const _btree;
- };
-
- virtual std::unique_ptr<SortedDataInterface::Cursor> newRandomCursor(
- OperationContext* opCtx) const {
- return stdx::make_unique<RandomCursor>(opCtx, _btree.get());
- }
-
- virtual Status initAsEmpty(OperationContext* opCtx) {
- return _btree->initAsEmpty(opCtx);
- }
-
-private:
- unique_ptr<BtreeLogic<OnDiskFormat>> _btree;
-};
-} // namespace
-
-SortedDataInterface* getMMAPV1Interface(HeadManager* headManager,
- RecordStore* recordStore,
- SavedCursorRegistry* cursorRegistry,
- const Ordering& ordering,
- const string& indexName,
- IndexVersion version,
- bool isUnique) {
- switch (version) {
- case IndexVersion::kV0:
- return new BtreeInterfaceImpl<BtreeLayoutV0>(
- headManager, recordStore, cursorRegistry, ordering, indexName, isUnique);
- case IndexVersion::kV1:
- case IndexVersion::kV2:
- return new BtreeInterfaceImpl<BtreeLayoutV1>(
- headManager, recordStore, cursorRegistry, ordering, indexName, isUnique);
- }
- MONGO_UNREACHABLE;
-}
-
-} // namespace mongo
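
getMMAPV1Interface() above is a small example of hiding on-disk format differences behind a version switch: one class template carries the logic, and a factory picks the instantiation from the version tag. The same shape in a generic, self-contained sketch (Layout types and names are made up):

    #include <memory>
    #include <stdexcept>

    enum class FormatVersion { kV0, kV1 };

    struct Reader {
        virtual ~Reader() = default;
        virtual int magic() const = 0;
    };

    struct LayoutV0 { static constexpr int kMagic = 0xA0; };
    struct LayoutV1 { static constexpr int kMagic = 0xA1; };

    template <class Layout>
    struct ReaderImpl final : Reader {
        int magic() const override { return Layout::kMagic; }
    };

    std::unique_ptr<Reader> makeReader(FormatVersion v) {
        switch (v) {
            case FormatVersion::kV0:
                return std::make_unique<ReaderImpl<LayoutV0>>();
            case FormatVersion::kV1:
                return std::make_unique<ReaderImpl<LayoutV1>>();
        }
        throw std::logic_error("unhandled format version");
    }
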
diff --git a/src/mongo/db/storage/mmap_v1/btree/btree_interface.h b/src/mongo/db/storage/mmap_v1/btree/btree_interface.h
deleted file mode 100644
index ca61f2cbd28..00000000000
--- a/src/mongo/db/storage/mmap_v1/btree/btree_interface.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/**
- * Copyright (C) 2014 MongoDB Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#include <string>
-
-#include "mongo/bson/ordering.h"
-#include "mongo/db/catalog/head_manager.h"
-#include "mongo/db/index/index_descriptor.h"
-#include "mongo/db/jsobj.h"
-#include "mongo/db/operation_context.h"
-#include "mongo/db/storage/mmap_v1/diskloc.h"
-#include "mongo/db/storage/record_store.h"
-#include "mongo/db/storage/sorted_data_interface.h"
-
-#pragma once
-
-namespace mongo {
-class SavedCursorRegistry;
-
-SortedDataInterface* getMMAPV1Interface(HeadManager* headManager,
- RecordStore* recordStore,
- SavedCursorRegistry* cursorRegistry,
- const Ordering& ordering,
- const std::string& indexName,
- IndexDescriptor::IndexVersion version,
- bool isUnique);
-} // namespace mongo
diff --git a/src/mongo/db/storage/mmap_v1/btree/btree_interface_test.cpp b/src/mongo/db/storage/mmap_v1/btree/btree_interface_test.cpp
deleted file mode 100644
index b49fd70ec26..00000000000
--- a/src/mongo/db/storage/mmap_v1/btree/btree_interface_test.cpp
+++ /dev/null
@@ -1,81 +0,0 @@
-/**
- * Copyright (C) 2014 MongoDB Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#include "mongo/db/storage/mmap_v1/btree/btree_interface.h"
-
-#include "mongo/base/init.h"
-#include "mongo/db/index/index_descriptor.h"
-#include "mongo/db/storage/mmap_v1/btree/btree_test_help.h"
-#include "mongo/db/storage/sorted_data_interface_test_harness.h"
-#include "mongo/stdx/memory.h"
-#include "mongo/unittest/unittest.h"
-
-namespace mongo {
-namespace {
-
-using std::unique_ptr;
-
-class MyHarnessHelper final : public SortedDataInterfaceHarnessHelper {
-public:
- MyHarnessHelper() : _recordStore("a.b"), _order(Ordering::make(BSONObj())) {}
-
- std::unique_ptr<SortedDataInterface> newSortedDataInterface(bool unique) final {
- std::unique_ptr<SortedDataInterface> sorted(
- getMMAPV1Interface(&_headManager,
- &_recordStore,
- &_cursorRegistry,
- _order,
- "a_1", // indexName
- IndexDescriptor::IndexVersion::kV1,
- unique));
- OperationContextNoop op;
- massertStatusOK(sorted->initAsEmpty(&op));
- return sorted;
- }
-
- std::unique_ptr<RecoveryUnit> newRecoveryUnit() final {
- return stdx::make_unique<HeapRecordStoreBtreeRecoveryUnit>();
- }
-
-private:
- TestHeadManager _headManager;
- HeapRecordStoreBtree _recordStore;
- SavedCursorRegistry _cursorRegistry;
- Ordering _order;
-};
-
-std::unique_ptr<HarnessHelper> makeHarnessHelper() {
- return stdx::make_unique<MyHarnessHelper>();
-}
-
-MONGO_INITIALIZER(RegisterHarnessFactory)(InitializerContext* const) {
- mongo::registerHarnessHelperFactory(makeHarnessHelper);
- return Status::OK();
-}
-} // namespace
-} // namespace mongo
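
The MONGO_INITIALIZER at the end of this test file registered a factory so the shared sorted-data test suite could build the harness without naming the concrete type. The same self-registration idiom in plain C++, with a static lambda result standing in for the initializer macro:

    #include <functional>
    #include <memory>

    struct Harness {
        virtual ~Harness() = default;
    };

    std::function<std::unique_ptr<Harness>()>& harnessFactory() {
        static std::function<std::unique_ptr<Harness>()> factory;
        return factory;
    }

    struct MyHarness : Harness {};

    // Runs during static initialization, before the test driver asks for a
    // harness; the plain-C++ analogue of MONGO_INITIALIZER.
    static const bool kRegistered = [] {
        harnessFactory() = [] { return std::make_unique<MyHarness>(); };
        return true;
    }();
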
diff --git a/src/mongo/db/storage/mmap_v1/btree/btree_logic.cpp b/src/mongo/db/storage/mmap_v1/btree/btree_logic.cpp
deleted file mode 100644
index dc0e4aa83e2..00000000000
--- a/src/mongo/db/storage/mmap_v1/btree/btree_logic.cpp
+++ /dev/null
@@ -1,2440 +0,0 @@
-/**
- * Copyright (C) 2014 MongoDB Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kIndex
-
-#include "mongo/platform/basic.h"
-
-#include <mutex>
-#include <numeric>
-
-#include "mongo/db/client.h"
-#include "mongo/db/jsobj.h"
-#include "mongo/db/operation_context.h"
-#include "mongo/db/storage/mmap_v1/btree/btree_logic.h"
-#include "mongo/db/storage/mmap_v1/btree/key.h"
-#include "mongo/db/storage/mmap_v1/diskloc.h"
-#include "mongo/db/storage/mmap_v1/record_store_v1_base.h"
-#include "mongo/db/storage/record_store.h"
-#include "mongo/util/log.h"
-#include "mongo/util/mongoutils/str.h"
-
-namespace mongo {
-
-using std::unique_ptr;
-using std::dec;
-using std::endl;
-using std::hex;
-using std::make_pair;
-using std::pair;
-using std::string;
-using std::stringstream;
-using std::vector;
-
-// BtreeLogic::Builder algorithm
-//
-// Phase 1:
-// Handled by caller. Extracts keys from raw documents and puts them in external sorter
-//
-// Phase 2 (the addKeys phase):
-// Add all keys to buckets. When a bucket gets full, pop the highest key (setting the
-// nextChild pointer of the bucket to the prevChild of the popped key), add the popped key to
-// a parent bucket, and create a new right sibling bucket to add the new key to. If the parent
-// bucket is full, this same operation is performed on the parent and all full ancestors. If
-// we get to the root and it is full, a new root is created above the current root. When
-// creating a new right sibling, it is set as its parent's nextChild as all keys in the right
-// sibling will be higher than all keys currently in the parent.
-
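
The addKeys phase described above reduces to a cascade along the rightmost spine: append into the rightmost leaf, and on overflow pop the highest key into the parent (splitting upward as needed) before starting a fresh right sibling. A compact sketch of just that key flow, with the nextChild/prevChild pointer bookkeeping elided and a tiny bucket size for illustration:

    #include <memory>
    #include <vector>

    struct Node {
        std::vector<int> keys;
        Node* parent = nullptr;
    };

    struct BulkBuilder {
        static constexpr std::size_t kMaxKeys = 4;  // tiny bucket for illustration
        std::vector<std::unique_ptr<Node>> arena;   // owns every node
        Node* root = nullptr;
        Node* rightLeaf = nullptr;

        Node* makeNode() {
            arena.push_back(std::make_unique<Node>());
            return arena.back().get();
        }

        // Insert into the rightmost node at this level, splitting upward
        // on overflow. Returns the node that received the key.
        Node* insertRightmost(Node* n, int key) {
            if (n->keys.size() < kMaxKeys) {
                n->keys.push_back(key);
                return n;
            }
            int popped = n->keys.back();        // highest key moves up to the parent
            n->keys.pop_back();
            if (!n->parent)
                root = n->parent = makeNode();  // full root: grow the tree upward
            Node* parent = insertRightmost(n->parent, popped);
            Node* sibling = makeNode();         // new right sibling takes the new key
            sibling->parent = parent;
            sibling->keys.push_back(key);
            return sibling;
        }

        void addKey(int key) {  // keys must arrive in ascending order
            if (!rightLeaf)
                rightLeaf = root = makeNode();
            rightLeaf = insertRightmost(rightLeaf, key);
        }
    };
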
-namespace {
-std::once_flag assertValidFlag;
-} // namespace
-
-//
-// Public Builder logic
-//
-
-template <class BtreeLayout>
-typename BtreeLogic<BtreeLayout>::Builder* BtreeLogic<BtreeLayout>::newBuilder(
- OperationContext* opCtx, bool dupsAllowed) {
- return new Builder(this, opCtx, dupsAllowed);
-}
-
-template <class BtreeLayout>
-BtreeLogic<BtreeLayout>::Builder::Builder(BtreeLogic* logic,
- OperationContext* opCtx,
- bool dupsAllowed)
- : _logic(logic), _dupsAllowed(dupsAllowed), _opCtx(opCtx) {
- // The normal bulk building path calls initAsEmpty, so we already have an empty root bucket.
- // This isn't the case in some unit tests that use the Builder directly rather than going
- // through an IndexAccessMethod.
- _rightLeafLoc = DiskLoc::fromRecordId(_logic->_headManager->getHead(opCtx));
- if (_rightLeafLoc.isNull()) {
- _rightLeafLoc = _logic->_addBucket(opCtx);
- _logic->_headManager->setHead(_opCtx, _rightLeafLoc.toRecordId());
- }
-
- // must be empty when starting
- invariant(_getBucket(_rightLeafLoc)->n == 0);
-}
-
-template <class BtreeLayout>
-class BtreeLogic<BtreeLayout>::Builder::SetRightLeafLocChange : public RecoveryUnit::Change {
-public:
- SetRightLeafLocChange(Builder* builder, DiskLoc oldLoc) : _builder(builder), _oldLoc(oldLoc) {}
-
- virtual void commit(boost::optional<Timestamp>) {}
- virtual void rollback() {
- _builder->_rightLeafLoc = _oldLoc;
- }
-
- Builder* _builder;
- const DiskLoc _oldLoc;
-};
-
-template <class BtreeLayout>
-Status BtreeLogic<BtreeLayout>::Builder::addKey(const BSONObj& keyObj, const DiskLoc& loc) {
- unique_ptr<KeyDataOwnedType> key(new KeyDataOwnedType(keyObj));
-
- if (key->dataSize() > BtreeLayout::KeyMax) {
- string msg = str::stream() << "Btree::insert: key too large to index, failing "
- << _logic->_indexName << ' ' << key->dataSize() << ' '
- << key->toString();
- log() << msg << endl;
- return Status(ErrorCodes::KeyTooLong, msg);
- }
-
- // If we have a previous key to compare to...
- if (_keyLast.get()) {
- int cmp = _keyLast->woCompare(*key, _logic->_ordering);
-
- // This shouldn't happen ever. We expect keys in sorted order.
- if (cmp > 0) {
- return Status(ErrorCodes::InternalError, "Bad key order in btree builder");
- }
-
-    // This could easily happen.
- if (!_dupsAllowed && (cmp == 0)) {
- return Status(ErrorCodes::DuplicateKey, _logic->dupKeyError(*_keyLast));
- }
- }
-
- BucketType* rightLeaf = _getModifiableBucket(_rightLeafLoc);
- if (!_logic->pushBack(rightLeaf, loc, *key, DiskLoc())) {
- // bucket was full, so split and try with the new node.
- _opCtx->recoveryUnit()->registerChange(new SetRightLeafLocChange(this, _rightLeafLoc));
- _rightLeafLoc = newBucket(rightLeaf, _rightLeafLoc);
- rightLeaf = _getModifiableBucket(_rightLeafLoc);
- invariant(_logic->pushBack(rightLeaf, loc, *key, DiskLoc()));
- }
-
- _keyLast = std::move(key);
- return Status::OK();
-}
-
-//
-// Private Builder logic
-//
-
-template <class BtreeLayout>
-DiskLoc BtreeLogic<BtreeLayout>::Builder::newBucket(BucketType* leftSib, DiskLoc leftSibLoc) {
- invariant(leftSib->n >= 2); // Guaranteed by sufficiently small KeyMax.
-
- if (leftSib->parent.isNull()) {
- // Making a new root
- invariant(leftSibLoc.toRecordId() == _logic->_headManager->getHead(_opCtx));
- const DiskLoc newRootLoc = _logic->_addBucket(_opCtx);
- leftSib->parent = newRootLoc;
- _logic->_headManager->setHead(_opCtx, newRootLoc.toRecordId());
-
- // Set the newRoot's nextChild to point to leftSib for the invariant below.
- BucketType* newRoot = _getBucket(newRootLoc);
- *_opCtx->recoveryUnit()->writing(&newRoot->nextChild) = leftSibLoc;
- }
-
- DiskLoc parentLoc = leftSib->parent;
- BucketType* parent = _getModifiableBucket(parentLoc);
-
- // For the pushBack below to be correct, leftSib must be the right-most child of parent.
- invariant(parent->nextChild == leftSibLoc);
-
- // Pull right-most key out of leftSib and move to parent, splitting parent if necessary.
- // Note that popBack() handles setting leftSib's nextChild to the former prevChildNode of
- // the popped key.
- KeyDataType key;
- DiskLoc val;
- _logic->popBack(leftSib, &val, &key);
- if (!_logic->pushBack(parent, val, key, leftSibLoc)) {
- // parent is full, so split it.
- parentLoc = newBucket(parent, parentLoc);
- parent = _getModifiableBucket(parentLoc);
- invariant(_logic->pushBack(parent, val, key, leftSibLoc));
- leftSib->parent = parentLoc;
- }
-
- // Create a new bucket to the right of leftSib and set its parent pointer and the downward
- // nextChild pointer from the parent.
- DiskLoc newBucketLoc = _logic->_addBucket(_opCtx);
- BucketType* newBucket = _getBucket(newBucketLoc);
- *_opCtx->recoveryUnit()->writing(&newBucket->parent) = parentLoc;
- *_opCtx->recoveryUnit()->writing(&parent->nextChild) = newBucketLoc;
- return newBucketLoc;
-}
-
-template <class BtreeLayout>
-typename BtreeLogic<BtreeLayout>::BucketType*
-BtreeLogic<BtreeLayout>::Builder::_getModifiableBucket(DiskLoc loc) {
- return _logic->btreemod(_opCtx, _logic->getBucket(_opCtx, loc));
-}
-
-template <class BtreeLayout>
-typename BtreeLogic<BtreeLayout>::BucketType* BtreeLogic<BtreeLayout>::Builder::_getBucket(
- DiskLoc loc) {
- return _logic->getBucket(_opCtx, loc);
-}
-
-//
-// BtreeLogic logic
-//
-
-// static
-template <class BtreeLayout>
-typename BtreeLogic<BtreeLayout>::FullKey BtreeLogic<BtreeLayout>::getFullKey(
- const BucketType* bucket, int i) {
- if (i >= bucket->n) {
- int code = 13000;
- massert(code,
- (string) "invalid keyNode: " + BSON("i" << i << "n" << bucket->n).jsonString(),
- i < bucket->n);
- }
- return FullKey(bucket, i);
-}
-
-// static
-template <class BtreeLayout>
-typename BtreeLogic<BtreeLayout>::KeyHeaderType& BtreeLogic<BtreeLayout>::getKeyHeader(
- BucketType* bucket, int i) {
- return ((KeyHeaderType*)bucket->data)[i];
-}
-
-// static
-template <class BtreeLayout>
-const typename BtreeLogic<BtreeLayout>::KeyHeaderType& BtreeLogic<BtreeLayout>::getKeyHeader(
- const BucketType* bucket, int i) {
- return ((const KeyHeaderType*)bucket->data)[i];
-}
-
-template <class BtreeLayout>
-void BtreeLogic<BtreeLayout>::markUnused(BucketType* bucket, int keyPos) {
- invariant(keyPos >= 0 && keyPos < bucket->n);
- getKeyHeader(bucket, keyPos).setUnused();
-}
-
-template <class BtreeLayout>
-char* BtreeLogic<BtreeLayout>::dataAt(BucketType* bucket, short ofs) {
- return bucket->data + ofs;
-}
-
-template <class BtreeLayout>
-typename BtreeLogic<BtreeLayout>::BucketType* BtreeLogic<BtreeLayout>::btreemod(
- OperationContext* opCtx, BucketType* bucket) {
- opCtx->recoveryUnit()->writingPtr(bucket, BtreeLayout::BucketSize);
- return bucket;
-}
-
-template <class BtreeLayout>
-int BtreeLogic<BtreeLayout>::totalDataSize(BucketType* bucket) {
- return (int)(BtreeLayout::BucketSize - (bucket->data - (char*)bucket));
-}
-
-// We define this value as the maximum number of bytes such that, if we have
-// fewer than this many bytes, we must be able to either merge with or receive
-// keys from any neighboring node. If our utilization goes below this value we
-// know we can bring up the utilization with a simple operation. Ignoring the
-// 90/10 split policy which is sometimes employed and our 'unused' nodes, this
-// is a lower bound on bucket utilization for non root buckets.
-//
-// Note that the exact value here depends on the implementation of
-// _rebalancedSeparatorPos(). The conditions for lowWaterMark - 1 are as
-// follows: We know we cannot merge with the neighbor, so the total data size
-// for us, the neighbor, and the separator must be at least
-// BucketType::bodySize() + 1. We must be able to accept one key of any
-// allowed size, so our size plus storage for that additional key must be
-// <= BucketType::bodySize() / 2. This way, with the extra key we'll have a
-// new bucket data size < half the total data size and by the implementation
-// of _rebalancedSeparatorPos() the key must be added.
-template <class BtreeLayout>
-int BtreeLogic<BtreeLayout>::lowWaterMark() {
- return BtreeLayout::BucketBodySize / 2 - BtreeLayout::KeyMax - sizeof(KeyHeaderType) + 1;
-}
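
A worked instance of the bound above may help. With made-up layout constants (8000-byte bucket body, 1000-byte maximum key, 16-byte key header) the mark is 2985, and any bucket strictly below it can absorb one maximum-size key while ending at or under half the body size:

    constexpr int kBodySize = 8000;  // hypothetical BtreeLayout::BucketBodySize
    constexpr int kKeyMax = 1000;    // hypothetical BtreeLayout::KeyMax
    constexpr int kHeaderSize = 16;  // hypothetical sizeof(KeyHeaderType)

    constexpr int kLowWaterMark = kBodySize / 2 - kKeyMax - kHeaderSize + 1;  // 2985

    static_assert(kLowWaterMark == 2985, "arithmetic check");
    static_assert((kLowWaterMark - 1) + kKeyMax + kHeaderSize <= kBodySize / 2,
                  "a bucket below the mark always fits one maximum-size key");
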
-
-template <class BtreeLayout>
-void BtreeLogic<BtreeLayout>::init(BucketType* bucket) {
- BtreeLayout::initBucket(bucket);
- bucket->parent.Null();
- bucket->nextChild.Null();
- bucket->flags = Packed;
- bucket->n = 0;
- bucket->emptySize = totalDataSize(bucket);
- bucket->topSize = 0;
-}
-
-template <class BtreeLayout>
-void BtreeLogic<BtreeLayout>::_unalloc(BucketType* bucket, int bytes) {
- bucket->topSize -= bytes;
- bucket->emptySize += bytes;
-}
-
-/**
- * We allocate space from the end of the buffer for data. The keynodes grow from the front.
- */
-template <class BtreeLayout>
-int BtreeLogic<BtreeLayout>::_alloc(BucketType* bucket, int bytes) {
- invariant(bucket->emptySize >= bytes);
- bucket->topSize += bytes;
- bucket->emptySize -= bytes;
- int ofs = totalDataSize(bucket) - bucket->topSize;
- invariant(ofs > 0);
- return ofs;
-}
-
-template <class BtreeLayout>
-void BtreeLogic<BtreeLayout>::setNotPacked(BucketType* bucket) {
- bucket->flags &= ~Packed;
-}
-
-template <class BtreeLayout>
-void BtreeLogic<BtreeLayout>::setPacked(BucketType* bucket) {
- bucket->flags |= Packed;
-}
-
-template <class BtreeLayout>
-void BtreeLogic<BtreeLayout>::_delKeyAtPos(BucketType* bucket, int keypos, bool mayEmpty) {
- invariant(keypos >= 0 && keypos <= bucket->n);
- invariant(childLocForPos(bucket, keypos).isNull());
- invariant((mayEmpty && bucket->n > 0) || bucket->n > 1 || bucket->nextChild.isNull());
-
- bucket->emptySize += sizeof(KeyHeaderType);
- bucket->n--;
-
- for (int j = keypos; j < bucket->n; j++) {
- getKeyHeader(bucket, j) = getKeyHeader(bucket, j + 1);
- }
-
- setNotPacked(bucket);
-}
-
-/**
- * Pull rightmost key from the bucket and set its prevChild pointer to be the nextChild for the
- * whole bucket. It is assumed that caller already has the old value of the nextChild
- * pointer and is about to add a pointer to it elsewhere in the tree.
- *
- * This is only used by BtreeLogic::Builder. Think very hard (and change this comment) before
- * using it anywhere else.
- *
- * WARNING: The keyDataOut that is filled out by this function points to newly unalloced memory
- * inside of this bucket. It only remains valid until the next write to this bucket.
- */
-template <class BtreeLayout>
-void BtreeLogic<BtreeLayout>::popBack(BucketType* bucket,
- DiskLoc* recordLocOut,
- KeyDataType* keyDataOut) {
- massert(17435, "n==0 in btree popBack()", bucket->n > 0);
-
- invariant(getKeyHeader(bucket, bucket->n - 1).isUsed());
-
- FullKey kn = getFullKey(bucket, bucket->n - 1);
- *recordLocOut = kn.recordLoc;
- keyDataOut->assign(kn.data);
- int keysize = kn.data.dataSize();
-
- // The left/prev child of the node we are popping now goes in to the nextChild slot as all
- // of its keys are greater than all remaining keys in this node.
- bucket->nextChild = kn.prevChildBucket;
- bucket->n--;
-
- // This is risky because the keyDataOut we filled out above will now point to this newly
- // unalloced memory.
- bucket->emptySize += sizeof(KeyHeaderType);
- _unalloc(bucket, keysize);
-}
-
-/**
- * Add a key. Must be > all existing. Be careful to set next ptr right.
- */
-template <class BtreeLayout>
-bool BtreeLogic<BtreeLayout>::pushBack(BucketType* bucket,
- const DiskLoc recordLoc,
- const KeyDataType& key,
- const DiskLoc prevChild) {
- int bytesNeeded = key.dataSize() + sizeof(KeyHeaderType);
- if (bytesNeeded > bucket->emptySize) {
- return false;
- }
- invariant(bytesNeeded <= bucket->emptySize);
-
- if (bucket->n) {
- const FullKey klast = getFullKey(bucket, bucket->n - 1);
- if (klast.data.woCompare(key, _ordering) > 0) {
- log() << "btree bucket corrupt? "
- "consider reindexing or running validate command"
- << endl;
- log() << " klast: " << klast.data.toString() << endl;
- log() << " key: " << key.toString() << endl;
- MONGO_UNREACHABLE;
- }
- }
-
- bucket->emptySize -= sizeof(KeyHeaderType);
- KeyHeaderType& kn = getKeyHeader(bucket, bucket->n++);
- kn.prevChildBucket = prevChild;
- kn.recordLoc = recordLoc;
- kn.setKeyDataOfs((short)_alloc(bucket, key.dataSize()));
- short ofs = kn.keyDataOfs();
- char* p = dataAt(bucket, ofs);
- memcpy(p, key.data(), key.dataSize());
- return true;
-}
-
-/**
- * Durability note:
- *
- * We do separate intent declarations herein. Arguably one could just declare the whole bucket
- * given we do group commits. This is something we could investigate later as to what is
- * faster.
- **/
-
-/**
- * Insert a key in a bucket with no complexity -- no splits required
- * Returns false if a split is required.
- */
-template <class BtreeLayout>
-bool BtreeLogic<BtreeLayout>::basicInsert(OperationContext* opCtx,
- BucketType* bucket,
- const DiskLoc bucketLoc,
- int& keypos,
- const KeyDataType& key,
- const DiskLoc recordLoc) {
- invariant(bucket->n < 1024);
- invariant(keypos >= 0 && keypos <= bucket->n);
-
- int bytesNeeded = key.dataSize() + sizeof(KeyHeaderType);
- if (bytesNeeded > bucket->emptySize) {
- _pack(opCtx, bucket, bucketLoc, keypos);
- if (bytesNeeded > bucket->emptySize) {
- return false;
- }
- }
-
- invariant(getBucket(opCtx, bucketLoc) == bucket);
-
- {
- // declare that we will write to [k(keypos),k(n)]
- char* start = reinterpret_cast<char*>(&getKeyHeader(bucket, keypos));
- char* end = reinterpret_cast<char*>(&getKeyHeader(bucket, bucket->n + 1));
-
- // Declare that we will write to [k(keypos),k(n)]
- opCtx->recoveryUnit()->writingPtr(start, end - start);
- }
-
- // e.g. for n==3, keypos==2
- // 1 4 9 -> 1 4 _ 9
- for (int j = bucket->n; j > keypos; j--) {
- getKeyHeader(bucket, j) = getKeyHeader(bucket, j - 1);
- }
-
- size_t writeLen = sizeof(bucket->emptySize) + sizeof(bucket->topSize) + sizeof(bucket->n);
- opCtx->recoveryUnit()->writingPtr(&bucket->emptySize, writeLen);
- bucket->emptySize -= sizeof(KeyHeaderType);
- bucket->n++;
-
- // This _KeyNode was marked for writing above.
- KeyHeaderType& kn = getKeyHeader(bucket, keypos);
- kn.prevChildBucket.Null();
- kn.recordLoc = recordLoc;
- kn.setKeyDataOfs((short)_alloc(bucket, key.dataSize()));
- char* p = dataAt(bucket, kn.keyDataOfs());
- opCtx->recoveryUnit()->writingPtr(p, key.dataSize());
- memcpy(p, key.data(), key.dataSize());
- return true;
-}
-
-/**
- * With this implementation, refPos == 0 disregards effect of refPos. index > 0 prevents
- * creation of an empty bucket.
- */
-template <class BtreeLayout>
-bool BtreeLogic<BtreeLayout>::mayDropKey(BucketType* bucket, int index, int refPos) {
- return index > 0 && (index != refPos) && getKeyHeader(bucket, index).isUnused() &&
- getKeyHeader(bucket, index).prevChildBucket.isNull();
-}
-
-template <class BtreeLayout>
-int BtreeLogic<BtreeLayout>::_packedDataSize(BucketType* bucket, int refPos) {
- if (bucket->flags & Packed) {
- return BtreeLayout::BucketSize - bucket->emptySize - BucketType::HeaderSize;
- }
-
- int size = 0;
- for (int j = 0; j < bucket->n; ++j) {
- if (mayDropKey(bucket, j, refPos)) {
- continue;
- }
- size += getFullKey(bucket, j).data.dataSize() + sizeof(KeyHeaderType);
- }
-
- return size;
-}
-
-/**
- * When we delete things, we just leave empty space until the node is full and then we repack
- * it.
- */
-template <class BtreeLayout>
-void BtreeLogic<BtreeLayout>::_pack(OperationContext* opCtx,
- BucketType* bucket,
- const DiskLoc thisLoc,
- int& refPos) {
- invariant(getBucket(opCtx, thisLoc) == bucket);
-
- if (bucket->flags & Packed) {
- return;
- }
-
- _packReadyForMod(btreemod(opCtx, bucket), refPos);
-}
-
-/**
- * Version when write intent already declared.
- */
-template <class BtreeLayout>
-void BtreeLogic<BtreeLayout>::_packReadyForMod(BucketType* bucket, int& refPos) {
- if (bucket->flags & Packed) {
- return;
- }
-
- int tdz = totalDataSize(bucket);
- char temp[BtreeLayout::BucketSize];
- int ofs = tdz;
- bucket->topSize = 0;
-
- int i = 0;
- for (int j = 0; j < bucket->n; j++) {
- if (mayDropKey(bucket, j, refPos)) {
- // key is unused and has no children - drop it
- continue;
- }
-
- if (i != j) {
- if (refPos == j) {
- // i < j so j will never be refPos again
- refPos = i;
- }
- getKeyHeader(bucket, i) = getKeyHeader(bucket, j);
- }
-
- short ofsold = getKeyHeader(bucket, i).keyDataOfs();
- int sz = getFullKey(bucket, i).data.dataSize();
- ofs -= sz;
- bucket->topSize += sz;
- memcpy(temp + ofs, dataAt(bucket, ofsold), sz);
- getKeyHeader(bucket, i).setKeyDataOfsSavingUse(ofs);
- ++i;
- }
-
- if (refPos == bucket->n) {
- refPos = i;
- }
-
- bucket->n = i;
- int dataUsed = tdz - ofs;
- memcpy(bucket->data + ofs, temp + ofs, dataUsed);
-
- bucket->emptySize = tdz - dataUsed - bucket->n * sizeof(KeyHeaderType);
- int foo = bucket->emptySize;
- invariant(foo >= 0);
- setPacked(bucket);
- assertValid(_indexName, bucket, _ordering);
-}
-
-template <class BtreeLayout>
-void BtreeLogic<BtreeLayout>::truncateTo(BucketType* bucket, int N, int& refPos) {
- bucket->n = N;
- setNotPacked(bucket);
- _packReadyForMod(bucket, refPos);
-}
-
-/**
- * In the standard btree algorithm, we would split based on the
- * existing keys _and_ the new key. But that's more work to
- * implement, so we split the existing keys and then add the new key.
- *
- * There are several published heuristic algorithms for doing splits, but basically what you
- * want are (1) even balancing between the two sides and (2) a small split key so the parent can
- * have a larger branching factor.
- *
- * We just have a simple algorithm right now: if a key includes the halfway point (or 10% way
- * point) in terms of bytes, split on that key; otherwise split on the key immediately to the
- * left of the halfway point (or 10% point).
- *
- * This function is expected to be called on a packed bucket.
- */
-template <class BtreeLayout>
-int BtreeLogic<BtreeLayout>::splitPos(BucketType* bucket, int keypos) {
- invariant(bucket->n > 2);
- int split = 0;
- int rightSize = 0;
-
- // When splitting a btree node, if the new key is greater than all the other keys, we should
- // not do an even split, but a 90/10 split. see SERVER-983. TODO I think we only want to
- // do the 90% split on the rhs node of the tree.
- int rightSizeLimit =
- (bucket->topSize + sizeof(KeyHeaderType) * bucket->n) / (keypos == bucket->n ? 10 : 2);
-
- for (int i = bucket->n - 1; i > -1; --i) {
- rightSize += getFullKey(bucket, i).data.dataSize() + sizeof(KeyHeaderType);
- if (rightSize > rightSizeLimit) {
- split = i;
- break;
- }
- }
-
- // safeguards - we must not create an empty bucket
- if (split < 1) {
- split = 1;
- } else if (split > bucket->n - 2) {
- split = bucket->n - 2;
- }
-
- return split;
-}
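
The 90/10 heuristic above matters for ascending inserts (an autoincrementing key, say): appending at the end would otherwise leave every split's left half only 50% full. A self-contained sketch of the same selection rule over a list of key sizes (made-up representation, not the on-disk layout):

    #include <cstddef>
    #include <vector>

    // Pick the separator index: walk from the right, stop once the right
    // side exceeds its budget (one tenth of the used bytes when the new
    // key goes at the very end, half otherwise), then clamp so neither
    // side is left empty.
    int chooseSplitPos(const std::vector<int>& keySizes, int headerSize, bool insertAtEnd) {
        std::size_t used = 0;
        for (int s : keySizes)
            used += s + headerSize;
        const std::size_t rightLimit = used / (insertAtEnd ? 10 : 2);

        int split = 0;
        std::size_t rightSize = 0;
        for (int i = static_cast<int>(keySizes.size()) - 1; i >= 0; --i) {
            rightSize += keySizes[i] + headerSize;
            if (rightSize > rightLimit) {
                split = i;
                break;
            }
        }
        const int n = static_cast<int>(keySizes.size());
        if (split < 1)
            split = 1;       // never create an empty left bucket
        else if (split > n - 2)
            split = n - 2;   // never create an empty right bucket
        return split;
    }
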
-
-template <class BtreeLayout>
-void BtreeLogic<BtreeLayout>::reserveKeysFront(BucketType* bucket, int nAdd) {
- invariant(bucket->emptySize >= int(sizeof(KeyHeaderType) * nAdd));
- bucket->emptySize -= sizeof(KeyHeaderType) * nAdd;
- for (int i = bucket->n - 1; i > -1; --i) {
- getKeyHeader(bucket, i + nAdd) = getKeyHeader(bucket, i);
- }
- bucket->n += nAdd;
-}
-
-template <class BtreeLayout>
-void BtreeLogic<BtreeLayout>::setKey(BucketType* bucket,
- int i,
- const DiskLoc recordLoc,
- const KeyDataType& key,
- const DiskLoc prevChildBucket) {
- KeyHeaderType& kn = getKeyHeader(bucket, i);
- kn.recordLoc = recordLoc;
- kn.prevChildBucket = prevChildBucket;
- short ofs = (short)_alloc(bucket, key.dataSize());
- kn.setKeyDataOfs(ofs);
- char* p = dataAt(bucket, ofs);
- memcpy(p, key.data(), key.dataSize());
-}
-
-template <class BtreeLayout>
-void BtreeLogic<BtreeLayout>::dropFront(BucketType* bucket, int nDrop, int& refpos) {
- for (int i = nDrop; i < bucket->n; ++i) {
- getKeyHeader(bucket, i - nDrop) = getKeyHeader(bucket, i);
- }
- bucket->n -= nDrop;
- setNotPacked(bucket);
- _packReadyForMod(bucket, refpos);
-}
-
-template <class BtreeLayout>
-void BtreeLogic<BtreeLayout>::customLocate(OperationContext* opCtx,
- DiskLoc* locInOut,
- int* keyOfsInOut,
- const IndexSeekPoint& seekPoint,
- int direction) const {
- pair<DiskLoc, int> unused;
-
- customLocate(opCtx, locInOut, keyOfsInOut, seekPoint, direction, unused);
- skipUnusedKeys(opCtx, locInOut, keyOfsInOut, direction);
-}
-
-template <class BtreeLayout>
-void BtreeLogic<BtreeLayout>::advance(OperationContext* opCtx,
- DiskLoc* bucketLocInOut,
- int* posInOut,
- int direction) const {
- *bucketLocInOut = advance(opCtx, *bucketLocInOut, posInOut, direction);
- skipUnusedKeys(opCtx, bucketLocInOut, posInOut, direction);
-}
-
-template <class BtreeLayout>
-void BtreeLogic<BtreeLayout>::skipUnusedKeys(OperationContext* opCtx,
- DiskLoc* loc,
- int* pos,
- int direction) const {
- while (!loc->isNull() && !keyIsUsed(opCtx, *loc, *pos)) {
- *loc = advance(opCtx, *loc, pos, direction);
- }
-}
-
-template <class BtreeLayout>
-void BtreeLogic<BtreeLayout>::advanceTo(OperationContext* opCtx,
- DiskLoc* thisLocInOut,
- int* keyOfsInOut,
- const IndexSeekPoint& seekPoint,
- int direction) const {
- advanceToImpl(opCtx, thisLocInOut, keyOfsInOut, seekPoint, direction);
- skipUnusedKeys(opCtx, thisLocInOut, keyOfsInOut, direction);
-}
-
-/**
- * Finds the smallest (forward scan) or biggest (reverse scan) value greater-or-equal /
- * less-or-equal to the specified seek point.
- *
- * The starting position (thisLoc + keyOfs) must be strictly less than / strictly greater
- * than the seek point.
- *
- * The direction checks below let a single implementation serve both scan directions, but
- * separate forward and reverse implementations might be more efficient.
- */
-template <class BtreeLayout>
-void BtreeLogic<BtreeLayout>::advanceToImpl(OperationContext* opCtx,
- DiskLoc* thisLocInOut,
- int* keyOfsInOut,
- const IndexSeekPoint& seekPoint,
- int direction) const {
- BucketType* bucket = getBucket(opCtx, *thisLocInOut);
-
- int l, h;
- bool dontGoUp;
-
- if (direction > 0) {
- l = *keyOfsInOut;
- h = bucket->n - 1;
- int cmpResult = customBSONCmp(getFullKey(bucket, h).data.toBson(), seekPoint, direction);
- dontGoUp = (cmpResult >= 0);
- } else {
- l = 0;
- h = *keyOfsInOut;
- int cmpResult = customBSONCmp(getFullKey(bucket, l).data.toBson(), seekPoint, direction);
- dontGoUp = (cmpResult <= 0);
- }
-
- pair<DiskLoc, int> bestParent;
-
- if (dontGoUp) {
-        // this comparison result ensures h > l
- if (!customFind(opCtx, l, h, seekPoint, direction, thisLocInOut, keyOfsInOut, bestParent)) {
- return;
- }
- } else {
- // go up parents until rightmost/leftmost node is >=/<= target or at top
- while (!bucket->parent.isNull()) {
- *thisLocInOut = bucket->parent;
- bucket = getBucket(opCtx, *thisLocInOut);
-
- if (direction > 0) {
- if (customBSONCmp(getFullKey(bucket, bucket->n - 1).data.toBson(),
- seekPoint,
- direction) >= 0) {
- break;
- }
- } else {
- if (customBSONCmp(getFullKey(bucket, 0).data.toBson(), seekPoint, direction) <= 0) {
- break;
- }
- }
- }
- }
-
- customLocate(opCtx, thisLocInOut, keyOfsInOut, seekPoint, direction, bestParent);
-}
-
-template <class BtreeLayout>
-void BtreeLogic<BtreeLayout>::customLocate(OperationContext* opCtx,
- DiskLoc* locInOut,
- int* keyOfsInOut,
- const IndexSeekPoint& seekPoint,
- int direction,
- pair<DiskLoc, int>& bestParent) const {
- BucketType* bucket = getBucket(opCtx, *locInOut);
-
- if (0 == bucket->n) {
- *locInOut = DiskLoc();
- return;
- }
-
- // go down until find smallest/biggest >=/<= target
- for (;;) {
- int l = 0;
- int h = bucket->n - 1;
-
- // +direction: 0, -direction: h
- int z = (direction > 0) ? 0 : h;
-
- // leftmost/rightmost key may possibly be >=/<= search key
- int res = customBSONCmp(getFullKey(bucket, z).data.toBson(), seekPoint, direction);
- if (direction * res >= 0) {
- DiskLoc next;
- *keyOfsInOut = z;
-
- if (direction > 0) {
- dassert(z == 0);
- next = getKeyHeader(bucket, 0).prevChildBucket;
- } else {
- next = bucket->nextChild;
- }
-
- if (!next.isNull()) {
- bestParent = pair<DiskLoc, int>(*locInOut, *keyOfsInOut);
- *locInOut = next;
- bucket = getBucket(opCtx, *locInOut);
- continue;
- } else {
- return;
- }
- }
-
- res = customBSONCmp(getFullKey(bucket, h - z).data.toBson(), seekPoint, direction);
- if (direction * res < 0) {
- DiskLoc next;
- if (direction > 0) {
- next = bucket->nextChild;
- } else {
- next = getKeyHeader(bucket, 0).prevChildBucket;
- }
-
- if (next.isNull()) {
- // if bestParent is null, we've hit the end and locInOut gets set to DiskLoc()
- *locInOut = bestParent.first;
- *keyOfsInOut = bestParent.second;
- return;
- } else {
- *locInOut = next;
- bucket = getBucket(opCtx, *locInOut);
- continue;
- }
- }
-
- if (!customFind(opCtx, l, h, seekPoint, direction, locInOut, keyOfsInOut, bestParent)) {
- return;
- }
-
- bucket = getBucket(opCtx, *locInOut);
- }
-}
-
-template <class BtreeLayout>
-bool BtreeLogic<BtreeLayout>::customFind(OperationContext* opCtx,
- int low,
- int high,
- const IndexSeekPoint& seekPoint,
- int direction,
- DiskLoc* thisLocInOut,
- int* keyOfsInOut,
- pair<DiskLoc, int>& bestParent) const {
- const BucketType* bucket = getBucket(opCtx, *thisLocInOut);
-
- for (;;) {
- if (low + 1 == high) {
- *keyOfsInOut = (direction > 0) ? high : low;
- DiskLoc next = getKeyHeader(bucket, high).prevChildBucket;
- if (!next.isNull()) {
- bestParent = make_pair(*thisLocInOut, *keyOfsInOut);
- *thisLocInOut = next;
- return true;
- } else {
- return false;
- }
- }
-
- int middle = low + (high - low) / 2;
-
- int cmp = customBSONCmp(getFullKey(bucket, middle).data.toBson(), seekPoint, direction);
- if (cmp < 0) {
- low = middle;
- } else if (cmp > 0) {
- high = middle;
- } else {
- if (direction < 0) {
- low = middle;
- } else {
- high = middle;
- }
- }
- }
-}
-
-/**
- * NOTE: Currently the Ordering implementation assumes a compound index will not have more keys
- * than an unsigned variable has bits. The same assumption is used in the implementation below
- * with respect to the 'mask' variable.
- *
- * 'l' is a regular bsonobj
- *
- * 'rBegin' is composed partly of an existing bsonobj, and the remaining keys are taken from a
- * vector of elements that frequently changes
- *
- * see https://jira.mongodb.org/browse/SERVER-371
- */
-// static
-template <class BtreeLayout>
-int BtreeLogic<BtreeLayout>::customBSONCmp(const BSONObj& left,
- const IndexSeekPoint& right,
- int direction) const {
- // XXX: make this readable
- dassert(right.keySuffix.size() == right.suffixInclusive.size());
-
- BSONObjIterator ll(left);
- BSONObjIterator rr(right.keyPrefix);
- unsigned mask = 1;
- size_t i = 0;
- for (; i < size_t(right.prefixLen); ++i, mask <<= 1) {
- BSONElement lll = ll.next();
- BSONElement rrr = rr.next();
-
- int x = lll.woCompare(rrr, false);
- if (_ordering.descending(mask))
- x = -x;
- if (x != 0)
- return x;
- }
- if (right.prefixExclusive) {
- return -direction;
- }
- for (; i < right.keySuffix.size(); ++i, mask <<= 1) {
- if (!ll.more())
- return -direction;
-
- BSONElement lll = ll.next();
- BSONElement rrr = *right.keySuffix[i];
- int x = lll.woCompare(rrr, false);
- if (_ordering.descending(mask))
- x = -x;
- if (x != 0)
- return x;
- if (!right.suffixInclusive[i]) {
- return -direction;
- }
- }
- return ll.more() ? direction : 0;
-}
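-
-// Example of the seek-point semantics above (illustrative values): with key pattern
-// {a: 1, b: 1}, prefixLen == 1, keyPrefix == {a: 5}, and a suffix element of 10 whose
-// suffixInclusive entry is false, a forward scan (direction == 1) treats a btree key
-// {a: 5, b: 10} as less than the seek point -- the equal suffix returns -direction because
-// the bound is exclusive -- so locate lands on the first key with a == 5 and b > 10.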
-
-template <class BtreeLayout>
-bool BtreeLogic<BtreeLayout>::exists(OperationContext* opCtx, const KeyDataType& key) const {
- int position = 0;
-
- // Find the DiskLoc
- bool found;
-
- DiskLoc bucket = _locate(opCtx, getRootLoc(opCtx), key, &position, &found, DiskLoc::min(), 1);
-
- while (!bucket.isNull()) {
- FullKey fullKey = getFullKey(getBucket(opCtx, bucket), position);
- if (fullKey.header.isUsed()) {
- return fullKey.data.woEqual(key);
- }
- bucket = advance(opCtx, bucket, &position, 1);
- }
-
- return false;
-}
-
-template <class BtreeLayout>
-Status BtreeLogic<BtreeLayout>::dupKeyCheck(OperationContext* opCtx,
- const BSONObj& key,
- const DiskLoc& loc) const {
- KeyDataOwnedType theKey(key);
- if (!wouldCreateDup(opCtx, theKey, loc)) {
- return Status::OK();
- }
-
- return Status(ErrorCodes::DuplicateKey, dupKeyError(theKey));
-}
-
-template <class BtreeLayout>
-bool BtreeLogic<BtreeLayout>::wouldCreateDup(OperationContext* opCtx,
- const KeyDataType& key,
- const DiskLoc self) const {
- int position;
- bool found;
-
- DiskLoc posLoc = _locate(opCtx, getRootLoc(opCtx), key, &position, &found, DiskLoc::min(), 1);
-
- while (!posLoc.isNull()) {
- FullKey fullKey = getFullKey(getBucket(opCtx, posLoc), position);
- if (fullKey.header.isUsed()) {
- // TODO: we may not need fullKey.data until we know fullKey.header.isUsed() here
- // and elsewhere.
- if (fullKey.data.woEqual(key)) {
- return fullKey.recordLoc != self;
- }
- break;
- }
-
- posLoc = advance(opCtx, posLoc, &position, 1);
- }
- return false;
-}
-
-template <class BtreeLayout>
-string BtreeLogic<BtreeLayout>::dupKeyError(const KeyDataType& key) const {
- stringstream ss;
- ss << "E11000 duplicate key error ";
- ss << "index: " << _indexName << " ";
- ss << "dup key: " << key.toString();
- return ss.str();
-}
-
-/**
- * Find a key within this btree bucket.
- *
- * When duplicate keys are allowed, we use the DiskLoc of the record as if it were part of the
- * key. That assures that even when there are many duplicates (e.g., 1 million) for a key, our
- * performance is still good.
- *
- * errorIfDup: if the key already exists (ignoring the recordLoc), return a DuplicateKey
- * error.
- *
- * pos: out-parameter. For existing keys k0...k(n-1), returns the index the new key would go
- * BEFORE, i.e. key[pos-1] < key < key[pos], or n if it goes after the last existing key.
- * Note the result might be an Unused location!
- */
-template <class BtreeLayout>
-Status BtreeLogic<BtreeLayout>::_find(OperationContext* opCtx,
- BucketType* bucket,
- const KeyDataType& key,
- const DiskLoc& recordLoc,
- bool errorIfDup,
- int* keyPositionOut,
- bool* foundOut) const {
-    // XXX: fix the ctor for DiskLoc56Bit so we can just convert w/o an assignment operator
- LocType genericRecordLoc;
- genericRecordLoc = recordLoc;
-
- bool dupsCheckedYet = false;
-
- int low = 0;
- int high = bucket->n - 1;
- int middle = (low + high) / 2;
-
- while (low <= high) {
- FullKey fullKey = getFullKey(bucket, middle);
- int cmp = key.woCompare(fullKey.data, _ordering);
-
- // The key data is the same.
- if (0 == cmp) {
- // Found the key in this bucket. If we're checking for dups...
- if (errorIfDup) {
- if (fullKey.header.isUnused()) {
-                    // It's ok that the key is there if it is unused. We then need to check
-                    // that there aren't other entries for the same key. As it is very rare
-                    // that we get here, we don't put any effort into making this check
-                    // particularly fast.
-                    if (!dupsCheckedYet) {
-                        // This is expensive, and we only want to do it once (it is unclear
-                        // when it could happen a second time).
- dupsCheckedYet = true;
- if (exists(opCtx, key)) {
- if (wouldCreateDup(opCtx, key, genericRecordLoc)) {
- return Status(ErrorCodes::DuplicateKey, dupKeyError(key));
- } else {
- return Status(ErrorCodes::DuplicateKeyValue,
- "key/value already in index");
- }
- }
- }
- } else {
- if (fullKey.recordLoc == recordLoc) {
- return Status(ErrorCodes::DuplicateKeyValue, "key/value already in index");
- } else {
- return Status(ErrorCodes::DuplicateKey, dupKeyError(key));
- }
- }
- }
-
- // If we're here dup keys are allowed, or the key is a dup but unused.
- LocType recordLocCopy = fullKey.recordLoc;
-
- // We clear this bit so we can test equality without the used bit messing us up.
- // XXX: document this
- // XXX: kill this GETOFS stuff
- recordLocCopy.GETOFS() &= ~1;
-
- // Set 'cmp' to the comparison w/the DiskLoc and fall through below.
- cmp = recordLoc.compare(recordLocCopy);
- }
-
- if (cmp < 0) {
- high = middle - 1;
- } else if (cmp > 0) {
- low = middle + 1;
- } else {
- // Found it!
- *keyPositionOut = middle;
- *foundOut = true;
- return Status::OK();
- }
-
- middle = (low + high) / 2;
- }
-
- // Not found.
- *keyPositionOut = low;
-
- // Some debugging checks.
- if (low != bucket->n) {
- invariant(key.woCompare(getFullKey(bucket, low).data, _ordering) <= 0);
-
- if (low > 0) {
- if (getFullKey(bucket, low - 1).data.woCompare(key, _ordering) > 0) {
- DEV {
- log() << key.toString() << endl;
- log() << getFullKey(bucket, low - 1).data.toString() << endl;
- }
- MONGO_UNREACHABLE;
- }
- }
- }
-
- *foundOut = false;
- return Status::OK();
-}
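-
-// A minimal sketch of the composite ordering _find() binary-searches over: the record
-// location acts as a tiebreaker, so duplicate keys remain totally ordered and lookups stay
-// O(log n) even with millions of duplicates. 'ExampleKey' and 'compareExample' are
-// illustrative stand-ins for the BSON key and DiskLoc comparisons, not part of this file.
-namespace find_example {
-struct ExampleKey {
-    int key;        // stands in for the BSON key comparison
-    long long loc;  // stands in for the DiskLoc tiebreaker
-};
-inline int compareExample(const ExampleKey& a, const ExampleKey& b) {
-    if (a.key != b.key)
-        return a.key < b.key ? -1 : 1;
-    if (a.loc != b.loc)  // equal keys: fall back to the record location
-        return a.loc < b.loc ? -1 : 1;
-    return 0;
-}
-}  // namespace find_example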
-
-template <class BtreeLayout>
-void BtreeLogic<BtreeLayout>::delBucket(OperationContext* opCtx,
- BucketType* bucket,
- const DiskLoc bucketLoc) {
- invariant(bucketLoc != getRootLoc(opCtx));
-
- _cursorRegistry->invalidateCursorsForBucket(bucketLoc);
-
- BucketType* p = getBucket(opCtx, bucket->parent);
- int parentIdx = indexInParent(opCtx, bucket, bucketLoc);
- *opCtx->recoveryUnit()->writing(&childLocForPos(p, parentIdx)) = DiskLoc();
- deallocBucket(opCtx, bucket, bucketLoc);
-}
-
-template <class BtreeLayout>
-void BtreeLogic<BtreeLayout>::deallocBucket(OperationContext* opCtx,
- BucketType* bucket,
- const DiskLoc bucketLoc) {
- bucket->n = BtreeLayout::INVALID_N_SENTINEL;
- bucket->parent.Null();
- _recordStore->deleteRecord(opCtx, bucketLoc.toRecordId());
-}
-
-template <class BtreeLayout>
-void BtreeLogic<BtreeLayout>::restorePosition(OperationContext* opCtx,
- const BSONObj& savedKey,
- const DiskLoc& savedLoc,
- int direction,
- DiskLoc* bucketLocInOut,
- int* keyOffsetInOut) const {
- // The caller has to ensure validity of the saved cursor using the SavedCursorRegistry
- BucketType* bucket = getBucket(opCtx, *bucketLocInOut);
- invariant(bucket);
- invariant(BtreeLayout::INVALID_N_SENTINEL != bucket->n);
-
- if (_keyIsAt(savedKey, savedLoc, bucket, *keyOffsetInOut)) {
- skipUnusedKeys(opCtx, bucketLocInOut, keyOffsetInOut, direction);
- return;
- }
-
- if (*keyOffsetInOut > 0) {
- (*keyOffsetInOut)--;
- if (_keyIsAt(savedKey, savedLoc, bucket, *keyOffsetInOut)) {
- skipUnusedKeys(opCtx, bucketLocInOut, keyOffsetInOut, direction);
- return;
- }
- }
-
- locate(opCtx, savedKey, savedLoc, direction, keyOffsetInOut, bucketLocInOut);
-}
-
-template <class BtreeLayout>
-bool BtreeLogic<BtreeLayout>::_keyIsAt(const BSONObj& savedKey,
- const DiskLoc& savedLoc,
- BucketType* bucket,
- int keyPos) const {
- if (keyPos >= bucket->n) {
- return false;
- }
-
- FullKey key = getFullKey(bucket, keyPos);
- if (!key.data.toBson().binaryEqual(savedKey)) {
- return false;
- }
- return key.header.recordLoc == savedLoc;
-}
-
-/**
- * May delete the bucket 'bucket' rendering 'bucketLoc' invalid.
- */
-template <class BtreeLayout>
-void BtreeLogic<BtreeLayout>::delKeyAtPos(OperationContext* opCtx,
- BucketType* bucket,
- const DiskLoc bucketLoc,
- int p) {
- invariant(bucket->n > 0);
- DiskLoc left = childLocForPos(bucket, p);
- if (bucket->n == 1) {
- if (left.isNull() && bucket->nextChild.isNull()) {
- _delKeyAtPos(bucket, p);
- if (isHead(bucket)) {
- // we don't delete the top bucket ever
- } else {
- if (!mayBalanceWithNeighbors(opCtx, bucket, bucketLoc)) {
-                    // An empty bucket is only allowed as a transient state. If
-                    // there are no neighbors to balance with, we delete ourselves.
- // This condition is only expected in legacy btrees.
- delBucket(opCtx, bucket, bucketLoc);
- }
- }
- return;
- }
- deleteInternalKey(opCtx, bucket, bucketLoc, p);
- return;
- }
-
- if (left.isNull()) {
- _delKeyAtPos(bucket, p);
- mayBalanceWithNeighbors(opCtx, bucket, bucketLoc);
- } else {
- deleteInternalKey(opCtx, bucket, bucketLoc, p);
- }
-}
-
-/**
- * This function replaces the specified key (k) by either the prev or next key in the btree
- * (k'). We require that k have either a left or right child. If k has a left child, we set k'
- * to the prev key of k, which must be a leaf present in the left child. If k does not have a
- * left child, we set k' to the next key of k, which must be a leaf present in the right child.
- * When we replace k with k', we copy k' over k (which may cause a split) and then remove k'
- * from its original location. Because k' is stored in a descendent of k, replacing k by k'
- * will not modify the storage location of the original k', and we can easily remove k' from its
- * original location.
- *
- * This function is only needed in cases where k has a left or right child; in other cases a
- * simpler key removal implementation is possible.
- *
- * NOTE on noncompliant BtreeBuilder btrees: It is possible (though likely rare) for btrees
- * created by BtreeBuilder to have k' that is not a leaf, see SERVER-2732. These cases are
- * handled in the same manner as described in the "legacy btree structures" note below.
- *
- * NOTE on legacy btree structures: In legacy btrees, k' can be a nonleaf. In such a case we
- * 'delete' k by marking it as an unused node rather than replacing it with k'. Also, k' may be
- * a leaf but marked as an unused node. In such a case we replace k by k', preserving the key's
- * unused marking. This function is only expected to mark a key as unused when handling a
- * legacy btree.
- */
-template <class BtreeLayout>
-void BtreeLogic<BtreeLayout>::deleteInternalKey(OperationContext* opCtx,
- BucketType* bucket,
- const DiskLoc bucketLoc,
- int keypos) {
- DiskLoc lchild = childLocForPos(bucket, keypos);
- DiskLoc rchild = childLocForPos(bucket, keypos + 1);
- invariant(!lchild.isNull() || !rchild.isNull());
- int advanceDirection = lchild.isNull() ? 1 : -1;
- int advanceKeyOfs = keypos;
- DiskLoc advanceLoc = advance(opCtx, bucketLoc, &advanceKeyOfs, advanceDirection);
-    // advanceLoc must be a descendant of thisLoc, because thisLoc has a
- // child in the proper direction and all descendants of thisLoc must be
- // nonempty because they are not the root.
- BucketType* advanceBucket = getBucket(opCtx, advanceLoc);
-
- if (!childLocForPos(advanceBucket, advanceKeyOfs).isNull() ||
- !childLocForPos(advanceBucket, advanceKeyOfs + 1).isNull()) {
- markUnused(bucket, keypos);
- return;
- }
-
- FullKey kn = getFullKey(advanceBucket, advanceKeyOfs);
- // Because advanceLoc is a descendant of thisLoc, updating thisLoc will
- // not affect packing or keys of advanceLoc and kn will be stable
- // during the following setInternalKey()
- setInternalKey(opCtx,
- bucket,
- bucketLoc,
- keypos,
- kn.recordLoc,
- kn.data,
- childLocForPos(bucket, keypos),
- childLocForPos(bucket, keypos + 1));
- delKeyAtPos(opCtx, btreemod(opCtx, advanceBucket), advanceLoc, advanceKeyOfs);
-}
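-
-// Worked example (illustrative): deleting key 50 from an internal position whose left child
-// subtree holds {30, 40, 45}. advance() with direction -1 finds the predecessor 45 in a leaf,
-// setInternalKey() copies 45 over 50 (safe because 45 lives in a descendant bucket and is not
-// moved by the copy), and delKeyAtPos() then removes 45 from the leaf, rebalancing if the
-// leaf underflows.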
-
-template <class BtreeLayout>
-void BtreeLogic<BtreeLayout>::replaceWithNextChild(OperationContext* opCtx,
- BucketType* bucket,
- const DiskLoc bucketLoc) {
- invariant(bucket->n == 0 && !bucket->nextChild.isNull());
- if (bucket->parent.isNull()) {
- invariant(getRootLoc(opCtx) == bucketLoc);
- _headManager->setHead(opCtx, bucket->nextChild.toRecordId());
- } else {
- BucketType* parentBucket = getBucket(opCtx, bucket->parent);
- int bucketIndexInParent = indexInParent(opCtx, bucket, bucketLoc);
- *opCtx->recoveryUnit()->writing(&childLocForPos(parentBucket, bucketIndexInParent)) =
- bucket->nextChild;
- }
-
- *opCtx->recoveryUnit()->writing(&getBucket(opCtx, bucket->nextChild)->parent) = bucket->parent;
- _cursorRegistry->invalidateCursorsForBucket(bucketLoc);
- deallocBucket(opCtx, bucket, bucketLoc);
-}
-
-template <class BtreeLayout>
-bool BtreeLogic<BtreeLayout>::canMergeChildren(OperationContext* opCtx,
- BucketType* bucket,
- const DiskLoc bucketLoc,
- const int leftIndex) {
- invariant(leftIndex >= 0 && leftIndex < bucket->n);
-
- DiskLoc leftNodeLoc = childLocForPos(bucket, leftIndex);
- DiskLoc rightNodeLoc = childLocForPos(bucket, leftIndex + 1);
-
- if (leftNodeLoc.isNull() || rightNodeLoc.isNull()) {
- return false;
- }
-
- int pos = 0;
-
- BucketType* leftBucket = getBucket(opCtx, leftNodeLoc);
- BucketType* rightBucket = getBucket(opCtx, rightNodeLoc);
-
- int sum = BucketType::HeaderSize + _packedDataSize(leftBucket, pos) +
- _packedDataSize(rightBucket, pos) + getFullKey(bucket, leftIndex).data.dataSize() +
- sizeof(KeyHeaderType);
-
- return sum <= BtreeLayout::BucketSize;
-}
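-
-// Worked example of the fit test above (illustrative numbers): with an 8KB bucket,
-// HeaderSize of 40 bytes, packed left/right payloads of 3000 and 3500 bytes, a 200-byte
-// separator key, and a 16-byte key header, the sum is 40 + 3000 + 3500 + 200 + 16 = 6756,
-// which is <= 8192, so the two children may be merged.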
-
-/**
- * This implementation must respect the meaning and value of lowWaterMark. Also see comments in
- * splitPos().
- */
-template <class BtreeLayout>
-int BtreeLogic<BtreeLayout>::_rebalancedSeparatorPos(OperationContext* opCtx,
- BucketType* bucket,
- int leftIndex) {
- int split = -1;
- int rightSize = 0;
-
- const BucketType* l = childForPos(opCtx, bucket, leftIndex);
- const BucketType* r = childForPos(opCtx, bucket, leftIndex + 1);
-
- int KNS = sizeof(KeyHeaderType);
- int rightSizeLimit = (l->topSize + l->n * KNS + getFullKey(bucket, leftIndex).data.dataSize() +
- KNS + r->topSize + r->n * KNS) /
- 2;
-
- // This constraint should be ensured by only calling this function
- // if we go below the low water mark.
- invariant(rightSizeLimit < BtreeLayout::BucketBodySize);
-
- for (int i = r->n - 1; i > -1; --i) {
- rightSize += getFullKey(r, i).data.dataSize() + KNS;
- if (rightSize > rightSizeLimit) {
- split = l->n + 1 + i;
- break;
- }
- }
-
- if (split == -1) {
- rightSize += getFullKey(bucket, leftIndex).data.dataSize() + KNS;
- if (rightSize > rightSizeLimit) {
- split = l->n;
- }
- }
-
- if (split == -1) {
- for (int i = l->n - 1; i > -1; --i) {
- rightSize += getFullKey(l, i).data.dataSize() + KNS;
- if (rightSize > rightSizeLimit) {
- split = i;
- break;
- }
- }
- }
-
- // safeguards - we must not create an empty bucket
- if (split < 1) {
- split = 1;
- } else if (split > l->n + 1 + r->n - 2) {
- split = l->n + 1 + r->n - 2;
- }
-
- return split;
-}
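-
-// Worked example (illustrative numbers): merging siblings holding 4600 and 3800 bytes of
-// keys plus headers around a 200-byte separator gives rightSizeLimit =
-// (4600 + 200 + 3800) / 2 = 4300. Scanning from the right edge of the right child, the split
-// lands where the accumulated right-hand bytes first exceed 4300, leaving each side near half
-// of the combined contents.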
-
-template <class BtreeLayout>
-void BtreeLogic<BtreeLayout>::doMergeChildren(OperationContext* opCtx,
- BucketType* bucket,
- const DiskLoc bucketLoc,
- int leftIndex) {
- DiskLoc leftNodeLoc = childLocForPos(bucket, leftIndex);
- DiskLoc rightNodeLoc = childLocForPos(bucket, leftIndex + 1);
-
- BucketType* l = btreemod(opCtx, getBucket(opCtx, leftNodeLoc));
- BucketType* r = btreemod(opCtx, getBucket(opCtx, rightNodeLoc));
-
- int pos = 0;
- _packReadyForMod(l, pos);
- _packReadyForMod(r, pos);
-
- // We know the additional keys below will fit in l because canMergeChildren() must be true.
- int oldLNum = l->n;
- // left child's right child becomes old parent key's left child
- FullKey knLeft = getFullKey(bucket, leftIndex);
- invariant(pushBack(l, knLeft.recordLoc, knLeft.data, l->nextChild));
-
- for (int i = 0; i < r->n; ++i) {
- FullKey kn = getFullKey(r, i);
- invariant(pushBack(l, kn.recordLoc, kn.data, kn.prevChildBucket));
- }
-
- l->nextChild = r->nextChild;
- fixParentPtrs(opCtx, l, leftNodeLoc, oldLNum);
- delBucket(opCtx, r, rightNodeLoc);
-
- childLocForPos(bucket, leftIndex + 1) = leftNodeLoc;
- childLocForPos(bucket, leftIndex) = DiskLoc();
- _delKeyAtPos(bucket, leftIndex, true);
-
- if (bucket->n == 0) {
- // Will trash bucket and bucketLoc.
- //
- // TODO To ensure all leaves are of equal height, we should ensure this is only called
- // on the root.
- replaceWithNextChild(opCtx, bucket, bucketLoc);
- } else {
- mayBalanceWithNeighbors(opCtx, bucket, bucketLoc);
- }
-}
-
-template <class BtreeLayout>
-int BtreeLogic<BtreeLayout>::indexInParent(OperationContext* opCtx,
- BucketType* bucket,
- const DiskLoc bucketLoc) const {
- invariant(!bucket->parent.isNull());
- const BucketType* p = getBucket(opCtx, bucket->parent);
- if (p->nextChild == bucketLoc) {
- return p->n;
- }
-
- for (int i = 0; i < p->n; ++i) {
- if (getKeyHeader(p, i).prevChildBucket == bucketLoc) {
- return i;
- }
- }
-
- log() << "ERROR: can't find ref to child bucket.\n";
- log() << "child: " << bucketLoc << "\n";
- // dump();
- log() << "Parent: " << bucket->parent << "\n";
- // p->dump();
- MONGO_UNREACHABLE;
-}
-
-template <class BtreeLayout>
-bool BtreeLogic<BtreeLayout>::tryBalanceChildren(OperationContext* opCtx,
- BucketType* bucket,
- const DiskLoc bucketLoc,
- int leftIndex) {
- // If we can merge, then we must merge rather than balance to preserve bucket utilization
- // constraints.
- if (canMergeChildren(opCtx, bucket, bucketLoc, leftIndex)) {
- return false;
- }
-
- doBalanceChildren(opCtx, btreemod(opCtx, bucket), bucketLoc, leftIndex);
- return true;
-}
-
-template <class BtreeLayout>
-void BtreeLogic<BtreeLayout>::doBalanceLeftToRight(OperationContext* opCtx,
- BucketType* bucket,
- const DiskLoc bucketLoc,
- int leftIndex,
- int split,
- BucketType* l,
- const DiskLoc lchild,
- BucketType* r,
- const DiskLoc rchild) {
- // TODO maybe do some audits the same way pushBack() does? As a precondition, rchild + the
- // old separator are <= half a body size, and lchild is at most completely full. Based on
- // the value of split, rchild will get <= half of the total bytes which is at most 75% of a
- // full body. So rchild will have room for the following keys:
- int rAdd = l->n - split;
- reserveKeysFront(r, rAdd);
-
- for (int i = split + 1, j = 0; i < l->n; ++i, ++j) {
- FullKey kn = getFullKey(l, i);
- setKey(r, j, kn.recordLoc, kn.data, kn.prevChildBucket);
- }
-
- FullKey leftIndexKN = getFullKey(bucket, leftIndex);
- setKey(r, rAdd - 1, leftIndexKN.recordLoc, leftIndexKN.data, l->nextChild);
-
- fixParentPtrs(opCtx, r, rchild, 0, rAdd - 1);
-
- FullKey kn = getFullKey(l, split);
- l->nextChild = kn.prevChildBucket;
-
- // Because lchild is a descendant of thisLoc, updating thisLoc will not affect packing or
- // keys of lchild and kn will be stable during the following setInternalKey()
- setInternalKey(opCtx, bucket, bucketLoc, leftIndex, kn.recordLoc, kn.data, lchild, rchild);
-
- // lchild and rchild cannot be merged, so there must be >0 (actually more) keys to the left
- // of split.
- int zeropos = 0;
- truncateTo(l, split, zeropos);
-}
-
-template <class BtreeLayout>
-void BtreeLogic<BtreeLayout>::doBalanceRightToLeft(OperationContext* opCtx,
- BucketType* bucket,
- const DiskLoc bucketLoc,
- int leftIndex,
- int split,
- BucketType* l,
- const DiskLoc lchild,
- BucketType* r,
- const DiskLoc rchild) {
- // As a precondition, lchild + the old separator are <= half a body size,
- // and rchild is at most completely full. Based on the value of split,
- // lchild will get less than half of the total bytes which is at most 75%
- // of a full body. So lchild will have room for the following keys:
- int lN = l->n;
-
- {
- // left child's right child becomes old parent key's left child
- FullKey kn = getFullKey(bucket, leftIndex);
- invariant(pushBack(l, kn.recordLoc, kn.data, l->nextChild));
- }
-
- for (int i = 0; i < split - lN - 1; ++i) {
- FullKey kn = getFullKey(r, i);
- invariant(pushBack(l, kn.recordLoc, kn.data, kn.prevChildBucket));
- }
-
- {
- FullKey kn = getFullKey(r, split - lN - 1);
- l->nextChild = kn.prevChildBucket;
-        // Child lN was lchild's old nextChild, so we don't need to fix that one.
- fixParentPtrs(opCtx, l, lchild, lN + 1, l->n);
- // Because rchild is a descendant of thisLoc, updating thisLoc will
- // not affect packing or keys of rchild and kn will be stable
- // during the following setInternalKey()
- setInternalKey(opCtx, bucket, bucketLoc, leftIndex, kn.recordLoc, kn.data, lchild, rchild);
- }
-
- // lchild and rchild cannot be merged, so there must be >0 (actually more)
- // keys to the right of split.
- int zeropos = 0;
- dropFront(r, split - lN, zeropos);
-}
-
-template <class BtreeLayout>
-void BtreeLogic<BtreeLayout>::doBalanceChildren(OperationContext* opCtx,
- BucketType* bucket,
- const DiskLoc bucketLoc,
- int leftIndex) {
- DiskLoc lchild = childLocForPos(bucket, leftIndex);
- DiskLoc rchild = childLocForPos(bucket, leftIndex + 1);
-
- int zeropos = 0;
- BucketType* l = btreemod(opCtx, getBucket(opCtx, lchild));
- _packReadyForMod(l, zeropos);
-
- BucketType* r = btreemod(opCtx, getBucket(opCtx, rchild));
- _packReadyForMod(r, zeropos);
-
- int split = _rebalancedSeparatorPos(opCtx, bucket, leftIndex);
-
- // By definition, if we are below the low water mark and cannot merge
- // then we must actively balance.
- invariant(split != l->n);
- if (split < l->n) {
- doBalanceLeftToRight(opCtx, bucket, bucketLoc, leftIndex, split, l, lchild, r, rchild);
- } else {
- doBalanceRightToLeft(opCtx, bucket, bucketLoc, leftIndex, split, l, lchild, r, rchild);
- }
-}
-
-template <class BtreeLayout>
-bool BtreeLogic<BtreeLayout>::mayBalanceWithNeighbors(OperationContext* opCtx,
- BucketType* bucket,
- const DiskLoc bucketLoc) {
- if (bucket->parent.isNull()) {
- return false;
- }
-
- if (_packedDataSize(bucket, 0) >= lowWaterMark()) {
- return false;
- }
-
- BucketType* p = getBucket(opCtx, bucket->parent);
- int parentIdx = indexInParent(opCtx, bucket, bucketLoc);
-
- // TODO will missing neighbor case be possible long term? Should we try to merge/balance
- // somehow in that case if so?
- bool mayBalanceRight = (parentIdx < p->n) && !childLocForPos(p, parentIdx + 1).isNull();
- bool mayBalanceLeft = (parentIdx > 0) && !childLocForPos(p, parentIdx - 1).isNull();
-
- // Balance if possible on one side - we merge only if absolutely necessary to preserve btree
- // bucket utilization constraints since that's a more heavy duty operation (especially if we
- // must re-split later).
- if (mayBalanceRight && tryBalanceChildren(opCtx, p, bucket->parent, parentIdx)) {
- return true;
- }
-
- if (mayBalanceLeft && tryBalanceChildren(opCtx, p, bucket->parent, parentIdx - 1)) {
- return true;
- }
-
- BucketType* pm = btreemod(opCtx, getBucket(opCtx, bucket->parent));
- if (mayBalanceRight) {
- doMergeChildren(opCtx, pm, bucket->parent, parentIdx);
- return true;
- } else if (mayBalanceLeft) {
- doMergeChildren(opCtx, pm, bucket->parent, parentIdx - 1);
- return true;
- }
-
- return false;
-}
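-
-// Summary of the policy above (illustrative walk-through): a bucket that falls below
-// lowWaterMark() first tries to borrow keys from its right sibling, then its left one;
-// tryBalanceChildren() refuses whenever the two children would fit into a single bucket.
-// Only when borrowing is refused on both sides do we merge, since a merge is the heavier
-// operation and may force a re-split soon after.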
-
-template <class BtreeLayout>
-bool BtreeLogic<BtreeLayout>::unindex(OperationContext* opCtx,
- const BSONObj& key,
- const DiskLoc& recordLoc) {
- int pos;
- bool found = false;
- KeyDataOwnedType ownedKey(key);
-
- DiskLoc loc = _locate(opCtx, getRootLoc(opCtx), ownedKey, &pos, &found, recordLoc, 1);
- if (found) {
- BucketType* bucket = btreemod(opCtx, getBucket(opCtx, loc));
- delKeyAtPos(opCtx, bucket, loc, pos);
- assertValid(_indexName, getRoot(opCtx), _ordering);
- }
- return found;
-}
-
-template <class BtreeLayout>
-bool BtreeLogic<BtreeLayout>::isEmpty(OperationContext* opCtx) const {
- return getRoot(opCtx)->n == 0;
-}
-
-/**
- * This can cause a lot of additional page writes when we assign buckets to different parents.
- * Maybe get rid of parent ptrs?
- */
-template <class BtreeLayout>
-void BtreeLogic<BtreeLayout>::fixParentPtrs(OperationContext* opCtx,
- BucketType* bucket,
- const DiskLoc bucketLoc,
- int firstIndex,
- int lastIndex) {
- invariant(getBucket(opCtx, bucketLoc) == bucket);
-
- if (lastIndex == -1) {
- lastIndex = bucket->n;
- }
-
- for (int i = firstIndex; i <= lastIndex; i++) {
- const DiskLoc childLoc = childLocForPos(bucket, i);
- if (!childLoc.isNull()) {
- *opCtx->recoveryUnit()->writing(&getBucket(opCtx, childLoc)->parent) = bucketLoc;
- }
- }
-}
-
-template <class BtreeLayout>
-void BtreeLogic<BtreeLayout>::setInternalKey(OperationContext* opCtx,
- BucketType* bucket,
- const DiskLoc bucketLoc,
- int keypos,
- const DiskLoc recordLoc,
- const KeyDataType& key,
- const DiskLoc lchild,
- const DiskLoc rchild) {
- childLocForPos(bucket, keypos).Null();
-    // This may leave the bucket empty (n == 0), which is ok only as a transient state. In
-    // this case, the implementation of insertHere behaves correctly when n == 0 and, as a
-    // side effect, increments n.
- _delKeyAtPos(bucket, keypos, true);
-
- // Ensure we do not orphan neighbor's old child.
- invariant(childLocForPos(bucket, keypos) == rchild);
-
- // Just set temporarily - required to pass validation in insertHere()
- childLocForPos(bucket, keypos) = lchild;
-
- insertHere(opCtx, bucketLoc, keypos, key, recordLoc, lchild, rchild);
-}
-
-/**
- * insert a key in this bucket, splitting if necessary.
- *
- * @keypos - where to insert the key in range 0..n. 0=make leftmost, n=make rightmost. NOTE
- * this function may free some data, and as a result the value passed for keypos may be invalid
- * after calling insertHere()
- *
- * Some of the write intent signaling below relies on the implementation of the optimized write
- * intent code in basicInsert().
- */
-template <class BtreeLayout>
-void BtreeLogic<BtreeLayout>::insertHere(OperationContext* opCtx,
- const DiskLoc bucketLoc,
- int pos,
- const KeyDataType& key,
- const DiskLoc recordLoc,
- const DiskLoc leftChildLoc,
- const DiskLoc rightChildLoc) {
- BucketType* bucket = getBucket(opCtx, bucketLoc);
-
- if (!basicInsert(opCtx, bucket, bucketLoc, pos, key, recordLoc)) {
- // If basicInsert() fails, the bucket will be packed as required by split().
- split(opCtx,
- btreemod(opCtx, bucket),
- bucketLoc,
- pos,
- recordLoc,
- key,
- leftChildLoc,
- rightChildLoc);
- return;
- }
-
- KeyHeaderType* kn = &getKeyHeader(bucket, pos);
- if (pos + 1 == bucket->n) {
- // It's the last key.
- if (bucket->nextChild != leftChildLoc) {
- // XXX log more
- MONGO_UNREACHABLE;
- }
- kn->prevChildBucket = bucket->nextChild;
- invariant(kn->prevChildBucket == leftChildLoc);
- *opCtx->recoveryUnit()->writing(&bucket->nextChild) = rightChildLoc;
- if (!rightChildLoc.isNull()) {
- *opCtx->recoveryUnit()->writing(&getBucket(opCtx, rightChildLoc)->parent) = bucketLoc;
- }
- } else {
- kn->prevChildBucket = leftChildLoc;
- if (getKeyHeader(bucket, pos + 1).prevChildBucket != leftChildLoc) {
- // XXX: log more
- MONGO_UNREACHABLE;
- }
- const LocType* pc = &getKeyHeader(bucket, pos + 1).prevChildBucket;
- // Intent declared in basicInsert()
- *const_cast<LocType*>(pc) = rightChildLoc;
- if (!rightChildLoc.isNull()) {
- *opCtx->recoveryUnit()->writing(&getBucket(opCtx, rightChildLoc)->parent) = bucketLoc;
- }
- }
-}
-
-template <class BtreeLayout>
-void BtreeLogic<BtreeLayout>::split(OperationContext* opCtx,
- BucketType* bucket,
- const DiskLoc bucketLoc,
- int keypos,
- const DiskLoc recordLoc,
- const KeyDataType& key,
- const DiskLoc lchild,
- const DiskLoc rchild) {
- int split = splitPos(bucket, keypos);
- DiskLoc rLoc = _addBucket(opCtx);
- BucketType* r = btreemod(opCtx, getBucket(opCtx, rLoc));
-
- for (int i = split + 1; i < bucket->n; i++) {
- FullKey kn = getFullKey(bucket, i);
- invariant(pushBack(r, kn.recordLoc, kn.data, kn.prevChildBucket));
- }
- r->nextChild = bucket->nextChild;
- assertValid(_indexName, r, _ordering);
-
- r = NULL;
- fixParentPtrs(opCtx, getBucket(opCtx, rLoc), rLoc);
-
- FullKey splitkey = getFullKey(bucket, split);
-    // splitkey gets promoted; its children will be thisLoc (l) and rLoc (r)
- bucket->nextChild = splitkey.prevChildBucket;
-
- // Because thisLoc is a descendant of parent, updating parent will not affect packing or
- // keys of thisLoc and splitkey will be stable during the following:
-
- if (bucket->parent.isNull()) {
-        // promote splitkey to a parent node; make a new parent if we were the root
- DiskLoc L = _addBucket(opCtx);
- BucketType* p = btreemod(opCtx, getBucket(opCtx, L));
- invariant(pushBack(p, splitkey.recordLoc, splitkey.data, bucketLoc));
- p->nextChild = rLoc;
- assertValid(_indexName, p, _ordering);
- bucket->parent = L;
- _headManager->setHead(opCtx, L.toRecordId());
- *opCtx->recoveryUnit()->writing(&getBucket(opCtx, rLoc)->parent) = bucket->parent;
- } else {
-        // Set this before calling _insert - if it splits, it will run the fixParentPtrs()
-        // logic and change the value.
- *opCtx->recoveryUnit()->writing(&getBucket(opCtx, rLoc)->parent) = bucket->parent;
- _insert(opCtx,
- getBucket(opCtx, bucket->parent),
- bucket->parent,
- splitkey.data,
- splitkey.recordLoc,
- true, // dupsallowed
- bucketLoc,
- rLoc)
- .transitional_ignore();
- }
-
- int newpos = keypos;
- // note this may trash splitkey.key. thus we had to promote it before finishing up here.
- truncateTo(bucket, split, newpos);
-
-    // add our new key; there is room now
- if (keypos <= split) {
- insertHere(opCtx, bucketLoc, newpos, key, recordLoc, lchild, rchild);
- } else {
- int kp = keypos - split - 1;
- invariant(kp >= 0);
- insertHere(opCtx, rLoc, kp, key, recordLoc, lchild, rchild);
- }
-}
-
-class DummyDocWriter final : public DocWriter {
-public:
- DummyDocWriter(size_t sz) : _sz(sz) {}
-    virtual void writeDocument(char* buf) const {
-        // no-op
-    }
- virtual size_t documentSize() const {
- return _sz;
- }
-
-private:
- size_t _sz;
-};
-
-template <class BtreeLayout>
-Status BtreeLogic<BtreeLayout>::initAsEmpty(OperationContext* opCtx) {
- if (!_headManager->getHead(opCtx).isNull()) {
- return Status(ErrorCodes::InternalError, "index already initialized");
- }
-
- _headManager->setHead(opCtx, _addBucket(opCtx).toRecordId());
- return Status::OK();
-}
-
-template <class BtreeLayout>
-DiskLoc BtreeLogic<BtreeLayout>::_addBucket(OperationContext* opCtx) {
- DummyDocWriter docWriter(BtreeLayout::BucketSize);
- StatusWith<RecordId> loc =
- _recordStore->insertRecordWithDocWriter(opCtx, &docWriter, Timestamp());
- // XXX: remove this(?) or turn into massert or sanely bubble it back up.
- uassertStatusOK(loc.getStatus());
-
-    // This is a new bucket, not referenced by anyone; we probably don't need this lock.
- BucketType* b = btreemod(opCtx, getBucket(opCtx, loc.getValue()));
- init(b);
- return DiskLoc::fromRecordId(loc.getValue());
-}
-
-// static
-template <class BtreeLayout>
-void BtreeLogic<BtreeLayout>::dumpBucket(const BucketType* bucket, int indentLength) {
- log() << "BUCKET n:" << bucket->n << ", parent:" << hex << bucket->parent.getOfs() << dec;
-
- const string indent = string(indentLength, ' ');
-
- for (int i = 0; i < bucket->n; i++) {
- FullKey k = getFullKey(bucket, i);
- string ks = k.data.toString();
- log() << indent << " " << hex << k.prevChildBucket.getOfs() << "<-- prevChildBucket for "
- << i;
- log() << indent << " " << i << ' ' << redact(ks.substr(0, 60))
- << " Loc:" << k.recordLoc.toString() << dec;
- if (getKeyHeader(bucket, i).isUnused()) {
- log() << " UNUSED";
- }
- }
-
- log() << indent << " " << hex << bucket->nextChild.getOfs() << dec << "<-- nextChild bucket";
-}
-
-template <class BtreeLayout>
-DiskLoc BtreeLogic<BtreeLayout>::getDiskLoc(OperationContext* opCtx,
- const DiskLoc& bucketLoc,
- const int keyOffset) const {
- invariant(!bucketLoc.isNull());
- BucketType* bucket = getBucket(opCtx, bucketLoc);
- return getKeyHeader(bucket, keyOffset).recordLoc;
-}
-
-template <class BtreeLayout>
-BSONObj BtreeLogic<BtreeLayout>::getKey(OperationContext* opCtx,
- const DiskLoc& bucketLoc,
- const int keyOffset) const {
- invariant(!bucketLoc.isNull());
- BucketType* bucket = getBucket(opCtx, bucketLoc);
- int n = bucket->n;
- invariant(n != BtreeLayout::INVALID_N_SENTINEL);
- invariant(n >= 0);
- invariant(n < 10000);
-
-    invariant(keyOffset >= 0);
-    invariant(keyOffset < n);
-
-    return getFullKey(bucket, keyOffset).data.toBson();
-}
-
-template <class BtreeLayout>
-IndexKeyEntry BtreeLogic<BtreeLayout>::getRandomEntry(OperationContext* opCtx) const {
- // To ensure a uniform distribution, all keys must have an equal probability of being selected.
- // Specifically, a key from the root should have the same probability of being selected as a key
- // from a leaf.
- //
- // Here we do a random walk until we get to a leaf, storing a random key from each bucket along
- // the way down. Because the root is always present in the random walk, but any given leaf would
- // seldom be seen, we assign weights to each key such that the key from the leaf is much more
- // likely to be selected than the key from the root. These weights attempt to ensure each entry
- // is equally likely to be selected and avoid bias towards the entries closer to the root.
- //
- // As a simplification, we treat all buckets in a given level as having the same number of
- // children. While this is inaccurate if the tree isn't perfectly balanced or if key-size
- // greatly varies, it is assumed to be good enough for this purpose.
- invariant(!isEmpty(opCtx));
- BucketType* root = getRoot(opCtx);
-
- vector<int64_t> nKeysInLevel;
- vector<FullKey> selectedKeys;
-
- auto& prng = opCtx->getClient()->getPrng();
-
- int nRetries = 0;
- const int kMaxRetries = 5;
- do {
- // See documentation below for description of parameters.
- recordRandomWalk(opCtx, &prng, root, 1, &nKeysInLevel, &selectedKeys);
- } while (selectedKeys.empty() && nRetries++ < kMaxRetries);
- massert(28826,
- str::stream() << "index " << _indexName << " may be corrupt, please repair",
- !selectedKeys.empty());
-
- invariant(nKeysInLevel.size() == selectedKeys.size());
- // Select a key from the random walk such that each key from the B-tree has an equal probability
- // of being selected.
- //
- // Let N be the sum of 'nKeysInLevel'. That is, the total number of keys in the B-tree.
- //
- // On our walk down the tree, we selected exactly one key from each level of the B-tree, where
- // 'selectedKeys[i]' came from the ith level of the tree. On any given level, each key has an
- // equal probability of being selected. Specifically, a key on level i has a probability of
- // 1/'nKeysInLevel[i]' of being selected as 'selectedKeys[i]'. Then if, given our selected keys,
- // we choose to return 'selectedKeys[i]' with a probability of 'nKeysInLevel[i]'/N, that key
- // will be returned with a probability of 1/'nKeysInLevel[i]' * 'nKeysInLevel[i]'/N = 1/N.
- //
- // So 'selectedKeys[i]' should have a probability of 'nKeysInLevel[i]'/N of being returned. We
- // will do so by picking a random number X in the range [0, N). Then, if X is in the first
- // 'nKeysInLevel[0]' numbers, we will return 'selectedKeys[0]'. If X is in the next
- // 'nKeysInLevel[1]' numbers, we will return 'selectedKeys[1]', and so on.
-    int64_t choice = prng.nextInt64(
-        std::accumulate(nKeysInLevel.begin(), nKeysInLevel.end(), static_cast<int64_t>(0)));
- for (size_t i = 0; i < nKeysInLevel.size(); i++) {
- if (choice < nKeysInLevel[i]) {
- return {selectedKeys[i].data.toBson(), selectedKeys[i].header.recordLoc.toRecordId()};
- }
- choice -= nKeysInLevel[i];
- }
- MONGO_UNREACHABLE;
-}
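-
-// A minimal, standalone sketch of the weighted pick above, separated from the B-tree types.
-// Given the per-level key counts and a roll uniform in [0, N), each key in the tree is
-// returned with probability 1/N. 'pickLevelExample' is an illustrative name; assumes
-// std::vector is available.
-namespace random_entry_example {
-inline size_t pickLevelExample(const std::vector<int64_t>& nKeysInLevel, int64_t roll) {
-    for (size_t i = 0; i < nKeysInLevel.size(); i++) {
-        if (roll < nKeysInLevel[i])
-            return i;  // level i wins with probability nKeysInLevel[i] / N
-        roll -= nKeysInLevel[i];
-    }
-    return nKeysInLevel.size() - 1;  // unreachable when 'roll' is in range
-}
-}  // namespace random_entry_example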
-
-/**
- * Does a random walk through the tree, recording information about the walk along the way.
- *
- * 'nKeysInLevel' will be filled in such that 'nKeysInLevel[i]' is an approximation of the number of
- * keys in the ith level of the B-tree.
- *
- * 'selectedKeys' will be filled in such that 'selectedKeys[i]' will be a pseudo-random key selected
- * from the bucket we went through on the ith level of the B-tree.
- */
-template <class BtreeLayout>
-void BtreeLogic<BtreeLayout>::recordRandomWalk(OperationContext* opCtx,
- PseudoRandom* prng,
- BucketType* curBucket,
- int64_t nBucketsInCurrentLevel,
- vector<int64_t>* nKeysInLevel,
- vector<FullKey>* selectedKeys) const {
- // Select a random key from this bucket, and record it.
- int nKeys = curBucket->n;
- int keyToReturn = prng->nextInt32(nKeys);
- auto fullKey = getFullKey(curBucket, keyToReturn);
- // If the key is not used, just skip this level.
- if (fullKey.header.isUsed()) {
- selectedKeys->push_back(std::move(fullKey));
- nKeysInLevel->push_back(nBucketsInCurrentLevel * nKeys);
- }
-
- // Select a random child and descend (if there are any).
- int nChildren = nKeys + 1;
- int nextChild = prng->nextInt32(nChildren);
- if (auto child = childForPos(opCtx, curBucket, nextChild)) {
- recordRandomWalk(
- opCtx, prng, child, nBucketsInCurrentLevel * nChildren, nKeysInLevel, selectedKeys);
- }
-}
-
-template <class BtreeLayout>
-Status BtreeLogic<BtreeLayout>::touch(OperationContext* opCtx) const {
- return _recordStore->touch(opCtx, NULL);
-}
-
-template <class BtreeLayout>
-long long BtreeLogic<BtreeLayout>::fullValidate(OperationContext* opCtx,
- long long* unusedCount,
- bool strict,
- bool dumpBuckets,
- unsigned depth) const {
- return _fullValidate(opCtx, getRootLoc(opCtx), unusedCount, strict, dumpBuckets, depth);
-}
-
-template <class BtreeLayout>
-long long BtreeLogic<BtreeLayout>::_fullValidate(OperationContext* opCtx,
- const DiskLoc bucketLoc,
- long long* unusedCount,
- bool strict,
- bool dumpBuckets,
- unsigned depth) const {
- BucketType* bucket = getBucket(opCtx, bucketLoc);
- assertValid(_indexName, bucket, _ordering, true);
-
- if (dumpBuckets) {
- log() << bucketLoc.toString() << ' ';
- dumpBucket(bucket, depth);
- }
-
- long long keyCount = 0;
-
- for (int i = 0; i < bucket->n; i++) {
- KeyHeaderType& kn = getKeyHeader(bucket, i);
-
- if (kn.isUsed()) {
- keyCount++;
- } else if (NULL != unusedCount) {
- ++(*unusedCount);
- }
-
- if (!kn.prevChildBucket.isNull()) {
- DiskLoc left = kn.prevChildBucket;
- BucketType* b = getBucket(opCtx, left);
-
- if (strict) {
- invariant(b->parent == bucketLoc);
- } else if (b->parent != bucketLoc) {
- warning() << "index corruption detected: b->parent != bucketLoc";
- }
-
- keyCount += _fullValidate(opCtx, left, unusedCount, strict, dumpBuckets, depth + 1);
- }
- }
-
- if (!bucket->nextChild.isNull()) {
- BucketType* b = getBucket(opCtx, bucket->nextChild);
- if (strict) {
- invariant(b->parent == bucketLoc);
- } else if (b->parent != bucketLoc) {
- warning() << "index corruption detected: b->parent != bucketLoc";
- }
-
- keyCount +=
- _fullValidate(opCtx, bucket->nextChild, unusedCount, strict, dumpBuckets, depth + 1);
- }
-
- return keyCount;
-}
-
-// XXX: remove this(?). Used to limit how many corrupt buckets assertValid dumps.
-int nDumped = 0;
-
-// static
-template <class BtreeLayout>
-void BtreeLogic<BtreeLayout>::assertValid(const std::string& ns,
- BucketType* bucket,
- const Ordering& ordering,
- bool force) {
- if (!force) {
- return;
- }
-
-    // This check is very slow, so only run it every 128th call.
- {
- static int _k;
- if (++_k % 128) {
- return;
- }
- }
-
- DEV {
- // slow:
- for (int i = 0; i < bucket->n - 1; i++) {
- FullKey firstKey = getFullKey(bucket, i);
- FullKey secondKey = getFullKey(bucket, i + 1);
- int z = firstKey.data.woCompare(secondKey.data, ordering);
- if (z > 0) {
- log() << "ERROR: btree key order corrupt. Keys:" << endl;
- if (++nDumped < 5) {
- for (int j = 0; j < bucket->n; j++) {
- log() << " " << redact(getFullKey(bucket, j).data.toString()) << endl;
- }
- dumpBucket(bucket);
- }
- MONGO_UNREACHABLE;
- break;
- } else if (z == 0) {
- if (!(firstKey.header.recordLoc < secondKey.header.recordLoc)) {
- log() << "ERROR: btree key order corrupt (recordlocs wrong):" << endl;
- log() << " k(" << i << ")" << redact(firstKey.data.toString())
- << " RL:" << firstKey.header.recordLoc.toString() << endl;
- log() << " k(" << i + 1 << ")" << redact(secondKey.data.toString())
- << " RL:" << secondKey.header.recordLoc.toString() << endl;
- invariant(firstKey.header.recordLoc < secondKey.header.recordLoc);
- }
- }
- }
- }
- else {
- // faster:
- if (bucket->n > 1) {
- FullKey k1 = getFullKey(bucket, 0);
- FullKey k2 = getFullKey(bucket, bucket->n - 1);
- int z = k1.data.woCompare(k2.data, ordering);
- if (z > 0) {
- log() << "Btree keys out of order in collection " << ns;
- std::call_once(assertValidFlag, [&bucket]() { dumpBucket(bucket); });
- MONGO_UNREACHABLE;
- }
- }
- }
-}
-
-template <class BtreeLayout>
-Status BtreeLogic<BtreeLayout>::insert(OperationContext* opCtx,
- const BSONObj& rawKey,
- const DiskLoc& value,
- bool dupsAllowed) {
- KeyDataOwnedType key(rawKey);
-
- if (key.dataSize() > BtreeLayout::KeyMax) {
- string msg = str::stream() << "Btree::insert: key too large to index, failing "
- << _indexName << ' ' << key.dataSize() << ' ' << key.toString();
- return Status(ErrorCodes::KeyTooLong, msg);
- }
-
- Status status = _insert(
- opCtx, getRoot(opCtx), getRootLoc(opCtx), key, value, dupsAllowed, DiskLoc(), DiskLoc());
-
- assertValid(_indexName, getRoot(opCtx), _ordering);
- return status;
-}
-
-template <class BtreeLayout>
-Status BtreeLogic<BtreeLayout>::_insert(OperationContext* opCtx,
- BucketType* bucket,
- const DiskLoc bucketLoc,
- const KeyDataType& key,
- const DiskLoc recordLoc,
- bool dupsAllowed,
- const DiskLoc leftChild,
- const DiskLoc rightChild) {
- invariant(key.dataSize() > 0);
-
- int pos;
- bool found;
- Status findStatus = _find(opCtx, bucket, key, recordLoc, !dupsAllowed, &pos, &found);
- if (!findStatus.isOK()) {
- return findStatus;
- }
-
- if (found) {
- KeyHeaderType& header = getKeyHeader(bucket, pos);
- if (header.isUnused()) {
- LOG(4) << "btree _insert: reusing unused key" << endl;
- massert(17433, "_insert: reuse key but lchild is not null", leftChild.isNull());
- massert(17434, "_insert: reuse key but rchild is not null", rightChild.isNull());
- opCtx->recoveryUnit()->writing(&header)->setUsed();
- return Status::OK();
- }
- // The logic in _find() prohibits finding and returning a position if the 'used' bit
- // in the header is set and dups are disallowed.
- invariant(dupsAllowed);
-
- // The key and value are already in the index. Not an error because documents that have
- // already been indexed may be seen again due to updates during a background index scan.
- return Status::OK();
- }
-
- DiskLoc childLoc = childLocForPos(bucket, pos);
-
- // In current usage, rightChild is NULL for a new key and is not NULL when we are
- // promoting a split key. These are the only two cases where _insert() is called
- // currently.
- if (childLoc.isNull() || !rightChild.isNull()) {
- insertHere(opCtx, bucketLoc, pos, key, recordLoc, leftChild, rightChild);
- return Status::OK();
- } else {
- return _insert(opCtx,
- getBucket(opCtx, childLoc),
- childLoc,
- key,
- recordLoc,
- dupsAllowed,
- DiskLoc(),
- DiskLoc());
- }
-}
-
-template <class BtreeLayout>
-DiskLoc BtreeLogic<BtreeLayout>::advance(OperationContext* opCtx,
- const DiskLoc& bucketLoc,
- int* posInOut,
- int direction) const {
- BucketType* bucket = getBucket(opCtx, bucketLoc);
-
- if (*posInOut < 0 || *posInOut >= bucket->n) {
- log() << "ASSERT failure advancing btree bucket" << endl;
- log() << " thisLoc: " << bucketLoc.toString() << endl;
- log() << " keyOfs: " << *posInOut << " n:" << bucket->n << " direction: " << direction
- << endl;
- // log() << bucketSummary() << endl;
- MONGO_UNREACHABLE;
- }
-
- // XXX document
- int adj = direction < 0 ? 1 : 0;
- int ko = *posInOut + direction;
-
- // Look down if we need to.
- DiskLoc nextDownLoc = childLocForPos(bucket, ko + adj);
- BucketType* nextDown = getBucket(opCtx, nextDownLoc);
- if (NULL != nextDown) {
- for (;;) {
- if (direction > 0) {
- *posInOut = 0;
- } else {
- *posInOut = nextDown->n - 1;
- }
- DiskLoc newNextDownLoc = childLocForPos(nextDown, *posInOut + adj);
- BucketType* newNextDownBucket = getBucket(opCtx, newNextDownLoc);
- if (NULL == newNextDownBucket) {
- break;
- }
- nextDownLoc = newNextDownLoc;
- nextDown = newNextDownBucket;
- }
- return nextDownLoc;
- }
-
- // Looking down isn't the right choice, move forward.
- if (ko < bucket->n && ko >= 0) {
- *posInOut = ko;
- return bucketLoc;
- }
-
- // Hit the end of the bucket, move up and over.
- DiskLoc childLoc = bucketLoc;
- DiskLoc ancestor = getBucket(opCtx, bucketLoc)->parent;
- for (;;) {
- if (ancestor.isNull()) {
- break;
- }
- BucketType* an = getBucket(opCtx, ancestor);
- for (int i = 0; i < an->n; i++) {
- if (childLocForPos(an, i + adj) == childLoc) {
- *posInOut = i;
- return ancestor;
- }
- }
- invariant(direction < 0 || an->nextChild == childLoc);
- // parent exhausted also, keep going up
- childLoc = ancestor;
- ancestor = an->parent;
- }
-
- return DiskLoc();
-}
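-
-// Worked example (illustrative): advancing forward from the last key of a leaf with no right
-// child walks up through 'parent' pointers until some ancestor holds the current child to the
-// left of one of its keys; that ancestor key is the in-order successor. Running off the root
-// instead returns a null DiskLoc, signaling the end of the index.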
-
-template <class BtreeLayout>
-bool BtreeLogic<BtreeLayout>::keyIsUsed(OperationContext* opCtx,
- const DiskLoc& loc,
- const int& pos) const {
- return getKeyHeader(getBucket(opCtx, loc), pos).isUsed();
-}
-
-template <class BtreeLayout>
-bool BtreeLogic<BtreeLayout>::locate(OperationContext* opCtx,
- const BSONObj& key,
- const DiskLoc& recordLoc,
- const int direction,
- int* posOut,
- DiskLoc* bucketLocOut) const {
- // Clear out any data.
- *posOut = 0;
- *bucketLocOut = DiskLoc();
-
- bool found = false;
- KeyDataOwnedType owned(key);
-
- *bucketLocOut = _locate(opCtx, getRootLoc(opCtx), owned, posOut, &found, recordLoc, direction);
-
- skipUnusedKeys(opCtx, bucketLocOut, posOut, direction);
-
- return found;
-}
-
-/**
- * Recursively walk down the btree, looking for a match of key and recordLoc.
- * Caller should have acquired lock on bucketLoc.
- */
-template <class BtreeLayout>
-DiskLoc BtreeLogic<BtreeLayout>::_locate(OperationContext* opCtx,
- const DiskLoc& bucketLoc,
- const KeyDataType& key,
- int* posOut,
- bool* foundOut,
- const DiskLoc& recordLoc,
- const int direction) const {
- int position;
- BucketType* bucket = getBucket(opCtx, bucketLoc);
- // XXX: owned to not owned conversion(?)
- _find(opCtx, bucket, key, recordLoc, false, &position, foundOut).transitional_ignore();
-
- // Look in our current bucket.
- if (*foundOut) {
- *posOut = position;
- return bucketLoc;
- }
-
- // Not in our current bucket. 'position' tells us where there may be a child.
- DiskLoc childLoc = childLocForPos(bucket, position);
-
- if (!childLoc.isNull()) {
- DiskLoc inChild = _locate(opCtx, childLoc, key, posOut, foundOut, recordLoc, direction);
- if (!inChild.isNull()) {
- return inChild;
- }
- }
-
- *posOut = position;
-
- if (direction < 0) {
- // The key *would* go to our left.
- (*posOut)--;
- if (-1 == *posOut) {
- // But there's no space for that in our bucket.
- return DiskLoc();
- } else {
- return bucketLoc;
- }
- } else {
- // The key would go to our right...
- if (bucket->n == *posOut) {
- return DiskLoc();
- } else {
- // But only if there is space.
- return bucketLoc;
- }
- }
-}
-
-// TODO: relocate
-template <class BtreeLayout>
-bool BtreeLogic<BtreeLayout>::isHead(BucketType* bucket) {
- return bucket->parent.isNull();
-}
-
-template <class BtreeLayout>
-typename BtreeLogic<BtreeLayout>::BucketType* BtreeLogic<BtreeLayout>::getBucket(
- OperationContext* opCtx, const RecordId id) const {
- if (id.isNull()) {
- return NULL;
- }
-
- RecordData recordData = _recordStore->dataFor(opCtx, id);
-
- // we need to be working on the raw bytes, not a transient copy
- invariant(!recordData.isOwned());
-
- return reinterpret_cast<BucketType*>(const_cast<char*>(recordData.data()));
-}
-
-template <class BtreeLayout>
-typename BtreeLogic<BtreeLayout>::BucketType* BtreeLogic<BtreeLayout>::getRoot(
- OperationContext* opCtx) const {
- return getBucket(opCtx, _headManager->getHead(opCtx));
-}
-
-template <class BtreeLayout>
-DiskLoc BtreeLogic<BtreeLayout>::getRootLoc(OperationContext* opCtx) const {
- return DiskLoc::fromRecordId(_headManager->getHead(opCtx));
-}
-
-template <class BtreeLayout>
-typename BtreeLogic<BtreeLayout>::BucketType* BtreeLogic<BtreeLayout>::childForPos(
- OperationContext* opCtx, BucketType* bucket, int pos) const {
- DiskLoc loc = childLocForPos(bucket, pos);
- return getBucket(opCtx, loc);
-}
-
-template <class BtreeLayout>
-typename BtreeLogic<BtreeLayout>::LocType& BtreeLogic<BtreeLayout>::childLocForPos(
- BucketType* bucket, int pos) {
- if (bucket->n == pos) {
- return bucket->nextChild;
- } else {
- return getKeyHeader(bucket, pos).prevChildBucket;
- }
-}
-
-//
-// And, template stuff.
-//
-
-// V0 format.
-template struct FixedWidthKey<DiskLoc>;
-template class BtreeLogic<BtreeLayoutV0>;
-
-// V1 format.
-template struct FixedWidthKey<DiskLoc56Bit>;
-template class BtreeLogic<BtreeLayoutV1>;
-
-} // namespace mongo
diff --git a/src/mongo/db/storage/mmap_v1/btree/btree_logic.h b/src/mongo/db/storage/mmap_v1/btree/btree_logic.h
deleted file mode 100644
index 1f6f0645875..00000000000
--- a/src/mongo/db/storage/mmap_v1/btree/btree_logic.h
+++ /dev/null
@@ -1,587 +0,0 @@
-/**
- * Copyright (C) 2014 MongoDB Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#pragma once
-
-#include <string>
-
-#include "mongo/db/catalog/head_manager.h"
-#include "mongo/db/catalog/index_catalog_entry.h"
-#include "mongo/db/jsobj.h"
-#include "mongo/db/operation_context.h"
-#include "mongo/db/storage/index_entry_comparison.h"
-#include "mongo/db/storage/mmap_v1/btree/btree_ondisk.h"
-#include "mongo/db/storage/mmap_v1/btree/key.h"
-#include "mongo/db/storage/mmap_v1/diskloc.h"
-
-namespace mongo {
-
-class PseudoRandom;
-class RecordStore;
-class SavedCursorRegistry;
-
-// Used for unit-testing only
-template <class BtreeLayout>
-class BtreeLogicTestBase;
-template <class BtreeLayout>
-class ArtificialTreeBuilder;
-
-/**
- * This is the logic for manipulating the Btree. It is (mostly) independent of the on-disk
- * format.
- */
-template <class BtreeLayout>
-class BtreeLogic {
-public:
- // AKA _keyNode
- typedef typename BtreeLayout::FixedWidthKeyType KeyHeaderType;
-
- // AKA Key
- typedef typename BtreeLayout::KeyType KeyDataType;
-
- // AKA KeyOwned
- typedef typename BtreeLayout::KeyOwnedType KeyDataOwnedType;
-
- // AKA Loc
- typedef typename BtreeLayout::LocType LocType;
-
- // AKA BucketBasics or BtreeBucket, either one.
- typedef typename BtreeLayout::BucketType BucketType;
-
- /**
- * 'head' manages the catalog information.
- * 'store' allocates and frees buckets.
- * 'ordering' is meta-information we store in the catalog.
- * 'indexName' is a string identifying the index, used when printing error messages.
- */
- BtreeLogic(HeadManager* head,
- RecordStore* store,
- SavedCursorRegistry* cursors,
- const Ordering& ordering,
- const std::string& indexName,
- bool isUnique)
- : _headManager(head),
- _recordStore(store),
- _cursorRegistry(cursors),
- _ordering(ordering),
- _indexName(indexName),
- _isUnique(isUnique) {}
-
- //
- // Public-facing
- //
-
- class Builder {
- public:
- typedef typename BtreeLayout::KeyOwnedType KeyDataOwnedType;
- typedef typename BtreeLayout::KeyType KeyDataType;
-
- Status addKey(const BSONObj& key, const DiskLoc& loc);
-
- private:
- friend class BtreeLogic;
-
- class SetRightLeafLocChange;
-
- Builder(BtreeLogic* logic, OperationContext* opCtx, bool dupsAllowed);
-
- /**
- * Creates and returns a new empty bucket to the right of leftSib, maintaining the
- * internal consistency of the tree. leftSib must be the right-most child of its parent
- * or it must be the root.
- */
- DiskLoc newBucket(BucketType* leftSib, DiskLoc leftSibLoc);
-
- BucketType* _getModifiableBucket(DiskLoc loc);
- BucketType* _getBucket(DiskLoc loc);
-
- // Not owned.
- BtreeLogic* _logic;
-
- DiskLoc _rightLeafLoc; // DiskLoc of right-most (highest) leaf bucket.
- bool _dupsAllowed;
- std::unique_ptr<KeyDataOwnedType> _keyLast;
-
- // Not owned.
- OperationContext* _opCtx;
- };
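-
- // A minimal bulk-load sketch (illustrative only; 'logic' and 'sortedEntries'
- // are hypothetical, and keys are assumed to be fed in ascending order):
- //
- //   std::unique_ptr<Builder> builder(logic->newBuilder(opCtx, false));
- //   for (const auto& entry : sortedEntries) {
- //       uassertStatusOK(builder->addKey(entry.key, entry.loc));
- //   }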
-
- /**
- * Caller owns the returned pointer.
- * 'this' must outlive the returned pointer.
- */
- Builder* newBuilder(OperationContext* opCtx, bool dupsAllowed);
-
- Status dupKeyCheck(OperationContext* opCtx, const BSONObj& key, const DiskLoc& loc) const;
-
- Status insert(OperationContext* opCtx,
- const BSONObj& rawKey,
- const DiskLoc& value,
- bool dupsAllowed);
-
- /**
- * Navigates down the tree and locates the bucket and position containing a record with
- * the specified <key, recordLoc> combination.
- *
- * @return true if the exact <key, recordLoc> was found. Otherwise returns false, and
- * bucketLocOut contains the bucket holding the key immediately before or after the
- * searched one (depending on the direction).
- */
- bool locate(OperationContext* opCtx,
- const BSONObj& key,
- const DiskLoc& recordLoc,
- const int direction,
- int* posOut,
- DiskLoc* bucketLocOut) const;
-
- void advance(OperationContext* opCtx,
- DiskLoc* bucketLocInOut,
- int* posInOut,
- int direction) const;
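-
- // A minimal cursor-style sketch (illustrative only; 'btree', 'key', and
- // 'recordLoc' are hypothetical): locate() positions at (or next to) the
- // search key, and advance() then steps one key at a time in 'direction',
- // crossing bucket boundaries as needed:
- //
- //   int pos;
- //   DiskLoc bucketLoc;
- //   btree.locate(opCtx, key, recordLoc, 1, &pos, &bucketLoc);
- //   while (!bucketLoc.isNull()) {
- //       BSONObj k = btree.getKey(opCtx, bucketLoc, pos);  // consume
- //       btree.advance(opCtx, &bucketLoc, &pos, 1);
- //   }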
-
- bool exists(OperationContext* opCtx, const KeyDataType& key) const;
-
- bool unindex(OperationContext* opCtx, const BSONObj& key, const DiskLoc& recordLoc);
-
- bool isEmpty(OperationContext* opCtx) const;
-
- long long fullValidate(OperationContext*,
- long long* unusedCount,
- bool strict,
- bool dumpBuckets,
- unsigned depth) const;
-
- DiskLoc getDiskLoc(OperationContext* opCtx,
- const DiskLoc& bucketLoc,
- const int keyOffset) const;
-
- BSONObj getKey(OperationContext* opCtx, const DiskLoc& bucketLoc, const int keyOffset) const;
-
- /**
- * Returns a pseudo-random element from the tree. It is an error to call this method if the tree
- * is empty.
- */
- IndexKeyEntry getRandomEntry(OperationContext* opCtx) const;
-
- DiskLoc getHead(OperationContext* opCtx) const {
- return DiskLoc::fromRecordId(_headManager->getHead(opCtx));
- }
-
- Status touch(OperationContext* opCtx) const;
-
- //
- // Composite key navigation methods
- //
-
- void customLocate(OperationContext* opCtx,
- DiskLoc* locInOut,
- int* keyOfsInOut,
- const IndexSeekPoint& seekPoint,
- int direction) const;
-
- void advanceTo(OperationContext*,
- DiskLoc* thisLocInOut,
- int* keyOfsInOut,
- const IndexSeekPoint& seekPoint,
- int direction) const;
-
- void restorePosition(OperationContext* opCtx,
- const BSONObj& savedKey,
- const DiskLoc& savedLoc,
- int direction,
- DiskLoc* bucketInOut,
- int* keyOffsetInOut) const;
-
- //
- // Creation and deletion
- //
-
- /**
- * Returns OK if the index was uninitialized before, error status otherwise.
- */
- Status initAsEmpty(OperationContext* opCtx);
-
- //
- // Size constants
- //
-
- const RecordStore* getRecordStore() const {
- return _recordStore;
- }
-
- SavedCursorRegistry* savedCursors() const {
- return _cursorRegistry;
- }
-
- static int lowWaterMark();
-
- Ordering ordering() const {
- return _ordering;
- }
-
- int customBSONCmp(const BSONObj& inIndex_left,
- const IndexSeekPoint& seekPoint_right,
- int direction) const;
-
- bool isUnique() const {
- return _isUnique;
- }
-
-private:
- friend class BtreeLogic::Builder;
-
- // Used for unit-testing only
- friend class BtreeLogicTestBase<BtreeLayout>;
- friend class ArtificialTreeBuilder<BtreeLayout>;
-
- /**
- * This is an in-memory wrapper for the variable-length data associated with a
- * KeyHeaderType. It points to on-disk data but is not itself on-disk data.
- *
- * This object and its 'data' member will become invalid if the KeyHeaderType data that owns
- * it is moved within the btree. In general, a FullKey should not be expected to remain
- * valid after a write.
- */
- struct FullKey {
- FullKey(const BucketType* bucket, int i)
- : header(getKeyHeader(bucket, i)),
- prevChildBucket(header.prevChildBucket),
- recordLoc(header.recordLoc),
- data(bucket->data + header.keyDataOfs()) {}
-
- // This is actually a reference to something on-disk.
- const KeyHeaderType& header;
-
- // These are actually in 'header'.
- const LocType& prevChildBucket;
- const LocType& recordLoc;
-
- // This is *not* memory-mapped but its members point to something on-disk.
- KeyDataType data;
- };
-
- //
- // Functions that depend on the templated type info but nothing in 'this'.
- //
-
- static LocType& childLocForPos(BucketType* bucket, int pos);
-
- static FullKey getFullKey(const BucketType* bucket, int i);
-
- static KeyHeaderType& getKeyHeader(BucketType* bucket, int i);
-
- static const KeyHeaderType& getKeyHeader(const BucketType* bucket, int i);
-
- static char* dataAt(BucketType* bucket, short ofs);
-
- static void markUnused(BucketType* bucket, int keypos);
-
- static int totalDataSize(BucketType* bucket);
-
- static void init(BucketType* bucket);
-
- static int _alloc(BucketType* bucket, int bytes);
-
- static void _unalloc(BucketType* bucket, int bytes);
-
- static void _delKeyAtPos(BucketType* bucket, int keypos, bool mayEmpty = false);
-
- static void popBack(BucketType* bucket, DiskLoc* recordLocOut, KeyDataType* keyDataOut);
-
- static bool mayDropKey(BucketType* bucket, int index, int refPos);
-
- static int _packedDataSize(BucketType* bucket, int refPos);
-
- static void setPacked(BucketType* bucket);
-
- static void setNotPacked(BucketType* bucket);
-
- static BucketType* btreemod(OperationContext* opCtx, BucketType* bucket);
-
- static int splitPos(BucketType* bucket, int keypos);
-
- static void reserveKeysFront(BucketType* bucket, int nAdd);
-
- static void setKey(BucketType* bucket,
- int i,
- const DiskLoc recordLoc,
- const KeyDataType& key,
- const DiskLoc prevChildBucket);
-
- static bool isHead(BucketType* bucket);
-
- static void dumpBucket(const BucketType* bucket, int indentLength = 0);
-
- static void assertValid(const std::string& ns,
- BucketType* bucket,
- const Ordering& ordering,
- bool force = false);
-
- //
- // 'this'-specific helpers (require the record store, catalog information, ordering, or type
- // information).
- //
-
- bool basicInsert(OperationContext* opCtx,
- BucketType* bucket,
- const DiskLoc bucketLoc,
- int& keypos,
- const KeyDataType& key,
- const DiskLoc recordLoc);
-
- void dropFront(BucketType* bucket, int nDrop, int& refpos);
-
- void _pack(OperationContext* opCtx, BucketType* bucket, const DiskLoc thisLoc, int& refPos);
-
- void customLocate(OperationContext* opCtx,
- DiskLoc* locInOut,
- int* keyOfsInOut,
- const IndexSeekPoint& seekPoint,
- int direction,
- std::pair<DiskLoc, int>& bestParent) const;
-
- Status _find(OperationContext* opCtx,
- BucketType* bucket,
- const KeyDataType& key,
- const DiskLoc& recordLoc,
- bool errorIfDup,
- int* keyPositionOut,
- bool* foundOut) const;
-
- bool customFind(OperationContext* opCtx,
- int low,
- int high,
- const IndexSeekPoint& seekPoint,
- int direction,
- DiskLoc* thisLocInOut,
- int* keyOfsInOut,
- std::pair<DiskLoc, int>& bestParent) const;
-
- void advanceToImpl(OperationContext* opCtx,
- DiskLoc* thisLocInOut,
- int* keyOfsInOut,
- const IndexSeekPoint& seekPoint,
- int direction) const;
-
- bool wouldCreateDup(OperationContext* opCtx, const KeyDataType& key, const DiskLoc self) const;
-
- bool keyIsUsed(OperationContext* opCtx, const DiskLoc& loc, const int& pos) const;
-
- void skipUnusedKeys(OperationContext* opCtx, DiskLoc* loc, int* pos, int direction) const;
-
- DiskLoc advance(OperationContext* opCtx,
- const DiskLoc& bucketLoc,
- int* posInOut,
- int direction) const;
-
- DiskLoc _locate(OperationContext* opCtx,
- const DiskLoc& bucketLoc,
- const KeyDataType& key,
- int* posOut,
- bool* foundOut,
- const DiskLoc& recordLoc,
- const int direction) const;
-
- long long _fullValidate(OperationContext* opCtx,
- const DiskLoc bucketLoc,
- long long* unusedCount,
- bool strict,
- bool dumpBuckets,
- unsigned depth) const;
-
- DiskLoc _addBucket(OperationContext* opCtx);
-
- bool canMergeChildren(OperationContext* opCtx,
- BucketType* bucket,
- const DiskLoc bucketLoc,
- const int leftIndex);
-
- // Has to look in the children of 'bucket' and requires the record store.
- int _rebalancedSeparatorPos(OperationContext* opCtx, BucketType* bucket, int leftIndex);
-
- void _packReadyForMod(BucketType* bucket, int& refPos);
-
- void truncateTo(BucketType* bucket, int N, int& refPos);
-
- void split(OperationContext* opCtx,
- BucketType* bucket,
- const DiskLoc bucketLoc,
- int keypos,
- const DiskLoc recordLoc,
- const KeyDataType& key,
- const DiskLoc lchild,
- const DiskLoc rchild);
-
- Status _insert(OperationContext* opCtx,
- BucketType* bucket,
- const DiskLoc bucketLoc,
- const KeyDataType& key,
- const DiskLoc recordLoc,
- bool dupsAllowed,
- const DiskLoc leftChild,
- const DiskLoc rightChild);
-
- // TODO take a BucketType*?
- void insertHere(OperationContext* opCtx,
- const DiskLoc bucketLoc,
- int pos,
- const KeyDataType& key,
- const DiskLoc recordLoc,
- const DiskLoc leftChild,
- const DiskLoc rightChild);
-
- std::string dupKeyError(const KeyDataType& key) const;
-
- void setInternalKey(OperationContext* opCtx,
- BucketType* bucket,
- const DiskLoc bucketLoc,
- int keypos,
- const DiskLoc recordLoc,
- const KeyDataType& key,
- const DiskLoc lchild,
- const DiskLoc rchild);
-
- void fixParentPtrs(OperationContext* opCtx,
- BucketType* bucket,
- const DiskLoc bucketLoc,
- int firstIndex = 0,
- int lastIndex = -1);
-
- bool mayBalanceWithNeighbors(OperationContext* opCtx,
- BucketType* bucket,
- const DiskLoc bucketLoc);
-
- void doBalanceChildren(OperationContext* opCtx,
- BucketType* bucket,
- const DiskLoc bucketLoc,
- int leftIndex);
-
- void doBalanceLeftToRight(OperationContext* opCtx,
- BucketType* bucket,
- const DiskLoc thisLoc,
- int leftIndex,
- int split,
- BucketType* l,
- const DiskLoc lchild,
- BucketType* r,
- const DiskLoc rchild);
-
- void doBalanceRightToLeft(OperationContext* opCtx,
- BucketType* bucket,
- const DiskLoc thisLoc,
- int leftIndex,
- int split,
- BucketType* l,
- const DiskLoc lchild,
- BucketType* r,
- const DiskLoc rchild);
-
- bool tryBalanceChildren(OperationContext* opCtx,
- BucketType* bucket,
- const DiskLoc bucketLoc,
- int leftIndex);
-
- int indexInParent(OperationContext* opCtx, BucketType* bucket, const DiskLoc bucketLoc) const;
-
- void doMergeChildren(OperationContext* opCtx,
- BucketType* bucket,
- const DiskLoc bucketLoc,
- int leftIndex);
-
- void replaceWithNextChild(OperationContext* opCtx, BucketType* bucket, const DiskLoc bucketLoc);
-
- void deleteInternalKey(OperationContext* opCtx,
- BucketType* bucket,
- const DiskLoc bucketLoc,
- int keypos);
-
- void delKeyAtPos(OperationContext* opCtx, BucketType* bucket, const DiskLoc bucketLoc, int p);
-
- void delBucket(OperationContext* opCtx, BucketType* bucket, const DiskLoc bucketLoc);
-
- void deallocBucket(OperationContext* opCtx, BucketType* bucket, const DiskLoc bucketLoc);
-
- bool _keyIsAt(const BSONObj& savedKey,
- const DiskLoc& savedLoc,
- BucketType* bucket,
- int keyPos) const;
-
- /**
- * Tries to push the key into the bucket. Returns false if it can't because the key doesn't fit.
- *
- * bucket must be declared as writable by the caller.
- * The new key/recordLoc pair must be higher than any others in bucket.
- *
- * TODO needs 'this' for _ordering for sanity check
- */
- bool pushBack(BucketType* bucket,
- const DiskLoc recordLoc,
- const KeyDataType& key,
- const DiskLoc prevChild);
-
-
- BucketType* childForPos(OperationContext* opCtx, BucketType* bucket, int pos) const;
-
- BucketType* getBucket(OperationContext* opCtx, const DiskLoc dl) const {
- return getBucket(opCtx, dl.toRecordId());
- }
- BucketType* getBucket(OperationContext* opCtx, const RecordId dl) const;
-
- BucketType* getRoot(OperationContext* opCtx) const;
-
- DiskLoc getRootLoc(OperationContext* opCtx) const;
-
- void recordRandomWalk(OperationContext* opCtx,
- PseudoRandom* prng,
- BucketType* curBucket,
- int64_t nBucketsInCurrentLevel,
- std::vector<int64_t>* nKeysInLevel,
- std::vector<FullKey>* selectedKeys) const;
-
- //
- // Data
- //
-
- // Not owned here.
- HeadManager* _headManager;
-
- // Not owned here.
- RecordStore* _recordStore;
-
- // Not owned here.
- SavedCursorRegistry* _cursorRegistry;
-
- Ordering _ordering;
-
- std::string _indexName;
-
- // True if this is a unique index, i.e. if duplicate key values are disallowed.
- const bool _isUnique;
-};
-
-} // namespace mongo
diff --git a/src/mongo/db/storage/mmap_v1/btree/btree_logic_test.cpp b/src/mongo/db/storage/mmap_v1/btree/btree_logic_test.cpp
deleted file mode 100644
index b3667b14e40..00000000000
--- a/src/mongo/db/storage/mmap_v1/btree/btree_logic_test.cpp
+++ /dev/null
@@ -1,2500 +0,0 @@
-// btree_logic_test.cpp : Btree unit tests
-//
-
-/**
- * Copyright (C) 2014 MongoDB
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects
- * for all of the code used other than as permitted herein. If you modify
- * file(s) with this exception, you may extend this exception to your
- * version of the file(s), but you are not obligated to do so. If you do not
- * wish to do so, delete this exception statement from your version. If you
- * delete this exception statement from all source files in the program,
- * then also delete it in the license file.
- */
-
-// This file contains simple single-threaded tests that check various aspects of the Btree logic.
-//
-
-#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kStorage
-
-#include "mongo/db/operation_context_noop.h"
-#include "mongo/db/storage/mmap_v1/btree/btree_test_help.h"
-#include "mongo/unittest/unittest.h"
-#include "mongo/util/log.h"
-
-
-namespace mongo {
-
-using std::string;
-
-/**
- * This class is made a friend of BtreeLogic so that the tests can be given access to whatever
- * private methods they need.
- */
-template <class BtreeLayoutType>
-class BtreeLogicTestBase {
-public:
- typedef typename BtreeLayoutType::BucketType BucketType;
- typedef typename BtreeLayoutType::FixedWidthKeyType FixedWidthKeyType;
-
- typedef typename BtreeLogic<BtreeLayoutType>::FullKey FullKey;
- typedef typename BtreeLogic<BtreeLayoutType>::KeyDataOwnedType KeyDataOwnedType;
-
- BtreeLogicTestBase() : _helper(BSON("TheKey" << 1)) {}
-
- virtual ~BtreeLogicTestBase() {}
-
-protected:
- void checkValidNumKeys(int nKeys) {
- OperationContextNoop opCtx;
- ASSERT_EQUALS(nKeys, _helper.btree.fullValidate(&opCtx, NULL, true, false, 0));
- }
-
- Status insert(const BSONObj& key, const DiskLoc dl, bool dupsAllowed = true) {
- OperationContextNoop opCtx;
- return _helper.btree.insert(&opCtx, key, dl, dupsAllowed);
- }
-
- bool unindex(const BSONObj& key) {
- OperationContextNoop opCtx;
- return _helper.btree.unindex(&opCtx, key, _helper.dummyDiskLoc);
- }
-
- void locate(const BSONObj& key,
- int expectedPos,
- bool expectedFound,
- const RecordId& expectedLocation,
- int direction) {
- return locate(
- key, expectedPos, expectedFound, DiskLoc::fromRecordId(expectedLocation), direction);
- }
- void locate(const BSONObj& key,
- int expectedPos,
- bool expectedFound,
- const DiskLoc& expectedLocation,
- int direction) {
- int pos;
- DiskLoc loc;
- OperationContextNoop opCtx;
- ASSERT_EQUALS(
- expectedFound,
- _helper.btree.locate(&opCtx, key, _helper.dummyDiskLoc, direction, &pos, &loc));
- ASSERT_EQUALS(expectedLocation, loc);
- ASSERT_EQUALS(expectedPos, pos);
- }
-
- const BucketType* child(const BucketType* bucket, int i) const {
- verify(i <= bucket->n);
-
- DiskLoc diskLoc;
- if (i == bucket->n) {
- diskLoc = bucket->nextChild;
- } else {
- FullKey fullKey = BtreeLogic<BtreeLayoutType>::getFullKey(bucket, i);
- diskLoc = fullKey.prevChildBucket;
- }
-
- verify(!diskLoc.isNull());
-
- return _helper.btree.getBucket(NULL, diskLoc);
- }
-
- BucketType* head() const {
- OperationContextNoop opCtx;
- return _helper.btree.getBucket(&opCtx, _helper.headManager.getHead(&opCtx));
- }
-
- void forcePackBucket(const RecordId bucketLoc) {
- BucketType* bucket = _helper.btree.getBucket(NULL, bucketLoc);
-
- bucket->topSize += bucket->emptySize;
- bucket->emptySize = 0;
- BtreeLogic<BtreeLayoutType>::setNotPacked(bucket);
- }
-
- void truncateBucket(BucketType* bucket, int N, int& refPos) {
- _helper.btree.truncateTo(bucket, N, refPos);
- }
-
- int bucketPackedDataSize(BucketType* bucket, int refPos) {
- return _helper.btree._packedDataSize(bucket, refPos);
- }
-
- int bucketRebalancedSeparatorPos(const RecordId bucketLoc, int leftIndex) {
- BucketType* bucket = _helper.btree.getBucket(NULL, bucketLoc);
- OperationContextNoop opCtx;
- return _helper.btree._rebalancedSeparatorPos(&opCtx, bucket, leftIndex);
- }
-
- FullKey getKey(const RecordId bucketLoc, int pos) const {
- const BucketType* bucket = _helper.btree.getBucket(NULL, bucketLoc);
- return BtreeLogic<BtreeLayoutType>::getFullKey(bucket, pos);
- }
-
- void markKeyUnused(const DiskLoc bucketLoc, int keyPos) {
- BucketType* bucket = _helper.btree.getBucket(NULL, bucketLoc);
- invariant(keyPos >= 0 && keyPos < bucket->n);
-
- _helper.btree.getKeyHeader(bucket, keyPos).setUnused();
- }
-
- DiskLoc newBucket() {
- OperationContextNoop opCtx;
- return _helper.btree._addBucket(&opCtx);
- }
-
- /**
- * Sets the nextChild pointer for the bucket at the specified location.
- */
- void setBucketNextChild(const DiskLoc bucketLoc, const DiskLoc nextChild) {
- OperationContextNoop opCtx;
-
- BucketType* bucket = _helper.btree.getBucket(&opCtx, bucketLoc);
- bucket->nextChild = nextChild;
-
- _helper.btree.fixParentPtrs(&opCtx, bucket, bucketLoc);
- }
-
-protected:
- BtreeLogicTestHelper<BtreeLayoutType> _helper;
-};
-
-//
-// TESTS
-//
-
-template <class OnDiskFormat>
-class SimpleCreate : public BtreeLogicTestBase<OnDiskFormat> {
-public:
- void run() {
- OperationContextNoop opCtx;
- this->_helper.btree.initAsEmpty(&opCtx).transitional_ignore();
-
- this->checkValidNumKeys(0);
- }
-};
-
-template <class OnDiskFormat>
-class SimpleInsertDelete : public BtreeLogicTestBase<OnDiskFormat> {
-public:
- void run() {
- OperationContextNoop opCtx;
- this->_helper.btree.initAsEmpty(&opCtx).transitional_ignore();
-
- BSONObj key = simpleKey('z');
- this->insert(key, this->_helper.dummyDiskLoc).transitional_ignore();
-
- this->checkValidNumKeys(1);
- this->locate(key, 0, true, this->_helper.headManager.getHead(&opCtx), 1);
-
- this->unindex(key);
-
- this->checkValidNumKeys(0);
- this->locate(key, 0, false, DiskLoc(), 1);
- }
-};
-
-template <class OnDiskFormat>
-class SplitUnevenBucketBase : public BtreeLogicTestBase<OnDiskFormat> {
-public:
- void run() {
- OperationContextNoop opCtx;
- this->_helper.btree.initAsEmpty(&opCtx).transitional_ignore();
-
- for (int i = 0; i < 10; ++i) {
- BSONObj shortKey = simpleKey(shortToken(i), 1);
- this->insert(shortKey, this->_helper.dummyDiskLoc).transitional_ignore();
-
- BSONObj longKey = simpleKey(longToken(i), 800);
- this->insert(longKey, this->_helper.dummyDiskLoc).transitional_ignore();
- }
-
- this->checkValidNumKeys(20);
- ASSERT_EQUALS(1, this->head()->n);
- checkSplit();
- }
-
-protected:
- virtual char shortToken(int i) const = 0;
- virtual char longToken(int i) const = 0;
- virtual void checkSplit() = 0;
-
- static char leftToken(int i) {
- return 'a' + i;
- }
-
- static char rightToken(int i) {
- return 'z' - i;
- }
-};
-
-template <class OnDiskFormat>
-class SplitRightHeavyBucket : public SplitUnevenBucketBase<OnDiskFormat> {
-private:
- virtual char shortToken(int i) const {
- return this->leftToken(i);
- }
- virtual char longToken(int i) const {
- return this->rightToken(i);
- }
- virtual void checkSplit() {
- ASSERT_EQUALS(15, this->child(this->head(), 0)->n);
- ASSERT_EQUALS(4, this->child(this->head(), 1)->n);
- }
-};
-
-template <class OnDiskFormat>
-class SplitLeftHeavyBucket : public SplitUnevenBucketBase<OnDiskFormat> {
-private:
- virtual char shortToken(int i) const {
- return this->rightToken(i);
- }
- virtual char longToken(int i) const {
- return this->leftToken(i);
- }
- virtual void checkSplit() {
- ASSERT_EQUALS(4, this->child(this->head(), 0)->n);
- ASSERT_EQUALS(15, this->child(this->head(), 1)->n);
- }
-};
-
-template <class OnDiskFormat>
-class MissingLocate : public BtreeLogicTestBase<OnDiskFormat> {
-public:
- void run() {
- OperationContextNoop opCtx;
- this->_helper.btree.initAsEmpty(&opCtx).transitional_ignore();
-
- for (int i = 0; i < 3; ++i) {
- BSONObj k = simpleKey('b' + 2 * i);
- this->insert(k, this->_helper.dummyDiskLoc).transitional_ignore();
- }
-
- locateExtended(1, 'a', 'b', this->_helper.headManager.getHead(&opCtx));
- locateExtended(1, 'c', 'd', this->_helper.headManager.getHead(&opCtx));
- locateExtended(1, 'e', 'f', this->_helper.headManager.getHead(&opCtx));
- locateExtended(1, 'g', 'g' + 1, RecordId()); // of course, 'h' isn't in the index.
-
- // old behavior
- // locateExtended( -1, 'a', 'b', dl() );
- // locateExtended( -1, 'c', 'd', dl() );
- // locateExtended( -1, 'e', 'f', dl() );
- // locateExtended( -1, 'g', 'f', dl() );
-
- locateExtended(-1, 'a', 'a' - 1, RecordId()); // of course, 'a' - 1 isn't in the index
- locateExtended(-1, 'c', 'b', this->_helper.headManager.getHead(&opCtx));
- locateExtended(-1, 'e', 'd', this->_helper.headManager.getHead(&opCtx));
- locateExtended(-1, 'g', 'f', this->_helper.headManager.getHead(&opCtx));
- }
-
-private:
- void locateExtended(int direction, char token, char expectedMatch, RecordId expectedLocation) {
- const BSONObj k = simpleKey(token);
- int expectedPos = (expectedMatch - 'b') / 2;
-
- this->locate(k, expectedPos, false, expectedLocation, direction);
- }
-};
-
-template <class OnDiskFormat>
-class MissingLocateMultiBucket : public BtreeLogicTestBase<OnDiskFormat> {
-public:
- void run() {
- OperationContextNoop opCtx;
- this->_helper.btree.initAsEmpty(&opCtx).transitional_ignore();
-
- this->insert(simpleKey('A', 800), this->_helper.dummyDiskLoc).transitional_ignore();
- this->insert(simpleKey('B', 800), this->_helper.dummyDiskLoc).transitional_ignore();
- this->insert(simpleKey('C', 800), this->_helper.dummyDiskLoc).transitional_ignore();
- this->insert(simpleKey('D', 800), this->_helper.dummyDiskLoc).transitional_ignore();
- this->insert(simpleKey('E', 800), this->_helper.dummyDiskLoc).transitional_ignore();
- this->insert(simpleKey('F', 800), this->_helper.dummyDiskLoc).transitional_ignore();
- this->insert(simpleKey('G', 800), this->_helper.dummyDiskLoc).transitional_ignore();
- this->insert(simpleKey('H', 800), this->_helper.dummyDiskLoc).transitional_ignore();
- this->insert(simpleKey('J', 800), this->_helper.dummyDiskLoc).transitional_ignore();
-
- // This causes a split
- this->insert(simpleKey('I', 800), this->_helper.dummyDiskLoc).transitional_ignore();
-
- int pos;
- DiskLoc loc;
-
- // 'E' is the split point and should be in the head; the rest should be split ~50/50.
- const BSONObj splitPoint = simpleKey('E', 800);
- this->_helper.btree.locate(&opCtx, splitPoint, this->_helper.dummyDiskLoc, 1, &pos, &loc);
- ASSERT_EQUALS(this->_helper.headManager.getHead(&opCtx), loc.toRecordId());
- ASSERT_EQUALS(0, pos);
-
- // Find the one before 'E'
- int largePos;
- DiskLoc largeLoc;
- this->_helper.btree.locate(
- &opCtx, splitPoint, this->_helper.dummyDiskLoc, 1, &largePos, &largeLoc);
- this->_helper.btree.advance(&opCtx, &largeLoc, &largePos, -1);
-
- // Find the one after 'E'
- int smallPos;
- DiskLoc smallLoc;
- this->_helper.btree.locate(
- &opCtx, splitPoint, this->_helper.dummyDiskLoc, 1, &smallPos, &smallLoc);
- this->_helper.btree.advance(&opCtx, &smallLoc, &smallPos, 1);
-
- ASSERT_NOT_EQUALS(smallLoc, largeLoc);
- ASSERT_NOT_EQUALS(smallLoc, loc);
- ASSERT_NOT_EQUALS(largeLoc, loc);
- }
-};
-
-/**
- * Validates that adding keys incrementally produces buckets that are 90%/10% full.
- */
-template <class OnDiskFormat>
-class SERVER983 : public BtreeLogicTestBase<OnDiskFormat> {
-public:
- void run() {
- OperationContextNoop opCtx;
- this->_helper.btree.initAsEmpty(&opCtx).transitional_ignore();
-
- this->insert(simpleKey('A', 800), this->_helper.dummyDiskLoc).transitional_ignore();
- this->insert(simpleKey('B', 800), this->_helper.dummyDiskLoc).transitional_ignore();
- this->insert(simpleKey('C', 800), this->_helper.dummyDiskLoc).transitional_ignore();
- this->insert(simpleKey('D', 800), this->_helper.dummyDiskLoc).transitional_ignore();
- this->insert(simpleKey('E', 800), this->_helper.dummyDiskLoc).transitional_ignore();
- this->insert(simpleKey('F', 800), this->_helper.dummyDiskLoc).transitional_ignore();
- this->insert(simpleKey('G', 800), this->_helper.dummyDiskLoc).transitional_ignore();
- this->insert(simpleKey('H', 800), this->_helper.dummyDiskLoc).transitional_ignore();
- this->insert(simpleKey('I', 800), this->_helper.dummyDiskLoc).transitional_ignore();
-
- // This will cause a split
- this->insert(simpleKey('J', 800), this->_helper.dummyDiskLoc).transitional_ignore();
-
- int pos;
- DiskLoc loc;
-
- // 'H' is the maximum 'large' interval key; 90% of keys should be < 'H' and 10% larger.
- const BSONObj splitPoint = simpleKey('H', 800);
- this->_helper.btree.locate(&opCtx, splitPoint, this->_helper.dummyDiskLoc, 1, &pos, &loc);
- ASSERT_EQUALS(this->_helper.headManager.getHead(&opCtx), loc.toRecordId());
- ASSERT_EQUALS(0, pos);
-
- // Find the one before 'H'
- int largePos;
- DiskLoc largeLoc;
- this->_helper.btree.locate(
- &opCtx, splitPoint, this->_helper.dummyDiskLoc, 1, &largePos, &largeLoc);
- this->_helper.btree.advance(&opCtx, &largeLoc, &largePos, -1);
-
- // Find the one after 'H'
- int smallPos;
- DiskLoc smallLoc;
- this->_helper.btree.locate(
- &opCtx, splitPoint, this->_helper.dummyDiskLoc, 1, &smallPos, &smallLoc);
- this->_helper.btree.advance(&opCtx, &smallLoc, &smallPos, 1);
-
- ASSERT_NOT_EQUALS(smallLoc, largeLoc);
- ASSERT_NOT_EQUALS(smallLoc, loc);
- ASSERT_NOT_EQUALS(largeLoc, loc);
- }
-};
-
-template <class OnDiskFormat>
-class DontReuseUnused : public BtreeLogicTestBase<OnDiskFormat> {
-public:
- void run() {
- OperationContextNoop opCtx;
- this->_helper.btree.initAsEmpty(&opCtx).transitional_ignore();
-
- for (int i = 0; i < 10; ++i) {
- const BSONObj k = simpleKey('b' + 2 * i, 800);
- this->insert(k, this->_helper.dummyDiskLoc).transitional_ignore();
- }
-
- const BSONObj root = simpleKey('p', 800);
- this->unindex(root);
-
- this->insert(root, this->_helper.dummyDiskLoc).transitional_ignore();
- this->locate(root, 0, true, this->head()->nextChild, 1);
- }
-};
-
-template <class OnDiskFormat>
-class MergeBucketsTestBase : public BtreeLogicTestBase<OnDiskFormat> {
-public:
- void run() {
- OperationContextNoop opCtx;
- this->_helper.btree.initAsEmpty(&opCtx).transitional_ignore();
-
- for (int i = 0; i < 10; ++i) {
- const BSONObj k = simpleKey('b' + 2 * i, 800);
- this->insert(k, this->_helper.dummyDiskLoc).transitional_ignore();
- }
-
- // numRecords() - 1, because this->_helper.dummyDiskLoc is actually in the record store too
- ASSERT_EQUALS(3, this->_helper.recordStore.numRecords(NULL) - 1);
-
- long long expectedCount = 10 - unindexKeys();
- ASSERT_EQUALS(1, this->_helper.recordStore.numRecords(NULL) - 1);
-
- long long unusedCount = 0;
- ASSERT_EQUALS(expectedCount,
- this->_helper.btree.fullValidate(&opCtx, &unusedCount, true, false, 0));
- ASSERT_EQUALS(0, unusedCount);
- }
-
-protected:
- virtual int unindexKeys() = 0;
-};
-
-template <class OnDiskFormat>
-class MergeBucketsLeft : public MergeBucketsTestBase<OnDiskFormat> {
- virtual int unindexKeys() {
- BSONObj k = simpleKey('b', 800);
- this->unindex(k);
-
- k = simpleKey('b' + 2, 800);
- this->unindex(k);
-
- k = simpleKey('b' + 4, 800);
- this->unindex(k);
-
- k = simpleKey('b' + 6, 800);
- this->unindex(k);
-
- return 4;
- }
-};
-
-template <class OnDiskFormat>
-class MergeBucketsRight : public MergeBucketsTestBase<OnDiskFormat> {
- virtual int unindexKeys() {
- const BSONObj k = simpleKey('b' + 2 * 9, 800);
- this->unindex(k);
- return 1;
- }
-};
-
-template <class OnDiskFormat>
-class MergeBucketsDontReplaceHead : public BtreeLogicTestBase<OnDiskFormat> {
-public:
- void run() {
- OperationContextNoop opCtx;
- this->_helper.btree.initAsEmpty(&opCtx).transitional_ignore();
-
- for (int i = 0; i < 18; ++i) {
- const BSONObj k = simpleKey('a' + i, 800);
- this->insert(k, this->_helper.dummyDiskLoc).transitional_ignore();
- }
-
- // numRecords(NULL) - 1, because this->_helper.dummyDiskLoc is actually in the record store too
- ASSERT_EQUALS(4, this->_helper.recordStore.numRecords(NULL) - 1);
-
- const BSONObj k = simpleKey('a' + 17, 800);
- this->unindex(k);
- ASSERT_EQUALS(3, this->_helper.recordStore.numRecords(NULL) - 1);
-
- long long unusedCount = 0;
- ASSERT_EQUALS(17, this->_helper.btree.fullValidate(&opCtx, &unusedCount, true, false, 0));
- ASSERT_EQUALS(0, unusedCount);
- }
-};
-
-template <class OnDiskFormat>
-class MergeBucketsDelInternal : public BtreeLogicTestBase<OnDiskFormat> {
-public:
- void run() {
- OperationContextNoop opCtx;
- ArtificialTreeBuilder<OnDiskFormat> builder(&opCtx, &this->_helper);
-
- builder.makeTree("{d:{b:{a:null},bb:null,_:{c:null}},_:{f:{e:null},_:{g:null}}}");
- ASSERT_EQUALS(8, this->_helper.btree.fullValidate(&opCtx, NULL, true, false, 0));
-
- // The tree has 7 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(8, this->_helper.recordStore.numRecords(NULL));
-
- const BSONObj k = BSON(""
- << "bb");
- verify(this->unindex(k));
-
- ASSERT_EQUALS(7, this->_helper.btree.fullValidate(&opCtx, NULL, true, false, 0));
-
- // The tree has 5 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(6, this->_helper.recordStore.numRecords(NULL));
-
- builder.checkStructure("{b:{a:null},d:{c:null},f:{e:null},_:{g:null}}");
- }
-};
-
-template <class OnDiskFormat>
-class MergeBucketsRightNull : public BtreeLogicTestBase<OnDiskFormat> {
-public:
- void run() {
- OperationContextNoop opCtx;
- ArtificialTreeBuilder<OnDiskFormat> builder(&opCtx, &this->_helper);
-
- builder.makeTree("{d:{b:{a:null},bb:null,cc:{c:null}},_:{f:{e:null},h:{g:null}}}");
- ASSERT_EQUALS(10, this->_helper.btree.fullValidate(&opCtx, NULL, true, false, 0));
-
- // The tree has 7 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(8, this->_helper.recordStore.numRecords(NULL));
-
- const BSONObj k = BSON(""
- << "bb");
- verify(this->unindex(k));
-
- ASSERT_EQUALS(9, this->_helper.btree.fullValidate(&opCtx, NULL, true, false, 0));
-
- // The tree has 5 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(6, this->_helper.recordStore.numRecords(NULL));
-
- builder.checkStructure("{b:{a:null},cc:{c:null},d:null,f:{e:null},h:{g:null}}");
- }
-};
-
-// This comment was here during porting, not sure what it means:
-//
-// "Not yet handling this case"
-template <class OnDiskFormat>
-class DontMergeSingleBucket : public BtreeLogicTestBase<OnDiskFormat> {
-public:
- void run() {
- OperationContextNoop opCtx;
- ArtificialTreeBuilder<OnDiskFormat> builder(&opCtx, &this->_helper);
-
- builder.makeTree("{d:{b:{a:null},c:null}}");
-
- ASSERT_EQUALS(4, this->_helper.btree.fullValidate(&opCtx, NULL, true, false, 0));
-
- // The tree has 3 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(4, this->_helper.recordStore.numRecords(NULL));
-
- const BSONObj k = BSON(""
- << "c");
- verify(this->unindex(k));
-
- ASSERT_EQUALS(3, this->_helper.btree.fullValidate(&opCtx, NULL, true, false, 0));
-
- // The tree has 3 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(4, this->_helper.recordStore.numRecords(NULL));
-
- builder.checkStructure("{d:{b:{a:null}}}");
- }
-};
-
-template <class OnDiskFormat>
-class ParentMergeNonRightToLeft : public BtreeLogicTestBase<OnDiskFormat> {
-public:
- void run() {
- OperationContextNoop opCtx;
- ArtificialTreeBuilder<OnDiskFormat> builder(&opCtx, &this->_helper);
-
- builder.makeTree("{d:{b:{a:null},bb:null,cc:{c:null}},i:{f:{e:null},h:{g:null}}}");
-
- ASSERT_EQUALS(11, this->_helper.btree.fullValidate(&opCtx, NULL, true, false, 0));
-
- // The tree has 7 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(8, this->_helper.recordStore.numRecords(NULL));
-
- const BSONObj k = BSON(""
- << "bb");
- verify(this->unindex(k));
-
- ASSERT_EQUALS(10, this->_helper.btree.fullValidate(&opCtx, NULL, true, false, 0));
-
- // Child does not currently replace parent in this case. Also, the tree
- // has 6 buckets + 1 for the this->_helper.dummyDiskLoc.
- ASSERT_EQUALS(7, this->_helper.recordStore.numRecords(NULL));
-
- builder.checkStructure("{i:{b:{a:null},cc:{c:null},d:null,f:{e:null},h:{g:null}}}");
- }
-};
-
-template <class OnDiskFormat>
-class ParentMergeNonRightToRight : public BtreeLogicTestBase<OnDiskFormat> {
-public:
- void run() {
- OperationContextNoop opCtx;
- ArtificialTreeBuilder<OnDiskFormat> builder(&opCtx, &this->_helper);
-
- builder.makeTree("{d:{b:{a:null},cc:{c:null}},i:{f:{e:null},ff:null,h:{g:null}}}");
-
- ASSERT_EQUALS(11, this->_helper.btree.fullValidate(&opCtx, NULL, true, false, 0));
-
- // The tree has 7 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(8, this->_helper.recordStore.numRecords(NULL));
-
- const BSONObj k = BSON(""
- << "ff");
- verify(this->unindex(k));
-
- ASSERT_EQUALS(10, this->_helper.btree.fullValidate(&opCtx, NULL, true, false, 0));
-
- // Child does not currently replace parent in this case. Also, the tree
- // has 6 buckets + 1 for the this->_helper.dummyDiskLoc.
- ASSERT_EQUALS(7, this->_helper.recordStore.numRecords(NULL));
-
- builder.checkStructure("{i:{b:{a:null},cc:{c:null},d:null,f:{e:null},h:{g:null}}}");
- }
-};
-
-template <class OnDiskFormat>
-class CantMergeRightNoMerge : public BtreeLogicTestBase<OnDiskFormat> {
-public:
- void run() {
- OperationContextNoop opCtx;
- ArtificialTreeBuilder<OnDiskFormat> builder(&opCtx, &this->_helper);
-
- builder.makeTree(
- "{d:{b:{a:null},bb:null,cc:{c:null}},"
- "dd:null,"
- "_:{f:{e:null},h:{g:null}}}");
-
- ASSERT_EQUALS(11, this->_helper.btree.fullValidate(&opCtx, NULL, true, false, 0));
-
- // The tree has 7 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(8, this->_helper.recordStore.numRecords(NULL));
-
- const BSONObj k = BSON(""
- << "bb");
- verify(this->unindex(k));
-
- ASSERT_EQUALS(10, this->_helper.btree.fullValidate(&opCtx, NULL, true, false, 0));
-
- // The tree has 7 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(8, this->_helper.recordStore.numRecords(NULL));
-
- builder.checkStructure(
- "{d:{b:{a:null},cc:{c:null}},"
- "dd:null,"
- "_:{f:{e:null},h:{g:null}}}");
- }
-};
-
-template <class OnDiskFormat>
-class CantMergeLeftNoMerge : public BtreeLogicTestBase<OnDiskFormat> {
-public:
- void run() {
- OperationContextNoop opCtx;
- ArtificialTreeBuilder<OnDiskFormat> builder(&opCtx, &this->_helper);
-
- builder.makeTree("{c:{b:{a:null}},d:null,_:{f:{e:null},g:null}}");
-
- ASSERT_EQUALS(7, this->_helper.btree.fullValidate(&opCtx, NULL, true, false, 0));
-
- // The tree has 5 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(6, this->_helper.recordStore.numRecords(NULL));
-
- const BSONObj k = BSON(""
- << "g");
- verify(this->unindex(k));
-
- ASSERT_EQUALS(6, this->_helper.btree.fullValidate(&opCtx, NULL, true, false, 0));
-
- // The tree has 5 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(6, this->_helper.recordStore.numRecords(NULL));
-
- builder.checkStructure("{c:{b:{a:null}},d:null,_:{f:{e:null}}}");
- }
-};
-
-template <class OnDiskFormat>
-class MergeOption : public BtreeLogicTestBase<OnDiskFormat> {
-public:
- void run() {
- OperationContextNoop opCtx;
- ArtificialTreeBuilder<OnDiskFormat> builder(&opCtx, &this->_helper);
-
- builder.makeTree("{c:{b:{a:null}},f:{e:{d:null},ee:null},_:{h:{g:null}}}");
-
- ASSERT_EQUALS(9, this->_helper.btree.fullValidate(&opCtx, NULL, true, false, 0));
-
- // The tree has 7 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(8, this->_helper.recordStore.numRecords(NULL));
-
- const BSONObj k = BSON(""
- << "ee");
- verify(this->unindex(k));
-
- ASSERT_EQUALS(8, this->_helper.btree.fullValidate(&opCtx, NULL, true, false, 0));
-
- // The tree has 6 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(7, this->_helper.recordStore.numRecords(NULL));
-
- builder.checkStructure("{c:{b:{a:null}},_:{e:{d:null},f:null,h:{g:null}}}");
- }
-};
-
-template <class OnDiskFormat>
-class ForceMergeLeft : public BtreeLogicTestBase<OnDiskFormat> {
-public:
- void run() {
- OperationContextNoop opCtx;
- ArtificialTreeBuilder<OnDiskFormat> builder(&opCtx, &this->_helper);
-
- builder.makeTree("{c:{b:{a:null}},f:{e:{d:null},ee:null},ff:null,_:{h:{g:null}}}");
-
- ASSERT_EQUALS(10, this->_helper.btree.fullValidate(&opCtx, NULL, true, false, 0));
-
- // The tree has 7 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(8, this->_helper.recordStore.numRecords(NULL));
-
- const BSONObj k = BSON(""
- << "ee");
- verify(this->unindex(k));
-
- ASSERT_EQUALS(9, this->_helper.btree.fullValidate(&opCtx, NULL, true, false, 0));
-
- // The tree has 6 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(7, this->_helper.recordStore.numRecords(NULL));
-
- builder.checkStructure("{f:{b:{a:null},c:null,e:{d:null}},ff:null,_:{h:{g:null}}}");
- }
-};
-
-template <class OnDiskFormat>
-class ForceMergeRight : public BtreeLogicTestBase<OnDiskFormat> {
-public:
- void run() {
- OperationContextNoop opCtx;
- ArtificialTreeBuilder<OnDiskFormat> builder(&opCtx, &this->_helper);
-
- builder.makeTree("{c:{b:{a:null}},cc:null,f:{e:{d:null},ee:null},_:{h:{g:null}}}");
-
- ASSERT_EQUALS(10, this->_helper.btree.fullValidate(&opCtx, NULL, true, false, 0));
-
- // The tree has 7 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(8, this->_helper.recordStore.numRecords(NULL));
-
- const BSONObj k = BSON(""
- << "ee");
- verify(this->unindex(k));
-
- ASSERT_EQUALS(9, this->_helper.btree.fullValidate(&opCtx, NULL, true, false, 0));
-
- // The tree has 6 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(7, this->_helper.recordStore.numRecords(NULL));
-
- builder.checkStructure("{c:{b:{a:null}},cc:null,_:{e:{d:null},f:null,h:{g:null}}}");
- }
-};
-
-template <class OnDiskFormat>
-class RecursiveMerge : public BtreeLogicTestBase<OnDiskFormat> {
-public:
- void run() {
- OperationContextNoop opCtx;
- ArtificialTreeBuilder<OnDiskFormat> builder(&opCtx, &this->_helper);
-
- builder.makeTree("{h:{e:{b:{a:null},c:null,d:null},g:{f:null}},j:{i:null}}");
-
- ASSERT_EQUALS(10, this->_helper.btree.fullValidate(&opCtx, NULL, true, false, 0));
-
- // The tree has 6 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(7, this->_helper.recordStore.numRecords(NULL));
-
- const BSONObj k = BSON(""
- << "c");
- verify(this->unindex(k));
-
- ASSERT_EQUALS(9, this->_helper.btree.fullValidate(&opCtx, NULL, true, false, 0));
-
- // The tree has 4 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(5, this->_helper.recordStore.numRecords(NULL));
-
- // Height is not currently reduced in this case
- builder.checkStructure("{j:{g:{b:{a:null},d:null,e:null,f:null},h:null,i:null}}");
- }
-};
-
-template <class OnDiskFormat>
-class RecursiveMergeRightBucket : public BtreeLogicTestBase<OnDiskFormat> {
-public:
- void run() {
- OperationContextNoop opCtx;
- ArtificialTreeBuilder<OnDiskFormat> builder(&opCtx, &this->_helper);
-
- builder.makeTree("{h:{e:{b:{a:null},c:null,d:null},g:{f:null}},_:{i:null}}");
-
- ASSERT_EQUALS(9, this->_helper.btree.fullValidate(&opCtx, NULL, true, false, 0));
-
- // The tree has 6 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(7, this->_helper.recordStore.numRecords(NULL));
-
- const BSONObj k = BSON(""
- << "c");
- verify(this->unindex(k));
-
- ASSERT_EQUALS(8, this->_helper.btree.fullValidate(&opCtx, NULL, true, false, 0));
-
- // The tree has 3 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(4, this->_helper.recordStore.numRecords(NULL));
-
- builder.checkStructure("{g:{b:{a:null},d:null,e:null,f:null},h:null,i:null}");
- }
-};
-
-template <class OnDiskFormat>
-class RecursiveMergeDoubleRightBucket : public BtreeLogicTestBase<OnDiskFormat> {
-public:
- void run() {
- OperationContextNoop opCtx;
- ArtificialTreeBuilder<OnDiskFormat> builder(&opCtx, &this->_helper);
-
- builder.makeTree("{h:{e:{b:{a:null},c:null,d:null},_:{f:null}},_:{i:null}}");
-
- ASSERT_EQUALS(8, this->_helper.btree.fullValidate(&opCtx, NULL, true, false, 0));
-
- // The tree has 6 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(7, this->_helper.recordStore.numRecords(NULL));
-
- const BSONObj k = BSON(""
- << "c");
- verify(this->unindex(k));
-
- ASSERT_EQUALS(7, this->_helper.btree.fullValidate(&opCtx, NULL, true, false, 0));
-
- // The tree has 4 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(5, this->_helper.recordStore.numRecords(NULL));
-
- // no recursion currently in this case
- builder.checkStructure("{h:{b:{a:null},d:null,e:null,f:null},_:{i:null}}");
- }
-};
-
-template <class OnDiskFormat>
-class MergeSizeTestBase : public BtreeLogicTestBase<OnDiskFormat> {
-public:
- MergeSizeTestBase() : _count(0) {}
-
- void run() {
- OperationContextNoop opCtx;
- this->_helper.btree.initAsEmpty(&opCtx).transitional_ignore();
-
- ArtificialTreeBuilder<OnDiskFormat> builder(&opCtx, &this->_helper);
-
- const BSONObj& topKey = biggestKey('m');
-
- DiskLoc leftChild = this->newBucket();
- builder.push(
- DiskLoc::fromRecordId(this->_helper.headManager.getHead(&opCtx)), topKey, leftChild);
- _count++;
-
- DiskLoc rightChild = this->newBucket();
- this->setBucketNextChild(DiskLoc::fromRecordId(this->_helper.headManager.getHead(&opCtx)),
- rightChild);
-
- _count += builder.fillBucketToExactSize(leftChild, leftSize(), 'a');
- _count += builder.fillBucketToExactSize(rightChild, rightSize(), 'n');
-
- ASSERT(leftAdditional() <= 2);
- if (leftAdditional() >= 2) {
- builder.push(leftChild, bigKey('k'), DiskLoc());
- }
- if (leftAdditional() >= 1) {
- builder.push(leftChild, bigKey('l'), DiskLoc());
- }
-
- ASSERT(rightAdditional() <= 2);
- if (rightAdditional() >= 2) {
- builder.push(rightChild, bigKey('y'), DiskLoc());
- }
- if (rightAdditional() >= 1) {
- builder.push(rightChild, bigKey('z'), DiskLoc());
- }
-
- _count += leftAdditional() + rightAdditional();
-
- initCheck();
-
- const char* keys = delKeys();
- for (const char* i = keys; *i; ++i) {
- long long unused = 0;
- ASSERT_EQUALS(_count,
- this->_helper.btree.fullValidate(&opCtx, &unused, true, false, 0));
- ASSERT_EQUALS(0, unused);
-
- // The tree has 3 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(4, this->_helper.recordStore.numRecords(NULL));
-
- const BSONObj k = bigKey(*i);
- this->unindex(k);
-
- --_count;
- }
-
- long long unused = 0;
- ASSERT_EQUALS(_count, this->_helper.btree.fullValidate(&opCtx, &unused, true, false, 0));
- ASSERT_EQUALS(0, unused);
-
- validate();
-
- if (!merge()) {
- // The tree has 3 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(4, this->_helper.recordStore.numRecords(NULL));
- } else {
- // The tree has 1 bucket + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(2, this->_helper.recordStore.numRecords(NULL));
- }
- }
-
-protected:
- virtual int leftAdditional() const {
- return 2;
- }
- virtual int rightAdditional() const {
- return 2;
- }
- virtual void initCheck() {}
- virtual void validate() {}
- virtual int leftSize() const = 0;
- virtual int rightSize() const = 0;
- virtual const char* delKeys() const {
- return "klyz";
- }
- virtual bool merge() const {
- return true;
- }
-
- static BSONObj bigKey(char a) {
- return simpleKey(a, 801);
- }
-
- static BSONObj biggestKey(char a) {
- int size = OnDiskFormat::KeyMax - bigSize() + 801;
- return simpleKey(a, size);
- }
-
- static int bigSize() {
- return typename BtreeLogicTestBase<OnDiskFormat>::KeyDataOwnedType(bigKey('a')).dataSize();
- }
-
- static int biggestSize() {
- return
- typename BtreeLogicTestBase<OnDiskFormat>::KeyDataOwnedType(biggestKey('a')).dataSize();
- }
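-
- // A sketch of the arithmetic above (illustrative, assuming each extra byte
- // of the simpleKey() string adds exactly one byte of serialized key data):
- // bigKey() wraps an 801-byte string and serializes to bigSize() bytes, so a
- // string of length KeyMax - bigSize() + 801 serializes to exactly KeyMax
- // bytes. The leftSize()/rightSize() computations in the subclasses rely on
- // biggestSize() == OnDiskFormat::KeyMax to fill buckets to a known byte
- // count.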
-
- int _count;
-};
-
-template <class OnDiskFormat>
-class MergeSizeJustRightRight : public MergeSizeTestBase<OnDiskFormat> {
-protected:
- virtual int rightSize() const {
- return BtreeLogic<OnDiskFormat>::lowWaterMark() - 1;
- }
-
- virtual int leftSize() const {
- return OnDiskFormat::BucketBodySize - MergeSizeTestBase<OnDiskFormat>::biggestSize() -
- sizeof(typename BtreeLogicTestBase<OnDiskFormat>::FixedWidthKeyType) -
- (BtreeLogic<OnDiskFormat>::lowWaterMark() - 1);
- }
-};
-
-template <class OnDiskFormat>
-class MergeSizeJustRightLeft : public MergeSizeTestBase<OnDiskFormat> {
-protected:
- virtual int leftSize() const {
- return BtreeLogic<OnDiskFormat>::lowWaterMark() - 1;
- }
-
- virtual int rightSize() const {
- return OnDiskFormat::BucketBodySize - MergeSizeTestBase<OnDiskFormat>::biggestSize() -
- sizeof(typename BtreeLogicTestBase<OnDiskFormat>::FixedWidthKeyType) -
- (BtreeLogic<OnDiskFormat>::lowWaterMark() - 1);
- }
-
- virtual const char* delKeys() const {
- return "yzkl";
- }
-};
-
-template <class OnDiskFormat>
-class MergeSizeRight : public MergeSizeJustRightRight<OnDiskFormat> {
- virtual int rightSize() const {
- return MergeSizeJustRightRight<OnDiskFormat>::rightSize() - 1;
- }
- virtual int leftSize() const {
- return MergeSizeJustRightRight<OnDiskFormat>::leftSize() + 1;
- }
-};
-
-template <class OnDiskFormat>
-class MergeSizeLeft : public MergeSizeJustRightLeft<OnDiskFormat> {
- virtual int rightSize() const {
- return MergeSizeJustRightLeft<OnDiskFormat>::rightSize() + 1;
- }
- virtual int leftSize() const {
- return MergeSizeJustRightLeft<OnDiskFormat>::leftSize() - 1;
- }
-};
-
-template <class OnDiskFormat>
-class NoMergeBelowMarkRight : public MergeSizeJustRightRight<OnDiskFormat> {
- virtual int rightSize() const {
- return MergeSizeJustRightRight<OnDiskFormat>::rightSize() + 1;
- }
- virtual int leftSize() const {
- return MergeSizeJustRightRight<OnDiskFormat>::leftSize() - 1;
- }
- virtual bool merge() const {
- return false;
- }
-};
-
-template <class OnDiskFormat>
-class NoMergeBelowMarkLeft : public MergeSizeJustRightLeft<OnDiskFormat> {
- virtual int rightSize() const {
- return MergeSizeJustRightLeft<OnDiskFormat>::rightSize() - 1;
- }
- virtual int leftSize() const {
- return MergeSizeJustRightLeft<OnDiskFormat>::leftSize() + 1;
- }
- virtual bool merge() const {
- return false;
- }
-};
-
-template <class OnDiskFormat>
-class MergeSizeRightTooBig : public MergeSizeJustRightLeft<OnDiskFormat> {
- virtual int rightSize() const {
- return MergeSizeJustRightLeft<OnDiskFormat>::rightSize() + 1;
- }
- virtual bool merge() const {
- return false;
- }
-};
-
-template <class OnDiskFormat>
-class MergeSizeLeftTooBig : public MergeSizeJustRightRight<OnDiskFormat> {
- virtual int leftSize() const {
- return MergeSizeJustRightRight<OnDiskFormat>::leftSize() + 1;
- }
- virtual bool merge() const {
- return false;
- }
-};
-
-template <class OnDiskFormat>
-class MergeRightEmpty : public MergeSizeTestBase<OnDiskFormat> {
-protected:
- virtual int rightAdditional() const {
- return 1;
- }
- virtual int leftAdditional() const {
- return 1;
- }
- virtual const char* delKeys() const {
- return "lz";
- }
- virtual int rightSize() const {
- return 0;
- }
- virtual int leftSize() const {
- return OnDiskFormat::BucketBodySize - MergeSizeTestBase<OnDiskFormat>::biggestSize() -
- sizeof(typename BtreeLogicTestBase<OnDiskFormat>::FixedWidthKeyType);
- }
-};
-
-template <class OnDiskFormat>
-class MergeMinRightEmpty : public MergeSizeTestBase<OnDiskFormat> {
-protected:
- virtual int rightAdditional() const {
- return 1;
- }
- virtual int leftAdditional() const {
- return 0;
- }
- virtual const char* delKeys() const {
- return "z";
- }
- virtual int rightSize() const {
- return 0;
- }
- virtual int leftSize() const {
- return MergeSizeTestBase<OnDiskFormat>::bigSize() +
- sizeof(typename BtreeLogicTestBase<OnDiskFormat>::FixedWidthKeyType);
- }
-};
-
-template <class OnDiskFormat>
-class MergeLeftEmpty : public MergeSizeTestBase<OnDiskFormat> {
-protected:
- virtual int rightAdditional() const {
- return 1;
- }
- virtual int leftAdditional() const {
- return 1;
- }
- virtual const char* delKeys() const {
- return "zl";
- }
- virtual int leftSize() const {
- return 0;
- }
- virtual int rightSize() const {
- return OnDiskFormat::BucketBodySize - MergeSizeTestBase<OnDiskFormat>::biggestSize() -
- sizeof(typename BtreeLogicTestBase<OnDiskFormat>::FixedWidthKeyType);
- }
-};
-
-template <class OnDiskFormat>
-class MergeMinLeftEmpty : public MergeSizeTestBase<OnDiskFormat> {
-protected:
- virtual int leftAdditional() const {
- return 1;
- }
- virtual int rightAdditional() const {
- return 0;
- }
- virtual const char* delKeys() const {
- return "l";
- }
- virtual int leftSize() const {
- return 0;
- }
- virtual int rightSize() const {
- return MergeSizeTestBase<OnDiskFormat>::bigSize() +
- sizeof(typename BtreeLogicTestBase<OnDiskFormat>::FixedWidthKeyType);
- }
-};
-
-template <class OnDiskFormat>
-class BalanceRightEmpty : public MergeRightEmpty<OnDiskFormat> {
-protected:
- virtual int leftSize() const {
- return OnDiskFormat::BucketBodySize - MergeSizeTestBase<OnDiskFormat>::biggestSize() -
- sizeof(typename BtreeLogicTestBase<OnDiskFormat>::FixedWidthKeyType) + 1;
- }
-
- virtual bool merge() const {
- return false;
- }
-
- virtual void initCheck() {
- OperationContextNoop opCtx;
- _oldTop = this->getKey(this->_helper.headManager.getHead(&opCtx), 0).data.toBson();
- }
-
- virtual void validate() {
- OperationContextNoop opCtx;
- ASSERT_BSONOBJ_NE(_oldTop,
- this->getKey(this->_helper.headManager.getHead(&opCtx), 0).data.toBson());
- }
-
-private:
- BSONObj _oldTop;
-};
-
-template <class OnDiskFormat>
-class BalanceLeftEmpty : public MergeLeftEmpty<OnDiskFormat> {
-protected:
- virtual int rightSize() const {
- return OnDiskFormat::BucketBodySize - MergeSizeTestBase<OnDiskFormat>::biggestSize() -
- sizeof(typename BtreeLogicTestBase<OnDiskFormat>::FixedWidthKeyType) + 1;
- }
-
- virtual bool merge() const {
- return false;
- }
-
- virtual void initCheck() {
- OperationContextNoop opCtx;
- _oldTop = this->getKey(this->_helper.headManager.getHead(&opCtx), 0).data.toBson();
- }
-
- virtual void validate() {
- OperationContextNoop opCtx;
- ASSERT_BSONOBJ_NE(_oldTop,
- this->getKey(this->_helper.headManager.getHead(&opCtx), 0).data.toBson());
- }
-
-private:
- BSONObj _oldTop;
-};
-
-template <class OnDiskFormat>
-class BalanceOneLeftToRight : public BtreeLogicTestBase<OnDiskFormat> {
-public:
- void run() {
- OperationContextNoop opCtx;
- ArtificialTreeBuilder<OnDiskFormat> builder(&opCtx, &this->_helper);
-
- builder.makeTree(
- "{$10:{$1:null,$2:null,$3:null,$4:null,$5:null,$6:null},"
- "b:{$20:null,$30:null,$40:null,$50:null,a:null},"
- "_:{c:null}}");
-
- ASSERT_EQUALS(14, this->_helper.btree.fullValidate(&opCtx, NULL, true, false, 0));
-
- // The tree has 4 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(5, this->_helper.recordStore.numRecords(NULL));
-
- const BSONObj k = BSON("" << bigNumString(0x40, 800));
- ASSERT(this->unindex(k));
-
- ASSERT_EQUALS(13, this->_helper.btree.fullValidate(&opCtx, NULL, true, false, 0));
-
- // The tree has 4 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(5, this->_helper.recordStore.numRecords(NULL));
-
- builder.checkStructure(
- "{$6:{$1:null,$2:null,$3:null,$4:null,$5:null},"
- "b:{$10:null,$20:null,$30:null,$50:null,a:null},"
- "_:{c:null}}");
- }
-};
-
-template <class OnDiskFormat>
-class BalanceOneRightToLeft : public BtreeLogicTestBase<OnDiskFormat> {
-public:
- void run() {
- OperationContextNoop opCtx;
- ArtificialTreeBuilder<OnDiskFormat> builder(&opCtx, &this->_helper);
-
- builder.makeTree(
- "{$10:{$1:null,$2:null,$3:null,$4:null},"
- "b:{$20:null,$30:null,$40:null,$50:null,$60:null,$70:null},"
- "_:{c:null}}");
-
- ASSERT_EQUALS(13, this->_helper.btree.fullValidate(&opCtx, NULL, true, false, 0));
-
- // The tree has 4 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(5, this->_helper.recordStore.numRecords(NULL));
-
- const BSONObj k = BSON("" << bigNumString(0x3, 800));
- ASSERT(this->unindex(k));
-
- ASSERT_EQUALS(12, this->_helper.btree.fullValidate(&opCtx, NULL, true, false, 0));
-
- // The tree has 4 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(5, this->_helper.recordStore.numRecords(NULL));
-
- builder.checkStructure(
- "{$20:{$1:null,$2:null,$4:null,$10:null},"
- "b:{$30:null,$40:null,$50:null,$60:null,$70:null},"
- "_:{c:null}}");
- }
-};
-
-template <class OnDiskFormat>
-class BalanceThreeLeftToRight : public BtreeLogicTestBase<OnDiskFormat> {
-public:
- void run() {
- OperationContextNoop opCtx;
- ArtificialTreeBuilder<OnDiskFormat> builder(&opCtx, &this->_helper);
-
- builder.makeTree(
- "{$20:{$1:{$0:null},$3:{$2:null},$5:{$4:null},$7:{$6:null},"
- "$9:{$8:null},$11:{$10:null},$13:{$12:null},_:{$14:null}},"
- "b:{$30:null,$40:{$35:null},$50:{$45:null}},"
- "_:{c:null}}");
-
- ASSERT_EQUALS(23, this->_helper.btree.fullValidate(&opCtx, NULL, true, false, 0));
-
- // The tree has 14 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(15, this->_helper.recordStore.numRecords(NULL));
-
- const BSONObj k = BSON("" << bigNumString(0x30, 800));
- ASSERT(this->unindex(k));
-
- ASSERT_EQUALS(22, this->_helper.btree.fullValidate(&opCtx, NULL, true, false, 0));
-
- // The tree has 14 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(15, this->_helper.recordStore.numRecords(NULL));
-
- builder.checkStructure(
- "{$9:{$1:{$0:null},$3:{$2:null},"
- "$5:{$4:null},$7:{$6:null},_:{$8:null}},"
- "b:{$11:{$10:null},$13:{$12:null},$20:{$14:null},"
- "$40:{$35:null},$50:{$45:null}},"
- "_:{c:null}}");
- }
-};
-
-template <class OnDiskFormat>
-class BalanceThreeRightToLeft : public BtreeLogicTestBase<OnDiskFormat> {
-public:
- void run() {
- OperationContextNoop opCtx;
- ArtificialTreeBuilder<OnDiskFormat> builder(&opCtx, &this->_helper);
-
- builder.makeTree(
- "{$20:{$1:{$0:null},$3:{$2:null},$5:null,_:{$14:null}},"
- "b:{$30:{$25:null},$40:{$35:null},$50:{$45:null},$60:{$55:null},"
- "$70:{$65:null},$80:{$75:null},"
- "$90:{$85:null},$100:{$95:null}},"
- "_:{c:null}}");
-
- ASSERT_EQUALS(25, this->_helper.btree.fullValidate(&opCtx, NULL, true, false, 0));
-
- // The tree has 15 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(16, this->_helper.recordStore.numRecords(NULL));
-
- const BSONObj k = BSON("" << bigNumString(0x5, 800));
- ASSERT(this->unindex(k));
-
- ASSERT_EQUALS(24, this->_helper.btree.fullValidate(&opCtx, NULL, true, false, 0));
-
- // The tree has 15 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(16, this->_helper.recordStore.numRecords(NULL));
-
- builder.checkStructure(
- "{$50:{$1:{$0:null},$3:{$2:null},$20:{$14:null},"
- "$30:{$25:null},$40:{$35:null},_:{$45:null}},"
- "b:{$60:{$55:null},$70:{$65:null},$80:{$75:null},"
- "$90:{$85:null},$100:{$95:null}},"
- "_:{c:null}}");
- }
-};
-
-template <class OnDiskFormat>
-class BalanceSingleParentKey : public BtreeLogicTestBase<OnDiskFormat> {
-public:
- void run() {
- OperationContextNoop opCtx;
- ArtificialTreeBuilder<OnDiskFormat> builder(&opCtx, &this->_helper);
-
- builder.makeTree(
- "{$10:{$1:null,$2:null,$3:null,$4:null,$5:null,$6:null},"
- "_:{$20:null,$30:null,$40:null,$50:null,a:null}}");
-
- ASSERT_EQUALS(12, this->_helper.btree.fullValidate(&opCtx, NULL, true, false, 0));
-
- // The tree has 3 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(4, this->_helper.recordStore.numRecords(NULL));
-
- const BSONObj k = BSON("" << bigNumString(0x40, 800));
- ASSERT(this->unindex(k));
-
- ASSERT_EQUALS(11, this->_helper.btree.fullValidate(&opCtx, NULL, true, false, 0));
-
- // The tree has 3 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(4, this->_helper.recordStore.numRecords(NULL));
-
- builder.checkStructure(
- "{$6:{$1:null,$2:null,$3:null,$4:null,$5:null},"
- "_:{$10:null,$20:null,$30:null,$50:null,a:null}}");
- }
-};
-
-template <class OnDiskFormat>
-class PackEmptyBucket : public BtreeLogicTestBase<OnDiskFormat> {
-public:
- void run() {
- OperationContextNoop opCtx;
- ArtificialTreeBuilder<OnDiskFormat> builder(&opCtx, &this->_helper);
-
- builder.makeTree("{a:null}");
-
- const BSONObj k = BSON(""
- << "a");
- ASSERT(this->unindex(k));
-
- this->forcePackBucket(this->_helper.headManager.getHead(&opCtx));
-
- typename BtreeLogicTestBase<OnDiskFormat>::BucketType* headBucket = this->head();
-
- ASSERT_EQUALS(0, headBucket->n);
- ASSERT_FALSE(headBucket->flags & Packed);
-
- int unused = 0;
- this->truncateBucket(headBucket, 0, unused);
-
- ASSERT_EQUALS(0, headBucket->n);
- ASSERT_EQUALS(0, headBucket->topSize);
- ASSERT_EQUALS((int)OnDiskFormat::BucketBodySize, headBucket->emptySize);
- ASSERT_TRUE(headBucket->flags & Packed);
- }
-};
-
-template <class OnDiskFormat>
-class PackedDataSizeEmptyBucket : public BtreeLogicTestBase<OnDiskFormat> {
-public:
- void run() {
- OperationContextNoop opCtx;
- ArtificialTreeBuilder<OnDiskFormat> builder(&opCtx, &this->_helper);
-
- builder.makeTree("{a:null}");
-
- const BSONObj k = BSON(""
- << "a");
- ASSERT(this->unindex(k));
-
- this->forcePackBucket(this->_helper.headManager.getHead(&opCtx));
-
- typename BtreeLogicTestBase<OnDiskFormat>::BucketType* headBucket = this->head();
-
- ASSERT_EQUALS(0, headBucket->n);
- ASSERT_FALSE(headBucket->flags & Packed);
- ASSERT_EQUALS(0, this->bucketPackedDataSize(headBucket, 0));
- ASSERT_FALSE(headBucket->flags & Packed);
- }
-};
-
-template <class OnDiskFormat>
-class BalanceSingleParentKeyPackParent : public BtreeLogicTestBase<OnDiskFormat> {
-public:
- void run() {
- OperationContextNoop opCtx;
- ArtificialTreeBuilder<OnDiskFormat> builder(&opCtx, &this->_helper);
-
- builder.makeTree(
- "{$10:{$1:null,$2:null,$3:null,$4:null,$5:null,$6:null},"
- "_:{$20:null,$30:null,$40:null,$50:null,a:null}}");
-
- ASSERT_EQUALS(12, this->_helper.btree.fullValidate(&opCtx, NULL, true, false, 0));
-
- // The tree has 3 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(4, this->_helper.recordStore.numRecords(NULL));
-
- // force parent pack
- this->forcePackBucket(this->_helper.headManager.getHead(&opCtx));
-
- const BSONObj k = BSON("" << bigNumString(0x40, 800));
- ASSERT(this->unindex(k));
-
- ASSERT_EQUALS(11, this->_helper.btree.fullValidate(&opCtx, NULL, true, false, 0));
-
- // The tree has 3 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(4, this->_helper.recordStore.numRecords(NULL));
-
- builder.checkStructure(
- "{$6:{$1:null,$2:null,$3:null,$4:null,$5:null},"
- "_:{$10:null,$20:null,$30:null,$50:null,a:null}}");
- }
-};
-
-template <class OnDiskFormat>
-class BalanceSplitParent : public BtreeLogicTestBase<OnDiskFormat> {
-public:
- void run() {
- OperationContextNoop opCtx;
- ArtificialTreeBuilder<OnDiskFormat> builder(&opCtx, &this->_helper);
-
- builder.makeTree(
- "{$10$10:{$1:null,$2:null,$3:null,$4:null},"
- "$100:{$20:null,$30:null,$40:null,$50:null,$60:null,$70:null,$80:null},"
- "$200:null,$300:null,$400:null,$500:null,$600:null,"
- "$700:null,$800:null,$900:null,_:{c:null}}");
-
- ASSERT_EQUALS(22, this->_helper.btree.fullValidate(&opCtx, NULL, true, false, 0));
-
- // The tree has 4 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(5, this->_helper.recordStore.numRecords(NULL));
-
- const BSONObj k = BSON("" << bigNumString(0x3, 800));
- ASSERT(this->unindex(k));
-
- ASSERT_EQUALS(21, this->_helper.btree.fullValidate(&opCtx, NULL, true, false, 0));
-
- // The tree has 6 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(7, this->_helper.recordStore.numRecords(NULL));
-
- builder.checkStructure(
- "{$500:{ $30:{$1:null,$2:null,$4:null,$10$10:null,$20:null},"
- "$100:{$40:null,$50:null,$60:null,$70:null,$80:null},"
- "$200:null,$300:null,$400:null},"
- "_:{$600:null,$700:null,$800:null,$900:null,_:{c:null}}}");
- }
-};
-
-template <class OnDiskFormat>
-class RebalancedSeparatorBase : public BtreeLogicTestBase<OnDiskFormat> {
-public:
- void run() {
- OperationContextNoop opCtx;
- ArtificialTreeBuilder<OnDiskFormat> builder(&opCtx, &this->_helper);
-
- builder.makeTree(treeSpec());
- modTree();
-
- ASSERT_EQUALS(
- expectedSeparator(),
- this->bucketRebalancedSeparatorPos(this->_helper.headManager.getHead(&opCtx), 0));
- }
-
- virtual string treeSpec() const = 0;
- virtual int expectedSeparator() const = 0;
- virtual void modTree() {}
-};
-
-template <class OnDiskFormat>
-class EvenRebalanceLeft : public RebalancedSeparatorBase<OnDiskFormat> {
- virtual string treeSpec() const {
- return "{$7:{$1:null,$2$31f:null,$3:null,"
- "$4$31f:null,$5:null,$6:null},"
- "_:{$8:null,$9:null,$10$31e:null}}";
- }
- virtual int expectedSeparator() const {
- return 4;
- }
-};
-
-template <class OnDiskFormat>
-class EvenRebalanceLeftCusp : public RebalancedSeparatorBase<OnDiskFormat> {
- virtual string treeSpec() const {
- return "{$6:{$1:null,$2$31f:null,$3:null,$4$31f:null,$5:null},"
- "_:{$7:null,$8:null,$9$31e:null,$10:null}}";
- }
- virtual int expectedSeparator() const {
- return 4;
- }
-};
-
-template <class OnDiskFormat>
-class EvenRebalanceRight : public RebalancedSeparatorBase<OnDiskFormat> {
- virtual string treeSpec() const {
- return "{$3:{$1:null,$2$31f:null},_:{$4$31f:null,$5:null,$6:null,$7:null,$8$31e:null,$9:"
- "null,$10:null}}";
- }
- virtual int expectedSeparator() const {
- return 4;
- }
-};
-
-template <class OnDiskFormat>
-class EvenRebalanceRightCusp : public RebalancedSeparatorBase<OnDiskFormat> {
- virtual string treeSpec() const {
- return "{$4$31f:{$1:null,$2$31f:null,$3:null},_:{$5:null,$6:null,$7$31e:null,$8:null,$9:"
- "null,$10:null}}";
- }
- virtual int expectedSeparator() const {
- return 4;
- }
-};
-
-template <class OnDiskFormat>
-class EvenRebalanceCenter : public RebalancedSeparatorBase<OnDiskFormat> {
- virtual string treeSpec() const {
- return "{$5:{$1:null,$2$31f:null,$3:null,$4$31f:null},_:{$6:null,$7$31e:null,$8:null,$9:"
- "null,$10:null}}";
- }
- virtual int expectedSeparator() const {
- return 4;
- }
-};
-
-template <class OnDiskFormat>
-class OddRebalanceLeft : public RebalancedSeparatorBase<OnDiskFormat> {
- virtual string treeSpec() const {
- return "{$6$31f:{$1:null,$2:null,$3:null,$4:null,$5:null},_:{$7:null,$8:null,$9:null,$10:"
- "null}}";
- }
- virtual int expectedSeparator() const {
- return 4;
- }
-};
-
-template <class OnDiskFormat>
-class OddRebalanceRight : public RebalancedSeparatorBase<OnDiskFormat> {
- virtual string treeSpec() const {
- return "{$4:{$1:null,$2:null,$3:null},_:{$5:null,$6:null,$7:null,$8$31f:null,$9:null,$10:"
- "null}}";
- }
- virtual int expectedSeparator() const {
- return 4;
- }
-};
-
-template <class OnDiskFormat>
-class OddRebalanceCenter : public RebalancedSeparatorBase<OnDiskFormat> {
- virtual string treeSpec() const {
- return "{$5:{$1:null,$2:null,$3:null,$4:null},_:{$6:null,$7:null,$8:null,$9:null,$10$31f:"
- "null}}";
- }
- virtual int expectedSeparator() const {
- return 4;
- }
-};
-
-template <class OnDiskFormat>
-class RebalanceEmptyRight : public RebalancedSeparatorBase<OnDiskFormat> {
- virtual string treeSpec() const {
- return "{$a:{$1:null,$2:null,$3:null,$4:null,$5:null,$6:null,$7:null,$8:null,$9:null},_:{$"
- "b:null}}";
- }
- virtual void modTree() {
- BSONObj k = BSON("" << bigNumString(0xb, 800));
- ASSERT(this->unindex(k));
- }
- virtual int expectedSeparator() const {
- return 4;
- }
-};
-
-template <class OnDiskFormat>
-class RebalanceEmptyLeft : public RebalancedSeparatorBase<OnDiskFormat> {
- virtual string treeSpec() const {
- return "{$a:{$1:null},_:{$11:null,$12:null,$13:null,$14:null,$15:null,$16:null,$17:null,$"
- "18:null,$19:null}}";
- }
- virtual void modTree() {
- BSONObj k = BSON("" << bigNumString(0x1, 800));
- ASSERT(this->unindex(k));
- }
- virtual int expectedSeparator() const {
- return 4;
- }
-};
-
-template <class OnDiskFormat>
-class NoMoveAtLowWaterMarkRight : public MergeSizeJustRightRight<OnDiskFormat> {
- virtual int rightSize() const {
- return MergeSizeJustRightRight<OnDiskFormat>::rightSize() + 1;
- }
-
- virtual void initCheck() {
- OperationContextNoop opCtx;
- _oldTop = this->getKey(this->_helper.headManager.getHead(&opCtx), 0).data.toBson();
- }
-
- virtual void validate() {
- OperationContextNoop opCtx;
- ASSERT_BSONOBJ_EQ(_oldTop,
- this->getKey(this->_helper.headManager.getHead(&opCtx), 0).data.toBson());
- }
-
- virtual bool merge() const {
- return false;
- }
-
-protected:
- BSONObj _oldTop;
-};
-
-template <class OnDiskFormat>
-class MoveBelowLowWaterMarkRight : public NoMoveAtLowWaterMarkRight<OnDiskFormat> {
- virtual int rightSize() const {
- return MergeSizeJustRightRight<OnDiskFormat>::rightSize();
- }
- virtual int leftSize() const {
- return MergeSizeJustRightRight<OnDiskFormat>::leftSize() + 1;
- }
-
- virtual void validate() {
- OperationContextNoop opCtx;
- // Different top means we rebalanced
- ASSERT_BSONOBJ_NE(this->_oldTop,
- this->getKey(this->_helper.headManager.getHead(&opCtx), 0).data.toBson());
- }
-};
-
-template <class OnDiskFormat>
-class NoMoveAtLowWaterMarkLeft : public MergeSizeJustRightLeft<OnDiskFormat> {
- virtual int leftSize() const {
- return MergeSizeJustRightLeft<OnDiskFormat>::leftSize() + 1;
- }
- virtual void initCheck() {
- OperationContextNoop opCtx;
- this->_oldTop = this->getKey(this->_helper.headManager.getHead(&opCtx), 0).data.toBson();
- }
-
- virtual void validate() {
- OperationContextNoop opCtx;
- ASSERT_BSONOBJ_EQ(this->_oldTop,
- this->getKey(this->_helper.headManager.getHead(&opCtx), 0).data.toBson());
- }
- virtual bool merge() const {
- return false;
- }
-
-protected:
- BSONObj _oldTop;
-};
-
-template <class OnDiskFormat>
-class MoveBelowLowWaterMarkLeft : public NoMoveAtLowWaterMarkLeft<OnDiskFormat> {
- virtual int leftSize() const {
- return MergeSizeJustRightLeft<OnDiskFormat>::leftSize();
- }
- virtual int rightSize() const {
- return MergeSizeJustRightLeft<OnDiskFormat>::rightSize() + 1;
- }
-
- virtual void validate() {
- OperationContextNoop opCtx;
- // Different top means we rebalanced
- ASSERT_BSONOBJ_NE(this->_oldTop,
- this->getKey(this->_helper.headManager.getHead(&opCtx), 0).data.toBson());
- }
-};
-
-template <class OnDiskFormat>
-class PreferBalanceLeft : public BtreeLogicTestBase<OnDiskFormat> {
-public:
- void run() {
- OperationContextNoop opCtx;
- ArtificialTreeBuilder<OnDiskFormat> builder(&opCtx, &this->_helper);
-
- builder.makeTree(
- "{$10:{$1:null,$2:null,$3:null,$4:null,$5:null,$6:null},"
- "$20:{$11:null,$12:null,$13:null,$14:null},"
- "_:{$30:null}}");
-
- ASSERT_EQUALS(13, this->_helper.btree.fullValidate(&opCtx, NULL, true, false, 0));
-
- // The tree has 4 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(5, this->_helper.recordStore.numRecords(NULL));
-
- const BSONObj k = BSON("" << bigNumString(0x12, 800));
- ASSERT(this->unindex(k));
-
- ASSERT_EQUALS(12, this->_helper.btree.fullValidate(&opCtx, NULL, true, false, 0));
-
- // The tree has 4 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(5, this->_helper.recordStore.numRecords(NULL));
-
- builder.checkStructure(
- "{$5:{$1:null,$2:null,$3:null,$4:null},"
- "$20:{$6:null,$10:null,$11:null,$13:null,$14:null},"
- "_:{$30:null}}");
- }
-};
-
-template <class OnDiskFormat>
-class PreferBalanceRight : public BtreeLogicTestBase<OnDiskFormat> {
-public:
- void run() {
- OperationContextNoop opCtx;
- ArtificialTreeBuilder<OnDiskFormat> builder(&opCtx, &this->_helper);
-
- builder.makeTree(
- "{$10:{$1:null},"
- "$20:{$11:null,$12:null,$13:null,$14:null},"
- "_:{$31:null,$32:null,$33:null,$34:null,$35:null,$36:null}}");
-
- ASSERT_EQUALS(13, this->_helper.btree.fullValidate(&opCtx, NULL, true, false, 0));
-
- // The tree has 4 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(5, this->_helper.recordStore.numRecords(NULL));
-
- const BSONObj k = BSON("" << bigNumString(0x12, 800));
- ASSERT(this->unindex(k));
-
- ASSERT_EQUALS(12, this->_helper.btree.fullValidate(&opCtx, NULL, true, false, 0));
-
- // The tree has 4 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(5, this->_helper.recordStore.numRecords(NULL));
-
- builder.checkStructure(
- "{$10:{$1:null},"
- "$31:{$11:null,$13:null,$14:null,$20:null},"
- "_:{$32:null,$33:null,$34:null,$35:null,$36:null}}");
- }
-};
-
-template <class OnDiskFormat>
-class RecursiveMergeThenBalance : public BtreeLogicTestBase<OnDiskFormat> {
-public:
- void run() {
- OperationContextNoop opCtx;
- ArtificialTreeBuilder<OnDiskFormat> builder(&opCtx, &this->_helper);
-
- builder.makeTree(
- "{$10:{$5:{$1:null,$2:null},$8:{$6:null,$7:null}},"
- "_:{$20:null,$30:null,$40:null,$50:null,"
- "$60:null,$70:null,$80:null,$90:null}}");
-
- ASSERT_EQUALS(15, this->_helper.btree.fullValidate(&opCtx, NULL, true, false, 0));
-
- // The tree has 5 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(6, this->_helper.recordStore.numRecords(NULL));
-
- const BSONObj k = BSON("" << bigNumString(0x7, 800));
- ASSERT(this->unindex(k));
-
- ASSERT_EQUALS(14, this->_helper.btree.fullValidate(&opCtx, NULL, true, false, 0));
-
- // The tree has 4 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(5, this->_helper.recordStore.numRecords(NULL));
-
- builder.checkStructure(
- "{$40:{$8:{$1:null,$2:null,$5:null,$6:null},$10:null,$20:null,$30:null},"
- "_:{$50:null,$60:null,$70:null,$80:null,$90:null}}");
- }
-};
-
-template <class OnDiskFormat>
-class DelEmptyNoNeighbors : public BtreeLogicTestBase<OnDiskFormat> {
-public:
- void run() {
- OperationContextNoop opCtx;
- ArtificialTreeBuilder<OnDiskFormat> builder(&opCtx, &this->_helper);
-
- builder.makeTree("{b:{a:null}}");
-
- ASSERT_EQUALS(2, this->_helper.btree.fullValidate(&opCtx, NULL, true, false, 0));
-
- // The tree has 2 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(3, this->_helper.recordStore.numRecords(NULL));
-
- const BSONObj k = BSON(""
- << "a");
- ASSERT(this->unindex(k));
-
- ASSERT_EQUALS(1, this->_helper.btree.fullValidate(&opCtx, NULL, true, false, 0));
-
- // The tree has 1 bucket + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(2, this->_helper.recordStore.numRecords(NULL));
-
- builder.checkStructure("{b:null}");
- }
-};
-
-template <class OnDiskFormat>
-class DelEmptyEmptyNeighbors : public BtreeLogicTestBase<OnDiskFormat> {
-public:
- void run() {
- OperationContextNoop opCtx;
- ArtificialTreeBuilder<OnDiskFormat> builder(&opCtx, &this->_helper);
-
- builder.makeTree("{a:null,c:{b:null},d:null}");
-
- ASSERT_EQUALS(4, this->_helper.btree.fullValidate(&opCtx, NULL, true, false, 0));
-
- // The tree has 2 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(3, this->_helper.recordStore.numRecords(NULL));
-
- const BSONObj k = BSON(""
- << "b");
- ASSERT(this->unindex(k));
-
- ASSERT_EQUALS(3, this->_helper.btree.fullValidate(&opCtx, NULL, true, false, 0));
-
- // The tree has 1 bucket + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(2, this->_helper.recordStore.numRecords(NULL));
-
- builder.checkStructure("{a:null,c:null,d:null}");
- }
-};
-
-template <class OnDiskFormat>
-class DelInternal : public BtreeLogicTestBase<OnDiskFormat> {
-public:
- void run() {
- OperationContextNoop opCtx;
- ArtificialTreeBuilder<OnDiskFormat> builder(&opCtx, &this->_helper);
-
- builder.makeTree("{a:null,c:{b:null},d:null}");
-
- long long unused = 0;
- ASSERT_EQUALS(4, this->_helper.btree.fullValidate(&opCtx, &unused, true, false, 0));
-
- // The tree has 2 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(3, this->_helper.recordStore.numRecords(NULL));
- ASSERT_EQUALS(0, unused);
-
- const BSONObj k = BSON(""
- << "c");
- ASSERT(this->unindex(k));
-
- ASSERT_EQUALS(3, this->_helper.btree.fullValidate(&opCtx, &unused, true, false, 0));
-
- // The tree has 1 bucket + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(2, this->_helper.recordStore.numRecords(NULL));
- ASSERT_EQUALS(0, unused);
-
- builder.checkStructure("{a:null,b:null,d:null}");
- }
-};
-
-template <class OnDiskFormat>
-class DelInternalReplaceWithUnused : public BtreeLogicTestBase<OnDiskFormat> {
-public:
- void run() {
- OperationContextNoop opCtx;
- ArtificialTreeBuilder<OnDiskFormat> builder(&opCtx, &this->_helper);
-
- builder.makeTree("{a:null,c:{b:null},d:null}");
-
- const DiskLoc prevChildBucket =
- this->getKey(this->_helper.headManager.getHead(&opCtx), 1).prevChildBucket;
- this->markKeyUnused(prevChildBucket, 0);
-
- long long unused = 0;
- ASSERT_EQUALS(3, this->_helper.btree.fullValidate(&opCtx, &unused, true, false, 0));
-
- // The tree has 2 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(3, this->_helper.recordStore.numRecords(NULL));
- ASSERT_EQUALS(1, unused);
-
- const BSONObj k = BSON(""
- << "c");
- ASSERT(this->unindex(k));
-
- unused = 0;
- ASSERT_EQUALS(2, this->_helper.btree.fullValidate(&opCtx, &unused, true, false, 0));
-
- // The tree has 1 bucket + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(2, this->_helper.recordStore.numRecords(NULL));
- ASSERT_EQUALS(1, unused);
-
-        // checkStructure() does not discriminate between used and unused keys
- builder.checkStructure("{a:null,b:null,d:null}");
- }
-};
-
-template <class OnDiskFormat>
-class DelInternalReplaceRight : public BtreeLogicTestBase<OnDiskFormat> {
-public:
- void run() {
- OperationContextNoop opCtx;
- ArtificialTreeBuilder<OnDiskFormat> builder(&opCtx, &this->_helper);
-
- builder.makeTree("{a:null,_:{b:null}}");
-
- long long unused = 0;
- ASSERT_EQUALS(2, this->_helper.btree.fullValidate(&opCtx, &unused, true, false, 0));
-
- // The tree has 2 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(3, this->_helper.recordStore.numRecords(NULL));
- ASSERT_EQUALS(0, unused);
-
- const BSONObj k = BSON(""
- << "a");
- ASSERT(this->unindex(k));
-
- unused = 0;
- ASSERT_EQUALS(1, this->_helper.btree.fullValidate(&opCtx, &unused, true, false, 0));
-
- // The tree has 1 bucket + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(2, this->_helper.recordStore.numRecords(NULL));
- ASSERT_EQUALS(0, unused);
-
- builder.checkStructure("{b:null}");
- }
-};
-
-template <class OnDiskFormat>
-class DelInternalPromoteKey : public BtreeLogicTestBase<OnDiskFormat> {
-public:
- void run() {
- OperationContextNoop opCtx;
- ArtificialTreeBuilder<OnDiskFormat> builder(&opCtx, &this->_helper);
-
- builder.makeTree("{a:null,y:{d:{c:{b:null}},_:{e:null}},z:null}");
-
- long long unused = 0;
- ASSERT_EQUALS(7, this->_helper.btree.fullValidate(&opCtx, &unused, true, false, 0));
-
- // The tree has 5 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(6, this->_helper.recordStore.numRecords(NULL));
- ASSERT_EQUALS(0, unused);
-
- const BSONObj k = BSON(""
- << "y");
- ASSERT(this->unindex(k));
-
- unused = 0;
- ASSERT_EQUALS(6, this->_helper.btree.fullValidate(&opCtx, &unused, true, false, 0));
-
- // The tree has 3 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(4, this->_helper.recordStore.numRecords(NULL));
- ASSERT_EQUALS(0, unused);
-
- builder.checkStructure("{a:null,e:{c:{b:null},d:null},z:null}");
- }
-};
-
-template <class OnDiskFormat>
-class DelInternalPromoteRightKey : public BtreeLogicTestBase<OnDiskFormat> {
-public:
- void run() {
- OperationContextNoop opCtx;
- ArtificialTreeBuilder<OnDiskFormat> builder(&opCtx, &this->_helper);
-
- builder.makeTree("{a:null,_:{e:{c:null},_:{f:null}}}");
-
- long long unused = 0;
- ASSERT_EQUALS(4, this->_helper.btree.fullValidate(&opCtx, &unused, true, false, 0));
-
- // The tree has 4 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(5, this->_helper.recordStore.numRecords(NULL));
- ASSERT_EQUALS(0, unused);
-
- const BSONObj k = BSON(""
- << "a");
- ASSERT(this->unindex(k));
-
- unused = 0;
- ASSERT_EQUALS(3, this->_helper.btree.fullValidate(&opCtx, &unused, true, false, 0));
-
- // The tree has 2 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(3, this->_helper.recordStore.numRecords(NULL));
- ASSERT_EQUALS(0, unused);
-
- builder.checkStructure("{c:null,_:{e:null,f:null}}");
- }
-};
-
-template <class OnDiskFormat>
-class DelInternalReplacementPrevNonNull : public BtreeLogicTestBase<OnDiskFormat> {
-public:
- void run() {
- OperationContextNoop opCtx;
- ArtificialTreeBuilder<OnDiskFormat> builder(&opCtx, &this->_helper);
-
- builder.makeTree("{a:null,d:{c:{b:null}},e:null}");
-
- long long unused = 0;
- ASSERT_EQUALS(5, this->_helper.btree.fullValidate(&opCtx, &unused, true, false, 0));
-
- // The tree has 3 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(4, this->_helper.recordStore.numRecords(NULL));
- ASSERT_EQUALS(0, unused);
-
- const BSONObj k = BSON(""
- << "d");
- ASSERT(this->unindex(k));
-
- ASSERT_EQUALS(4, this->_helper.btree.fullValidate(&opCtx, &unused, true, false, 0));
-
- // The tree has 3 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(4, this->_helper.recordStore.numRecords(NULL));
- ASSERT_EQUALS(1, unused);
-
- builder.checkStructure("{a:null,d:{c:{b:null}},e:null}");
-
- // Check 'unused' key
- ASSERT(this->getKey(this->_helper.headManager.getHead(&opCtx), 1).recordLoc.getOfs() & 1);
- }
-};
-
-template <class OnDiskFormat>
-class DelInternalReplacementNextNonNull : public BtreeLogicTestBase<OnDiskFormat> {
-public:
- void run() {
- OperationContextNoop opCtx;
- ArtificialTreeBuilder<OnDiskFormat> builder(&opCtx, &this->_helper);
-
- builder.makeTree("{a:null,_:{c:null,_:{d:null}}}");
-
- long long unused = 0;
- ASSERT_EQUALS(3, this->_helper.btree.fullValidate(&opCtx, &unused, true, false, 0));
-
- // The tree has 3 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(4, this->_helper.recordStore.numRecords(NULL));
- ASSERT_EQUALS(0, unused);
-
- const BSONObj k = BSON(""
- << "a");
- ASSERT(this->unindex(k));
-
- ASSERT_EQUALS(2, this->_helper.btree.fullValidate(&opCtx, &unused, true, false, 0));
-
- // The tree has 3 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(4, this->_helper.recordStore.numRecords(NULL));
- ASSERT_EQUALS(1, unused);
-
- builder.checkStructure("{a:null,_:{c:null,_:{d:null}}}");
-
- // Check 'unused' key
- ASSERT(this->getKey(this->_helper.headManager.getHead(&opCtx), 0).recordLoc.getOfs() & 1);
- }
-};
-
-template <class OnDiskFormat>
-class DelInternalSplitPromoteLeft : public BtreeLogicTestBase<OnDiskFormat> {
-public:
- void run() {
- OperationContextNoop opCtx;
- ArtificialTreeBuilder<OnDiskFormat> builder(&opCtx, &this->_helper);
-
- builder.makeTree(
- "{$10:null,$20:null,$30$10:{$25:{$23:null},_:{$27:null}},"
- "$40:null,$50:null,$60:null,$70:null,$80:null,$90:null,$100:null}");
-
- long long unused = 0;
- ASSERT_EQUALS(13, this->_helper.btree.fullValidate(&opCtx, &unused, true, false, 0));
-
- // The tree has 4 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(5, this->_helper.recordStore.numRecords(NULL));
- ASSERT_EQUALS(0, unused);
-
- const BSONObj k = BSON("" << bigNumString(0x30, 0x10));
- ASSERT(this->unindex(k));
-
- ASSERT_EQUALS(12, this->_helper.btree.fullValidate(&opCtx, &unused, true, false, 0));
-
- // The tree has 4 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(5, this->_helper.recordStore.numRecords(NULL));
- ASSERT_EQUALS(0, unused);
-
- builder.checkStructure(
- "{$60:{$10:null,$20:null,"
- "$27:{$23:null,$25:null},$40:null,$50:null},"
- "_:{$70:null,$80:null,$90:null,$100:null}}");
- }
-};
-
-template <class OnDiskFormat>
-class DelInternalSplitPromoteRight : public BtreeLogicTestBase<OnDiskFormat> {
-public:
- void run() {
- OperationContextNoop opCtx;
- ArtificialTreeBuilder<OnDiskFormat> builder(&opCtx, &this->_helper);
-
- builder.makeTree(
- "{$10:null,$20:null,$30:null,$40:null,$50:null,$60:null,$70:null,"
- "$80:null,$90:null,$100$10:{$95:{$93:null},_:{$97:null}}}");
-
- long long unused = 0;
- ASSERT_EQUALS(13, this->_helper.btree.fullValidate(&opCtx, &unused, true, false, 0));
-
- // The tree has 4 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(5, this->_helper.recordStore.numRecords(NULL));
- ASSERT_EQUALS(0, unused);
-
- const BSONObj k = BSON("" << bigNumString(0x100, 0x10));
- ASSERT(this->unindex(k));
-
- ASSERT_EQUALS(12, this->_helper.btree.fullValidate(&opCtx, &unused, true, false, 0));
-
- // The tree has 4 buckets + 1 for the this->_helper.dummyDiskLoc
- ASSERT_EQUALS(5, this->_helper.recordStore.numRecords(NULL));
- ASSERT_EQUALS(0, unused);
-
- builder.checkStructure(
- "{$80:{$10:null,$20:null,$30:null,$40:null,$50:null,$60:null,$70:null},"
- "_:{$90:null,$97:{$93:null,$95:null}}}");
- }
-};
-
-template <class OnDiskFormat>
-class LocateEmptyForward : public BtreeLogicTestBase<OnDiskFormat> {
-public:
- void run() {
- OperationContextNoop opCtx;
- this->_helper.btree.initAsEmpty(&opCtx).transitional_ignore();
-
- BSONObj key1 = simpleKey('a');
- this->insert(key1, this->_helper.dummyDiskLoc).transitional_ignore();
- BSONObj key2 = simpleKey('b');
- this->insert(key2, this->_helper.dummyDiskLoc).transitional_ignore();
- BSONObj key3 = simpleKey('c');
- this->insert(key3, this->_helper.dummyDiskLoc).transitional_ignore();
-
- this->checkValidNumKeys(3);
- this->locate(BSONObj(), 0, false, this->_helper.headManager.getHead(&opCtx), 1);
- }
-};
-
-template <class OnDiskFormat>
-class LocateEmptyReverse : public BtreeLogicTestBase<OnDiskFormat> {
-public:
- void run() {
- OperationContextNoop opCtx;
- this->_helper.btree.initAsEmpty(&opCtx).transitional_ignore();
-
- BSONObj key1 = simpleKey('a');
- this->insert(key1, this->_helper.dummyDiskLoc).transitional_ignore();
- BSONObj key2 = simpleKey('b');
- this->insert(key2, this->_helper.dummyDiskLoc).transitional_ignore();
- BSONObj key3 = simpleKey('c');
- this->insert(key3, this->_helper.dummyDiskLoc).transitional_ignore();
-
- this->checkValidNumKeys(3);
- this->locate(BSONObj(), -1, false, DiskLoc(), -1);
- }
-};
-
-template <class OnDiskFormat>
-class DuplicateKeys : public BtreeLogicTestBase<OnDiskFormat> {
-public:
- void run() {
- OperationContextNoop opCtx;
- this->_helper.btree.initAsEmpty(&opCtx).transitional_ignore();
-
- BSONObj key1 = simpleKey('z');
- ASSERT_OK(this->insert(key1, this->_helper.dummyDiskLoc, true));
- this->checkValidNumKeys(1);
- this->locate(key1, 0, true, this->_helper.headManager.getHead(&opCtx), 1);
-
- // Attempt to insert a dup key/value, which is okay.
- ASSERT_EQUALS(Status::OK(), this->insert(key1, this->_helper.dummyDiskLoc, true));
- this->checkValidNumKeys(1);
- this->locate(key1, 0, true, this->_helper.headManager.getHead(&opCtx), 1);
-
- // Attempt to insert a dup key/value with dupsAllowed=false.
- ASSERT_EQUALS(ErrorCodes::DuplicateKeyValue,
- this->insert(key1, this->_helper.dummyDiskLoc, false));
- this->checkValidNumKeys(1);
- this->locate(key1, 0, true, this->_helper.headManager.getHead(&opCtx), 1);
-
- // Add another record to produce another diskloc.
- StatusWith<RecordId> s =
- this->_helper.recordStore.insertRecord(&opCtx, "a", 1, Timestamp(), false);
-
- ASSERT_TRUE(s.isOK());
- ASSERT_EQUALS(3, this->_helper.recordStore.numRecords(NULL));
-
- const DiskLoc dummyDiskLoc2 = DiskLoc::fromRecordId(s.getValue());
-
- // Attempt to insert a dup key but this time with a different value.
- ASSERT_EQUALS(ErrorCodes::DuplicateKey, this->insert(key1, dummyDiskLoc2, false));
- this->checkValidNumKeys(1);
-
- // Insert a dup key with dupsAllowed=true, should succeed.
- ASSERT_OK(this->insert(key1, dummyDiskLoc2, true));
- this->checkValidNumKeys(2);
-
- // Clean up.
- this->_helper.recordStore.deleteRecord(&opCtx, s.getValue());
- ASSERT_EQUALS(2, this->_helper.recordStore.numRecords(NULL));
- }
-};
-
-
-/* This test requires the entire server to be linked in and is better implemented using
-   the JS framework. It is disabled here and will be ported to jsCore.
-
-template<class OnDiskFormat>
-class SignedZeroDuplication : public BtreeLogicTestBase<OnDiskFormat> {
-public:
- void run() {
- ASSERT_EQUALS(0.0, -0.0);
- DBDirectClient c;
-
- static const string ns("unittests.SignedZeroDuplication");
-
- c.ensureIndex(ns, BSON("b" << 1), true);
- c.insert(ns, BSON("b" << 0.0));
- c.insert(ns, BSON("b" << 1.0));
- c.update(ns, BSON("b" << 1.0), BSON("b" << -0.0));
-
- ASSERT_EQUALS(1U, c.count(ns, BSON("b" << 0.0)));
- }
-};
-*/
-
-/*
-// QUERY_MIGRATION: port later
- class PackUnused : public Base {
- public:
- void run() {
- for ( long long i = 0; i < 1000000; i += 1000 ) {
- insert( i );
- }
- string orig, after;
- {
- stringstream ss;
- bt()->shape( ss );
- orig = ss.str();
- }
- vector< string > toDel;
- vector< string > other;
- BSONObjBuilder start;
- start.appendMinKey( "a" );
- BSONObjBuilder end;
- end.appendMaxKey( "a" );
- unique_ptr< BtreeCursor > c( BtreeCursor::make( nsdetails( ns() ),
- id(),
- start.done(),
- end.done(),
- false,
- 1 ) );
- while( c->ok() ) {
-            // isNull() means this key has no left child, i.e. it is a leaf-level key
-            bool hasNoChild =
-                c->getBucket().btree()->keyNode(c->getKeyOfs()).prevChildBucket.isNull();
-
-            if (hasNoChild) {
- toDel.push_back( c->currKey().firstElement().valuestr() );
- }
- else {
- other.push_back( c->currKey().firstElement().valuestr() );
- }
- c->advance();
- }
- ASSERT( toDel.size() > 0 );
- for( vector< string >::const_iterator i = toDel.begin(); i != toDel.end(); ++i ) {
- BSONObj o = BSON( "a" << *i );
- this->unindex( o );
- }
- ASSERT( other.size() > 0 );
- for( vector< string >::const_iterator i = other.begin(); i != other.end(); ++i ) {
- BSONObj o = BSON( "a" << *i );
- this->unindex( o );
- }
-
- long long unused = 0;
- ASSERT_EQUALS( 0, bt()->fullValidate(&opCtx, dl(), order(), &unused, true ) );
-
- for ( long long i = 50000; i < 50100; ++i ) {
- insert( i );
- }
-
- long long unused2 = 0;
- ASSERT_EQUALS( 100, bt()->fullValidate(&opCtx, dl(), order(), &unused2, true ) );
-
-// log() << "old unused: " << unused << ", new unused: " << unused2 << endl;
-//
- ASSERT( unused2 <= unused );
- }
- protected:
- void insert( long long n ) {
- string val = bigNumString( n );
- BSONObj k = BSON( "a" << val );
- Base::insert( k );
- }
- };
-
- class DontDropReferenceKey : public PackUnused {
- public:
- void run() {
- // with 80 root node is full
- for ( long long i = 0; i < 80; i += 1 ) {
- insert( i );
- }
-
- BSONObjBuilder start;
- start.appendMinKey( "a" );
- BSONObjBuilder end;
- end.appendMaxKey( "a" );
- BSONObj l = bt()->keyNode( 0 ).key.toBson();
- string toInsert;
- unique_ptr< BtreeCursor > c( BtreeCursor::make( nsdetails( ns() ),
- id(),
- start.done(),
- end.done(),
- false,
- 1 ) );
- while( c->ok() ) {
- if ( c->currKey().woCompare( l ) > 0 ) {
- toInsert = c->currKey().firstElement().valuestr();
- break;
- }
- c->advance();
- }
- // too much work to try to make this happen through inserts and deletes
- // we are intentionally manipulating the btree bucket directly here
- BtreeBucket::Loc* L = const_cast< BtreeBucket::Loc* >(
- &bt()->keyNode( 1 ).prevChildBucket );
- writing(L)->Null();
- writingInt( const_cast< BtreeBucket::Loc& >(
- bt()->keyNode( 1 ).recordLoc ).GETOFS() ) |= 1; // make unused
- BSONObj k = BSON( "a" << toInsert );
- Base::insert( k );
- }
- };
- */
-
-//
-// TEST SUITE DEFINITION
-//
-
-template <class OnDiskFormat>
-class BtreeLogicTestSuite : public unittest::Suite {
-public:
- BtreeLogicTestSuite(const std::string& name) : Suite(name) {}
-
- void setupTests() {
- add<SimpleCreate<OnDiskFormat>>();
- add<SimpleInsertDelete<OnDiskFormat>>();
- add<SplitRightHeavyBucket<OnDiskFormat>>();
- add<SplitLeftHeavyBucket<OnDiskFormat>>();
- add<MissingLocate<OnDiskFormat>>();
- add<MissingLocateMultiBucket<OnDiskFormat>>();
- add<SERVER983<OnDiskFormat>>();
- add<DontReuseUnused<OnDiskFormat>>();
- add<MergeBucketsLeft<OnDiskFormat>>();
- add<MergeBucketsRight<OnDiskFormat>>();
- add<MergeBucketsDontReplaceHead<OnDiskFormat>>();
- add<MergeBucketsDelInternal<OnDiskFormat>>();
- add<MergeBucketsRightNull<OnDiskFormat>>();
- add<DontMergeSingleBucket<OnDiskFormat>>();
- add<ParentMergeNonRightToLeft<OnDiskFormat>>();
- add<ParentMergeNonRightToRight<OnDiskFormat>>();
- add<CantMergeRightNoMerge<OnDiskFormat>>();
- add<CantMergeLeftNoMerge<OnDiskFormat>>();
- add<MergeOption<OnDiskFormat>>();
- add<ForceMergeLeft<OnDiskFormat>>();
- add<ForceMergeRight<OnDiskFormat>>();
- add<RecursiveMerge<OnDiskFormat>>();
- add<RecursiveMergeRightBucket<OnDiskFormat>>();
- add<RecursiveMergeDoubleRightBucket<OnDiskFormat>>();
-
- add<MergeSizeJustRightRight<OnDiskFormat>>();
- add<MergeSizeJustRightLeft<OnDiskFormat>>();
- add<MergeSizeRight<OnDiskFormat>>();
- add<MergeSizeLeft<OnDiskFormat>>();
- add<NoMergeBelowMarkRight<OnDiskFormat>>();
- add<NoMergeBelowMarkLeft<OnDiskFormat>>();
- add<MergeSizeRightTooBig<OnDiskFormat>>();
- add<MergeSizeLeftTooBig<OnDiskFormat>>();
- add<MergeRightEmpty<OnDiskFormat>>();
- add<MergeMinRightEmpty<OnDiskFormat>>();
- add<MergeLeftEmpty<OnDiskFormat>>();
- add<MergeMinLeftEmpty<OnDiskFormat>>();
- add<BalanceRightEmpty<OnDiskFormat>>();
- add<BalanceLeftEmpty<OnDiskFormat>>();
-
- add<BalanceOneLeftToRight<OnDiskFormat>>();
- add<BalanceOneRightToLeft<OnDiskFormat>>();
- add<BalanceThreeLeftToRight<OnDiskFormat>>();
- add<BalanceThreeRightToLeft<OnDiskFormat>>();
- add<BalanceSingleParentKey<OnDiskFormat>>();
-
- add<PackEmptyBucket<OnDiskFormat>>();
- add<PackedDataSizeEmptyBucket<OnDiskFormat>>();
-
- add<BalanceSingleParentKeyPackParent<OnDiskFormat>>();
- add<BalanceSplitParent<OnDiskFormat>>();
- add<EvenRebalanceLeft<OnDiskFormat>>();
- add<EvenRebalanceLeftCusp<OnDiskFormat>>();
- add<EvenRebalanceRight<OnDiskFormat>>();
- add<EvenRebalanceRightCusp<OnDiskFormat>>();
- add<EvenRebalanceCenter<OnDiskFormat>>();
- add<OddRebalanceLeft<OnDiskFormat>>();
- add<OddRebalanceRight<OnDiskFormat>>();
- add<OddRebalanceCenter<OnDiskFormat>>();
- add<RebalanceEmptyRight<OnDiskFormat>>();
- add<RebalanceEmptyLeft<OnDiskFormat>>();
-
- add<NoMoveAtLowWaterMarkRight<OnDiskFormat>>();
- add<MoveBelowLowWaterMarkRight<OnDiskFormat>>();
- add<NoMoveAtLowWaterMarkLeft<OnDiskFormat>>();
- add<MoveBelowLowWaterMarkLeft<OnDiskFormat>>();
-
- add<PreferBalanceLeft<OnDiskFormat>>();
- add<PreferBalanceRight<OnDiskFormat>>();
- add<RecursiveMergeThenBalance<OnDiskFormat>>();
- add<DelEmptyNoNeighbors<OnDiskFormat>>();
- add<DelEmptyEmptyNeighbors<OnDiskFormat>>();
- add<DelInternal<OnDiskFormat>>();
- add<DelInternalReplaceWithUnused<OnDiskFormat>>();
- add<DelInternalReplaceRight<OnDiskFormat>>();
- add<DelInternalPromoteKey<OnDiskFormat>>();
- add<DelInternalPromoteRightKey<OnDiskFormat>>();
- add<DelInternalReplacementPrevNonNull<OnDiskFormat>>();
- add<DelInternalReplacementNextNonNull<OnDiskFormat>>();
- add<DelInternalSplitPromoteLeft<OnDiskFormat>>();
- add<DelInternalSplitPromoteRight<OnDiskFormat>>();
-
- add<LocateEmptyForward<OnDiskFormat>>();
- add<LocateEmptyReverse<OnDiskFormat>>();
-
- add<DuplicateKeys<OnDiskFormat>>();
- }
-};
-
-// Test suite for both V0 and V1
-static unittest::SuiteInstance<BtreeLogicTestSuite<BtreeLayoutV0>> SUITE_V0("BTreeLogicTests_V0");
-
-static unittest::SuiteInstance<BtreeLogicTestSuite<BtreeLayoutV1>> SUITE_V1("BTreeLogicTests_V1");
-}
diff --git a/src/mongo/db/storage/mmap_v1/btree/btree_ondisk.cpp b/src/mongo/db/storage/mmap_v1/btree/btree_ondisk.cpp
deleted file mode 100644
index 91b7141e7ed..00000000000
--- a/src/mongo/db/storage/mmap_v1/btree/btree_ondisk.cpp
+++ /dev/null
@@ -1,59 +0,0 @@
-/**
- * Copyright (C) 2014 MongoDB Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kStorage
-
-#include "mongo/platform/basic.h"
-
-#include "mongo/db/storage/mmap_v1/btree/btree_ondisk.h"
-
-#include "mongo/util/assert_util.h"
-#include "mongo/util/log.h"
-
-namespace mongo {
-
-void DiskLoc56Bit::operator=(const DiskLoc& loc) {
- ofs = loc.getOfs();
- int la = loc.a();
- if (la == DiskLoc::max().a()) {
- invariant(ofs == DiskLoc::max().getOfs());
- la = OurMaxA;
- }
- invariant(la <= OurMaxA); // must fit in 3 bytes
- if (la < 0) {
- if (la != -1) {
- log() << "btree diskloc isn't negative 1: " << la << std::endl;
- invariant(la == -1);
- }
- la = 0;
- ofs = OurNullOfs;
- }
- memcpy(_a, &la, 3); // endian
-}
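-
-// For example (on a little-endian machine, as the "endian" comment implies),
-// assigning DiskLoc(5, 4096) stores ofs = 4096 and copies the low three bytes
-// of la = 5 into _a, leaving _a = {0x05, 0x00, 0x00}.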
-
-} // namespace mongo
diff --git a/src/mongo/db/storage/mmap_v1/btree/btree_ondisk.h b/src/mongo/db/storage/mmap_v1/btree/btree_ondisk.h
deleted file mode 100644
index 9be2f947772..00000000000
--- a/src/mongo/db/storage/mmap_v1/btree/btree_ondisk.h
+++ /dev/null
@@ -1,377 +0,0 @@
-/**
- * Copyright (C) 2014 MongoDB Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#pragma once
-
-#include "mongo/base/static_assert.h"
-#include "mongo/db/jsobj.h"
-#include "mongo/db/storage/mmap_v1/btree/key.h"
-#include "mongo/db/storage/mmap_v1/diskloc.h"
-
-namespace mongo {
-
-const int OldBucketSize = 8192;
-
-//
-// On-disk index format
-//
-
-#pragma pack(1)
-/**
- * This is the fixed width data component for storage of a key within a bucket. It contains an
- * offset pointer to the variable width bson data component. This may be 'unused', please see
- * below.
- *
- * Why is this templated on Loc? Because V0 and V1 have differently sized DiskLocs but
- * otherwise the same layout.
- */
-template <class LocType>
-struct FixedWidthKey {
- //
- // Data
- //
-
- /**
-     * The 'left' child bucket of this key. If this is the i-th key, it points to the
-     * child bucket at index i.
- */
- LocType prevChildBucket;
-
- /**
- * The location of the record associated with this key.
- */
- LocType recordLoc;
-
- /**
- * Offset within current bucket of the variable width bson key for this _KeyNode.
- */
- unsigned short _kdo;
-
- //
- // Accessors / mutators
- //
-
- short keyDataOfs() const {
- return static_cast<short>(_kdo);
- }
-
- void setKeyDataOfs(short s) {
- _kdo = s;
- invariant(s >= 0);
- }
-
- void setKeyDataOfsSavingUse(short s) {
- // XXX kill this func
- setKeyDataOfs(s);
- }
-
- /**
- * Unused keys are not returned by read operations. Keys may be marked
- * as unused in cases where it is difficult to delete them while
- * maintaining the constraints required of a btree.
- *
- * Setting ofs to odd is the sentinel for unused, as real recordLoc's
- * are always even numbers. Note we need to keep its value basically
- * the same as we use the recordLoc as part of the key in the index
- * (to handle duplicate keys efficiently).
- *
- * Flagging keys as unused is a feature that is being phased out in favor
- * of deleting the keys outright. The current btree implementation is
- * not expected to mark a key as unused in a non-legacy btree.
- */
- void setUnused() {
- recordLoc.GETOFS() |= 1;
- }
-
- void setUsed() {
- recordLoc.GETOFS() &= ~1;
- }
-
- int isUnused() const {
- return recordLoc.getOfs() & 1;
- }
-
- int isUsed() const {
- return !isUnused();
- }
-};
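-
-// A minimal sketch of the low-bit sentinel in action (real record offsets are
-// always even, so the low bit is free to carry the 'unused' flag):
-//
-//     FixedWidthKey<DiskLoc> k;
-//     k.recordLoc.GETOFS() = 4096;  // even offset: a used key
-//     k.setUnused();                // offset becomes 4097 (odd)
-//     invariant(k.isUnused());
-//     k.setUsed();                  // offset is 4096 again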
-
-/**
- * This structure represents header data for a btree bucket. An object of
- * this type is typically allocated inside of a buffer of size BucketSize,
- * resulting in a full bucket with an appropriate header.
- *
- * The body of a btree bucket contains an array of _KeyNode objects starting
- * from its lowest indexed bytes and growing to higher indexed bytes. The
- * body also contains variable width bson keys, which are allocated from the
- * highest indexed bytes toward lower indexed bytes.
- *
- * |hhhh|kkkkkkk--------bbbbbbbbbbbuuubbbuubbb|
- * h = header data
- * k = KeyNode data
- * - = empty space
- * b = bson key data
- * u = unused (old) bson key data, that may be garbage collected
- */
-struct BtreeBucketV0 {
- /**
- * Parent bucket of this bucket, which isNull() for the root bucket.
- */
- DiskLoc parent;
-
- /**
-     * Given that there are n keys, this is the child at index n, i.e. the
-     * rightmost child.
- DiskLoc nextChild;
-
- /**
-     * Can be reused; the value is 8192 in the current pdfile version (Apr 2010).
- */
- unsigned short _wasSize;
-
- /**
- * zero
- */
- unsigned short _reserved1;
-
- int flags;
-
- /** basicInsert() assumes the next three members are consecutive and in this order: */
-
- /** Size of the empty region. */
- int emptySize;
-
- /** Size used for bson storage, including storage of old keys. */
- int topSize;
-
- /* Number of keys in the bucket. */
- int n;
-
- int reserved;
-
- /* Beginning of the bucket's body */
- char data[4];
-
- // Precalculated size constants
- enum { HeaderSize = 40 };
-};
-
-// BtreeBucketV0 is part of the on-disk format, so it should never be changed
-MONGO_STATIC_ASSERT(sizeof(BtreeBucketV0) - sizeof(static_cast<BtreeBucketV0*>(NULL)->data) ==
- BtreeBucketV0::HeaderSize);
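-
-// For reference, HeaderSize = 40 is just the sum of the fields above: parent (8) +
-// nextChild (8) + _wasSize (2) + _reserved1 (2) + flags (4) + emptySize (4) +
-// topSize (4) + n (4) + reserved (4) = 40 bytes.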
-
-/**
- * A variant of DiskLoc used by the V1 bucket type.
- */
-struct DiskLoc56Bit {
- //
- // Data
- //
-
- int ofs;
-
- unsigned char _a[3];
-
- //
- // Accessors XXX rename these, this is terrible
- //
-
- int& GETOFS() {
- return ofs;
- }
-
- int getOfs() const {
- return ofs;
- }
-
- //
- // Comparison
- //
-
- bool isNull() const {
- return ofs < 0;
- }
-
- unsigned long long toLongLong() const {
- // endian
- unsigned long long result = ofs;
- char* cursor = reinterpret_cast<char*>(&result);
- *reinterpret_cast<uint16_t*>(cursor + 4) = *reinterpret_cast<const uint16_t*>(&_a[0]);
- *reinterpret_cast<uint8_t*>(cursor + 6) = *reinterpret_cast<const uint8_t*>(&_a[2]);
- *reinterpret_cast<uint8_t*>(cursor + 7) = uint8_t(0);
- return result;
- }
-
- bool operator<(const DiskLoc56Bit& rhs) const {
-        // The ordering of dup keys in btrees isn't too critical, but we'd like items that
-        // are close together on disk to be close together in the tree, so we do want the
-        // file # to be the most significant bytes
- return toLongLong() < rhs.toLongLong();
- }
-
- int compare(const DiskLoc56Bit& rhs) const {
- unsigned long long a = toLongLong();
- unsigned long long b = rhs.toLongLong();
- if (a < b) {
- return -1;
- } else {
- return a == b ? 0 : 1;
- }
- }
-
- bool operator==(const DiskLoc56Bit& rhs) const {
- return toLongLong() == rhs.toLongLong();
- }
-
- bool operator!=(const DiskLoc56Bit& rhs) const {
- return toLongLong() != rhs.toLongLong();
- }
-
- bool operator==(const DiskLoc& rhs) const {
- return DiskLoc(*this) == rhs;
- }
-
- bool operator!=(const DiskLoc& rhs) const {
- return !(*this == rhs);
- }
-
- //
- // Mutation
- //
-
- enum {
-        OurNullOfs = -2,  // not -1, because the low bit of _KeyNode offsets is the 'unused' flag
- OurMaxA = 0xffffff, // highest 3-byte value
- };
-
- void Null() {
- ofs = OurNullOfs;
- _a[0] = _a[1] = _a[2] = 0;
- }
-
- void operator=(const DiskLoc& loc);
-
- //
- // Type Conversion
- //
-
- RecordId toRecordId() const {
- return DiskLoc(*this).toRecordId();
- }
-
- operator DiskLoc() const {
- // endian
- if (isNull())
- return DiskLoc();
- unsigned a = *((unsigned*)(_a - 1));
- return DiskLoc(a >> 8, ofs);
- }
-
- std::string toString() const {
- return DiskLoc(*this).toString();
- }
-};
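-
-// Little-endian byte layout packed by this struct (the "endian" comments above
-// assume little-endian throughout):
-//
-//     byte:    0..3   4..6    7
-//             [ ofs ] [ _a ] [ 0 ]     as assembled by toLongLong()
-//
-// Putting the 3-byte file number in the more significant bytes is what makes
-// operator< group entries from the same file together.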
-
-struct BtreeBucketV1 {
- /** Parent bucket of this bucket, which isNull() for the root bucket. */
- DiskLoc56Bit parent;
-
-    /** Given that there are n keys, this is the child at index n, i.e. the rightmost child. */
- DiskLoc56Bit nextChild;
-
- unsigned short flags;
-
- /** Size of the empty region. */
- unsigned short emptySize;
-
- /** Size used for bson storage, including storage of old keys. */
- unsigned short topSize;
-
- /* Number of keys in the bucket. */
- unsigned short n;
-
- /* Beginning of the bucket's body */
- char data[4];
-
- // Precalculated size constants
- enum { HeaderSize = 22 };
-};
-
-// BtreeBucketV1 is part of the on-disk format, so it should never be changed
-MONGO_STATIC_ASSERT(sizeof(BtreeBucketV1) - sizeof(static_cast<BtreeBucketV1*>(NULL)->data) ==
- BtreeBucketV1::HeaderSize);
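-
-// As with V0, HeaderSize = 22 is the sum of the fields above: parent (7) +
-// nextChild (7) + flags (2) + emptySize (2) + topSize (2) + n (2) = 22 bytes.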
-
-enum Flags { Packed = 1 };
-
-struct BtreeLayoutV0 {
- typedef FixedWidthKey<DiskLoc> FixedWidthKeyType;
- typedef DiskLoc LocType;
- typedef KeyBson KeyType;
- typedef KeyBson KeyOwnedType;
- typedef BtreeBucketV0 BucketType;
-
- enum { BucketSize = 8192, BucketBodySize = BucketSize - BucketType::HeaderSize };
-
- // largest key size we allow. note we very much need to support bigger keys (somehow) in
- // the future.
-
- static const int KeyMax = OldBucketSize / 10;
-
- // A sentinel value sometimes used to identify a deallocated bucket.
- static const int INVALID_N_SENTINEL = -1;
-
- static void initBucket(BucketType* bucket) {
- bucket->_reserved1 = 0;
- bucket->_wasSize = BucketSize;
- bucket->reserved = 0;
- }
-};
-
-struct BtreeLayoutV1 {
- typedef FixedWidthKey<DiskLoc56Bit> FixedWidthKeyType;
- typedef KeyV1 KeyType;
- typedef KeyV1Owned KeyOwnedType;
- typedef DiskLoc56Bit LocType;
- typedef BtreeBucketV1 BucketType;
-
- enum {
- BucketSize = 8192 - 16, // The -16 is to leave room for the MmapV1RecordHeader header
- BucketBodySize = BucketSize - BucketType::HeaderSize
- };
-
- static const int KeyMax = 1024;
-
- // A sentinel value sometimes used to identify a deallocated bucket.
- static const unsigned short INVALID_N_SENTINEL = 0xffff;
-
- static void initBucket(BucketType* bucket) {}
-};
-
-#pragma pack()
-
-} // namespace mongo
diff --git a/src/mongo/db/storage/mmap_v1/btree/btree_test_help.cpp b/src/mongo/db/storage/mmap_v1/btree/btree_test_help.cpp
deleted file mode 100644
index 6e5bce9b553..00000000000
--- a/src/mongo/db/storage/mmap_v1/btree/btree_test_help.cpp
+++ /dev/null
@@ -1,244 +0,0 @@
-// btree_test_help.cpp : Helper functions for Btree unit-testing
-//
-
-/**
- * Copyright (C) 2014 MongoDB
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects
- * for all of the code used other than as permitted herein. If you modify
- * file(s) with this exception, you may extend this exception to your
- * version of the file(s), but you are not obligated to do so. If you do not
- * wish to do so, delete this exception statement from your version. If you
- * delete this exception statement from all source files in the program,
- * then also delete it in the license file.
- */
-
-#include "mongo/db/storage/mmap_v1/btree/btree_test_help.h"
-
-#include "mongo/db/operation_context_noop.h"
-#include "mongo/unittest/unittest.h"
-
-
-namespace mongo {
-
-using std::string;
-
-string bigNumString(long long n, int len) {
- char sub[17];
- sprintf(sub, "%.16llx", n);
- string val(len, ' ');
- for (int i = 0; i < len; ++i) {
- val[i] = sub[i % 16];
- }
- return val;
-}
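-
-// For example, bigNumString(0x1b, 20) returns "000000000000001b0000": the
-// 16-digit hex rendering of n repeated cyclically until the string is len
-// characters long.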
-
-BSONObj simpleKey(char c, int n) {
- BSONObjBuilder builder;
- string val(n, c);
- builder.append("a", val);
- return builder.obj();
-}
-
-//
-// BtreeLogicTestHelper
-//
-
-template <class OnDiskFormat>
-BtreeLogicTestHelper<OnDiskFormat>::BtreeLogicTestHelper(const BSONObj& order)
- : recordStore("TestRecordStore"),
- btree(&headManager,
- &recordStore,
- &cursorRegistry,
- Ordering::make(order),
- "TestIndex",
- /*isUnique*/ false) {
- static const string randomData("RandomStuff");
-
- // Generate a valid record location for a "fake" record, which we will repeatedly use
-    // throughout the tests.
- OperationContextNoop opCtx;
- StatusWith<RecordId> s = recordStore.insertRecord(
- &opCtx, randomData.c_str(), randomData.length(), Timestamp(), false);
-
- ASSERT_TRUE(s.isOK());
- ASSERT_EQUALS(1, recordStore.numRecords(NULL));
-
- dummyDiskLoc = DiskLoc::fromRecordId(s.getValue());
-}
-
-
-//
-// ArtificialTreeBuilder
-//
-
-template <class OnDiskFormat>
-void ArtificialTreeBuilder<OnDiskFormat>::makeTree(const string& spec) {
- _helper->headManager.setHead(_opCtx, makeTree(fromjson(spec)).toRecordId());
-}
-
-template <class OnDiskFormat>
-DiskLoc ArtificialTreeBuilder<OnDiskFormat>::makeTree(const BSONObj& spec) {
- DiskLoc bucketLoc = _helper->btree._addBucket(_opCtx);
- BucketType* bucket = _helper->btree.getBucket(_opCtx, bucketLoc);
-
- BSONObjIterator i(spec);
- while (i.more()) {
- BSONElement e = i.next();
- DiskLoc child;
- if (e.type() == Object) {
- child = makeTree(e.embeddedObject());
- }
-
- if (e.fieldName() == string("_")) {
- bucket->nextChild = child;
- } else {
- KeyDataOwnedType key(BSON("" << expectedKey(e.fieldName())));
- invariant(_helper->btree.pushBack(bucket, _helper->dummyDiskLoc, key, child));
- }
- }
-
- _helper->btree.fixParentPtrs(_opCtx, bucket, bucketLoc);
- return bucketLoc;
-}
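-
-// To illustrate the spec convention implemented above: a field whose value is
-// an object becomes that key's left (prevChildBucket) subtree, and the special
-// "_" field becomes the bucket's nextChild. For instance,
-// makeTree("{b:{a:null},_:{c:null}}") builds a root holding key 'b', with a
-// left child holding 'a' and a nextChild holding 'c'.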
-
-template <class OnDiskFormat>
-void ArtificialTreeBuilder<OnDiskFormat>::checkStructure(const string& spec) const {
- checkStructure(fromjson(spec), DiskLoc::fromRecordId(_helper->headManager.getHead(_opCtx)));
-}
-
-template <class OnDiskFormat>
-void ArtificialTreeBuilder<OnDiskFormat>::push(const DiskLoc bucketLoc,
- const BSONObj& key,
- const DiskLoc child) {
- KeyDataOwnedType k(key);
- BucketType* bucket = _helper->btree.getBucket(_opCtx, bucketLoc);
-
- invariant(_helper->btree.pushBack(bucket, _helper->dummyDiskLoc, k, child));
- _helper->btree.fixParentPtrs(_opCtx, bucket, bucketLoc);
-}
-
-template <class OnDiskFormat>
-void ArtificialTreeBuilder<OnDiskFormat>::checkStructure(const BSONObj& spec,
- const DiskLoc node) const {
- BucketType* bucket = _helper->btree.getBucket(_opCtx, node);
-
- BSONObjIterator j(spec);
- for (int i = 0; i < bucket->n; ++i) {
- ASSERT(j.more());
- BSONElement e = j.next();
- KeyHeaderType kn = BtreeLogic<OnDiskFormat>::getKeyHeader(bucket, i);
- string expected = expectedKey(e.fieldName());
- ASSERT(isPresent(BSON("" << expected), 1));
- ASSERT(isPresent(BSON("" << expected), -1));
-
- // ASSERT_EQUALS(expected, kn.key.toBson().firstElement().valuestr());
- if (kn.prevChildBucket.isNull()) {
- ASSERT(e.type() == jstNULL);
- } else {
- ASSERT(e.type() == Object);
- checkStructure(e.embeddedObject(), kn.prevChildBucket);
- }
- }
- if (bucket->nextChild.isNull()) {
- // maybe should allow '_' field with null value?
- ASSERT(!j.more());
- } else {
- BSONElement e = j.next();
- ASSERT_EQUALS(string("_"), e.fieldName());
- ASSERT(e.type() == Object);
- checkStructure(e.embeddedObject(), bucket->nextChild);
- }
- ASSERT(!j.more());
-}
-
-template <class OnDiskFormat>
-bool ArtificialTreeBuilder<OnDiskFormat>::isPresent(const BSONObj& key, int direction) const {
- int pos;
- DiskLoc loc;
- OperationContextNoop opCtx;
- return _helper->btree.locate(&opCtx, key, _helper->dummyDiskLoc, direction, &pos, &loc);
-}
-
-// Static
-template <class OnDiskFormat>
-string ArtificialTreeBuilder<OnDiskFormat>::expectedKey(const char* spec) {
- if (spec[0] != '$') {
- return spec;
- }
- char* endPtr;
-
- // parsing a long long is a pain, so just allow shorter keys for now
- unsigned long long num = strtol(spec + 1, &endPtr, 16);
- int len = 800;
- if (*endPtr == '$') {
- len = strtol(endPtr + 1, 0, 16);
- }
-
- return bigNumString(num, len);
-}
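
A note on the key-spec syntax handled by expectedKey above: a leading '$' marks a
generated big-number key, with a hex value after the '$' and an optional second
'$'-separated hex length defaulting to 800. A few illustrative values (not taken
from the tests themselves):

    // expectedKey("abc")    -> "abc"                     (no '$': returned verbatim)
    // expectedKey("$2a")    -> bigNumString(0x2a, 800)
    // expectedKey("$2a$14") -> bigNumString(0x2a, 0x14)  (a 20-character key)
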
-
-template <class OnDiskFormat>
-int ArtificialTreeBuilder<OnDiskFormat>::fillBucketToExactSize(const DiskLoc bucketLoc,
- int targetSize,
- char startKey) {
- ASSERT_FALSE(bucketLoc.isNull());
-
- BucketType* bucket = _helper->btree.getBucket(_opCtx, bucketLoc);
- ASSERT_EQUALS(0, bucket->n);
-
- static const int bigSize = KeyDataOwnedType(simpleKey('a', 801)).dataSize();
-
- int size = 0;
- int keyCount = 0;
- while (size < targetSize) {
- int space = targetSize - size;
- int nextSize = space - sizeof(FixedWidthKeyType);
- verify(nextSize > 0);
-
- BSONObj newKey;
- if (nextSize >= bigSize) {
- newKey = simpleKey(startKey++, 801);
- } else {
- newKey = simpleKey(startKey++, nextSize - (bigSize - 801));
- }
-
- push(bucketLoc, newKey, DiskLoc());
-
- size += KeyDataOwnedType(newKey).dataSize() + sizeof(FixedWidthKeyType);
- keyCount += 1;
- }
-
- ASSERT_EQUALS(_helper->btree._packedDataSize(bucket, 0), targetSize);
-
- return keyCount;
-}
-
-//
-// This causes actual code to be generated for the usages of the templates in this file.
-//
-
-// V0 format.
-template struct BtreeLogicTestHelper<BtreeLayoutV0>;
-template class ArtificialTreeBuilder<BtreeLayoutV0>;
-
-// V1 format.
-template struct BtreeLogicTestHelper<BtreeLayoutV1>;
-template class ArtificialTreeBuilder<BtreeLayoutV1>;
-}
diff --git a/src/mongo/db/storage/mmap_v1/btree/btree_test_help.h b/src/mongo/db/storage/mmap_v1/btree/btree_test_help.h
deleted file mode 100644
index c5d48b48b3a..00000000000
--- a/src/mongo/db/storage/mmap_v1/btree/btree_test_help.h
+++ /dev/null
@@ -1,150 +0,0 @@
-/**
- * Copyright (C) 2014 MongoDB Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#pragma once
-
-#include <string>
-
-#include "mongo/db/json.h"
-#include "mongo/db/storage/mmap_v1/btree/btree_logic.h"
-#include "mongo/db/storage/mmap_v1/heap_record_store_btree.h"
-#include "mongo/db/storage/mmap_v1/record_store_v1_test_help.h"
-
-namespace mongo {
-
-/**
- * Generates a string of the specified length containing repeated concatenation of the
- * hexadecimal representation of the input value.
- */
-std::string bigNumString(long long n, int len);
-
-/**
- * Generates a key on field 'a', with the specified number of repetitions of the character.
- */
-BSONObj simpleKey(char c, int n = 1);
-
-/**
- * Simple head manager, which performs no validity checking or persistence.
- */
-class TestHeadManager : public HeadManager {
-public:
- virtual const RecordId getHead(OperationContext* opCtx) const {
- return _head;
- }
-
- virtual void setHead(OperationContext* opCtx, const RecordId newHead) {
- _head = newHead;
- }
-
-private:
- RecordId _head;
-};
-
-
-/**
- * This structure encapsulates a Btree and all the infrastructure needed by it (head manager,
- * record store and a valid disk location to use by the tests).
- */
-template <class OnDiskFormat>
-struct BtreeLogicTestHelper {
- BtreeLogicTestHelper(const BSONObj& order);
-
- // Everything needed for a fully-functional Btree logic
- TestHeadManager headManager;
- HeapRecordStoreBtree recordStore;
- SavedCursorRegistry cursorRegistry;
- BtreeLogic<OnDiskFormat> btree;
- DiskLoc dummyDiskLoc;
-};
-
-
-/**
- * Tool to construct custom tree shapes for tests.
- */
-template <class OnDiskFormat>
-class ArtificialTreeBuilder {
-public:
- typedef typename BtreeLogic<OnDiskFormat>::BucketType BucketType;
- typedef typename BtreeLogic<OnDiskFormat>::KeyDataOwnedType KeyDataOwnedType;
- typedef typename BtreeLogic<OnDiskFormat>::KeyHeaderType KeyHeaderType;
-
- typedef typename OnDiskFormat::FixedWidthKeyType FixedWidthKeyType;
-
- /**
- * The tree builder wraps around the passed-in helper and will invoke methods on it. It
- * does not do any cleanup, so constructing multiple trees over the same helper will
- * cause leaked records.
- */
- ArtificialTreeBuilder(OperationContext* opCtx, BtreeLogicTestHelper<OnDiskFormat>* helper)
- : _opCtx(opCtx), _helper(helper) {}
-
- /**
- * Causes the specified tree shape to be built on the associated helper and the tree's
- * root installed as the head. Uses a custom JSON-based language with the following
- * syntax:
- *
- * Btree := BTreeBucket
- * BtreeBucket := { Child_1_Key: <BtreeBucket | null>,
- * Child_2_Key: <BtreeBucket | null>,
- * ...,
- * _: <BtreeBucket | null> }
- *
- * The _ key name specifies the content of the nextChild pointer. The value null means
- * use a fixed disk loc.
- */
- void makeTree(const std::string& spec);
-
- /**
- * Validates that the structure of the Btree in the helper matches the specification.
- */
- void checkStructure(const std::string& spec) const;
-
- /**
- * Adds the given key to the bucket and fixes up the child pointers.
- */
- void push(const DiskLoc bucketLoc, const BSONObj& key, const DiskLoc child);
-
- /**
- * @return The number of keys inserted.
- */
- int fillBucketToExactSize(const DiskLoc bucketLoc, int targetSize, char startKey);
-
-private:
- DiskLoc makeTree(const BSONObj& spec);
-
- void checkStructure(const BSONObj& spec, const DiskLoc node) const;
-
- bool isPresent(const BSONObj& key, int direction) const;
-
- static std::string expectedKey(const char* spec);
-
- OperationContext* _opCtx;
- BtreeLogicTestHelper<OnDiskFormat>* _helper;
-};
-
-} // namespace mongo
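
To make the tree-spec mini-language documented on makeTree() above concrete, here is a
hedged usage sketch. The types come from this header; opCtx and helper are assumed to be
an existing OperationContext* and BtreeLogicTestHelper<BtreeLayoutV1>, and the spec values
are illustrative:

    // Root holds keys "a" and "b"; "b"'s left child is a leaf holding "ba",
    // and the root's nextChild pointer leads to a leaf holding "c".
    ArtificialTreeBuilder<BtreeLayoutV1> builder(opCtx, &helper);
    builder.makeTree("{a: null, b: {ba: null}, _: {c: null}}");
    builder.checkStructure("{a: null, b: {ba: null}, _: {c: null}}");
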
diff --git a/src/mongo/db/storage/mmap_v1/btree/key.cpp b/src/mongo/db/storage/mmap_v1/btree/key.cpp
deleted file mode 100644
index 0c5eacb1998..00000000000
--- a/src/mongo/db/storage/mmap_v1/btree/key.cpp
+++ /dev/null
@@ -1,734 +0,0 @@
-/**
- * Copyright (C) 2011 10gen Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kStorage
-
-#include "mongo/db/storage/mmap_v1/btree/key.h"
-
-#include <cmath>
-
-#include "mongo/base/data_type_endian.h"
-#include "mongo/base/data_view.h"
-#include "mongo/bson/simple_bsonobj_comparator.h"
-#include "mongo/bson/util/builder.h"
-#include "mongo/util/log.h"
-#include "mongo/util/startup_test.h"
-
-
-namespace mongo {
-
-using std::endl;
-using std::numeric_limits;
-using std::min;
-
-extern const Ordering nullOrdering = Ordering::make(BSONObj());
-
-// KeyBson is for V0 (version #0) indexes
-
-int oldCompare(const BSONObj& l, const BSONObj& r, const Ordering& o);
-
-// "old" = pre signed dates & such; i.e. btree V0
-/* must be same canon type when called */
-int oldCompareElementValues(const BSONElement& l, const BSONElement& r) {
- dassert(l.canonicalType() == r.canonicalType());
- int f;
- double x;
-
- switch (l.type()) {
- case EOO:
- case Undefined: // EOO and Undefined are same canonicalType
- case jstNULL:
- case MaxKey:
- case MinKey:
- return 0;
- case Bool:
- return *l.value() - *r.value();
- case bsonTimestamp:
- case Date: {
- const unsigned long long lULL = l.date().toULL();
- const unsigned long long rULL = r.date().toULL();
- // unsigned dates for old version
- if (lULL < rULL)
- return -1;
- return lULL == rULL ? 0 : 1;
- }
- case NumberLong:
- if (r.type() == NumberLong) {
- long long L = l._numberLong();
- long long R = r._numberLong();
- if (L < R)
- return -1;
- if (L == R)
- return 0;
- return 1;
- }
- // else fall through
- case NumberInt:
- case NumberDouble: {
- double left = l.number();
- double right = r.number();
- bool lNan =
- !(left <= numeric_limits<double>::max() && left >= -numeric_limits<double>::max());
- bool rNan = !(right <= numeric_limits<double>::max() &&
- right >= -numeric_limits<double>::max());
- if (lNan) {
- if (rNan) {
- return 0;
- } else {
- return -1;
- }
- } else if (rNan) {
- return 1;
- }
- x = left - right;
- if (x < 0)
- return -1;
- return x == 0 ? 0 : 1;
- }
- case jstOID:
- return memcmp(l.value(), r.value(), OID::kOIDSize);
- case Code:
- case Symbol:
- case String:
- // nulls not allowed in the middle of strings in the old version
- return strcmp(l.valuestr(), r.valuestr());
- case Object:
- case Array:
- return oldCompare(l.embeddedObject(), r.embeddedObject(), nullOrdering);
- case DBRef: {
- int lsz = l.valuesize();
- int rsz = r.valuesize();
- if (lsz - rsz != 0)
- return lsz - rsz;
- return memcmp(l.value(), r.value(), lsz);
- }
- case BinData: {
- int lsz = l.objsize(); // our bin data size in bytes, not including the subtype byte
- int rsz = r.objsize();
- if (lsz - rsz != 0)
- return lsz - rsz;
- return memcmp(l.value() + 4, r.value() + 4, lsz + 1);
- }
- case RegEx: {
- int c = strcmp(l.regex(), r.regex());
- if (c)
- return c;
- return strcmp(l.regexFlags(), r.regexFlags());
- }
- case CodeWScope: {
- f = l.canonicalType() - r.canonicalType();
- if (f)
- return f;
- f = strcmp(l.codeWScopeCode(), r.codeWScopeCode());
- if (f)
- return f;
- f = strcmp(l.codeWScopeScopeDataUnsafe(), r.codeWScopeScopeDataUnsafe());
- if (f)
- return f;
- return 0;
- }
- default:
- log() << "oldCompareElementValues: bad type " << (int)l.type() << endl;
- verify(false);
- }
- return -1;
-}
-
-int oldElemCompare(const BSONElement& l, const BSONElement& r) {
- int lt = (int)l.canonicalType();
- int rt = (int)r.canonicalType();
- int x = lt - rt;
- if (x)
- return x;
- return oldCompareElementValues(l, r);
-}
-
-// pre signed dates & such
-int oldCompare(const BSONObj& l, const BSONObj& r, const Ordering& o) {
- BSONObjIterator i(l);
- BSONObjIterator j(r);
- unsigned mask = 1;
- while (1) {
- // so far, equal...
-
- BSONElement l = i.next();
- BSONElement r = j.next();
- if (l.eoo())
- return r.eoo() ? 0 : -1;
- if (r.eoo())
- return 1;
-
- int x;
- {
- x = oldElemCompare(l, r);
- if (o.descending(mask))
- x = -x;
- }
- if (x != 0)
- return x;
- mask <<= 1;
- }
- return -1;
-}
-
-/* old style compares:
- - dates are unsigned
- - strings no nulls
-*/
-int KeyBson::woCompare(const KeyBson& r, const Ordering& o) const {
- return oldCompare(_o, r._o, o);
-}
-
-// woEqual could be made faster than woCompare, but this is for backward compatibility, so it is
-// not worth a big effort
-bool KeyBson::woEqual(const KeyBson& r) const {
- return oldCompare(_o, r._o, nullOrdering) == 0;
-}
-
-// [ ][HASMORE][x][y][canontype_4bits]
-enum CanonicalsEtc {
- cminkey = 1,
- cnull = 2,
- cdouble = 4,
- cstring = 6,
- cbindata = 7,
- coid = 8,
- cfalse = 10,
- ctrue = 11,
- cdate = 12,
- cmaxkey = 14,
- cCANONTYPEMASK = 0xf,
- cY = 0x10,
- cint = cY | cdouble,
- cX = 0x20,
- clong = cX | cdouble,
- cHASMORE = 0x40,
- cNOTUSED = 0x80 // but see IsBSON sentinel - this bit not usable without great care
-};
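
Each element of a compact KeyV1 begins with one type byte laid out as the
[ ][HASMORE][x][y][canontype_4bits] comment above describes. A few worked byte values,
derived from the enum constants purely for illustration:

    // cstring | cHASMORE == 0x06 | 0x40 == 0x46   (string element, more elements follow)
    // cint               == 0x10 | 0x04 == 0x14   (int, stored as a double)
    // clong   | cHASMORE == 0x24 | 0x40 == 0x64   (long stored as a double, more follow)
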
-
-// bindata bson type
-const unsigned BinDataLenMask = 0xf0; // the length code lives in this high nibble
-const unsigned BinDataTypeMask =
- 0x0f; // 0-7 as you would expect, 8-15 are 128+value. see BinDataType.
-const int BinDataLenMax = 32;
-const int BinDataLengthToCode[] = {
- 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60,
- 0x70, 0x80, -1 /*9*/, 0x90 /*10*/, -1 /*11*/, 0xa0 /*12*/, -1 /*13*/,
- 0xb0 /*14*/, -1 /*15*/, 0xc0 /*16*/, -1, -1, -1, 0xd0 /*20*/,
- -1, -1, -1, 0xe0 /*24*/, -1, -1, -1,
- -1, -1, -1, -1, 0xf0 /*32*/
-};
-const int BinDataCodeToLength[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 32};
-
-int binDataCodeToLength(int codeByte) {
- return BinDataCodeToLength[codeByte >> 4];
-}
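
The two tables above are inverses over the sixteen supported BinData lengths: the high
nibble of the code byte selects the length, the low nibble carries the subtype. A minimal
standalone sketch of the decoding (hypothetical helper names that mirror, but are not part
of, this file):

    static const int kCodeToLen[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 32};
    inline int lenOf(unsigned char codeByte) { return kCodeToLen[codeByte >> 4]; }
    inline int subtypeOf(unsigned char codeByte) { return codeByte & 0x0f; }
    // lenOf(0xe3) == 24 and subtypeOf(0xe3) == 3, matching
    // BinDataLengthToCode[24] == 0xe0 with subtype 3 OR'd in.
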
-
-/** The object cannot be represented in compact format, so store it in traditional BSON format
- with a leading sentinel byte IsBSON to indicate it's in that format.
-
- Given that the KeyV1Owned constructor already grabbed a bufbuilder, we reuse it here
- so that we don't have to do an extra malloc.
-*/
-void KeyV1Owned::traditional(const BSONObj& obj) {
- b.reset();
- b.appendUChar(IsBSON);
- b.appendBuf(obj.objdata(), obj.objsize());
- _keyData = (const unsigned char*)b.buf();
-}
-
-KeyV1Owned::KeyV1Owned(const KeyV1& rhs) {
- b.appendBuf(rhs.data(), rhs.dataSize());
- _keyData = (const unsigned char*)b.buf();
- dassert(b.len() == dataSize()); // check datasize method is correct
- dassert((*_keyData & cNOTUSED) == 0);
-}
-
-// fromBSON to Key format
-KeyV1Owned::KeyV1Owned(const BSONObj& obj) {
- BSONObjIterator i(obj);
- unsigned char bits = 0;
- while (1) {
- BSONElement e = i.next();
- if (i.more())
- bits |= cHASMORE;
- switch (e.type()) {
- case MinKey:
- b.appendUChar(cminkey | bits);
- break;
- case jstNULL:
- b.appendUChar(cnull | bits);
- break;
- case MaxKey:
- b.appendUChar(cmaxkey | bits);
- break;
- case Bool:
- b.appendUChar((e.boolean() ? ctrue : cfalse) | bits);
- break;
- case jstOID:
- b.appendUChar(coid | bits);
- b.appendBuf(e.__oid().view().view(), OID::kOIDSize);
- break;
- case BinData: {
- int t = e.binDataType();
- // 0-7 and 0x80 to 0x87 are supported by Key
- if ((t & 0x78) == 0 && t != ByteArrayDeprecated) {
- int len;
- const char* d = e.binData(len);
- if (len <= BinDataLenMax) {
- int code = BinDataLengthToCode[len];
- if (code >= 0) {
- if (t >= 128)
- t = (t - 128) | 0x08;
- dassert((code & t) == 0);
- b.appendUChar(cbindata | bits);
- b.appendUChar(code | t);
- b.appendBuf(d, len);
- break;
- }
- }
- }
- traditional(obj);
- return;
- }
- case Date:
- b.appendUChar(cdate | bits);
- b.appendNum(e.date().toMillisSinceEpoch());
- break;
- case String: {
- b.appendUChar(cstring | bits);
- // note we do not store the terminating null, to save space.
- unsigned x = (unsigned)e.valuestrsize() - 1;
- if (x > 255) {
- traditional(obj);
- return;
- }
- b.appendUChar(x);
- b.appendBuf(e.valuestr(), x);
- break;
- }
- case NumberInt:
- b.appendUChar(cint | bits);
- b.appendNum((double)e._numberInt());
- break;
- case NumberLong: {
- long long n = e._numberLong();
- long long m = 2LL << 52;
- DEV {
- long long d = m - 1;
- verify(((long long)((double)-d)) == -d);
- }
- if (n >= m || n <= -m) {
- // can't represent exactly as a double
- traditional(obj);
- return;
- }
- b.appendUChar(clong | bits);
- b.appendNum((double)n);
- break;
- }
- case NumberDouble: {
- double d = e._numberDouble();
- if (std::isnan(d)) {
- traditional(obj);
- return;
- }
- b.appendUChar(cdouble | bits);
- b.appendNum(d);
- break;
- }
- default:
- // if other types involved, store as traditional BSON
- traditional(obj);
- return;
- }
- if (!i.more())
- break;
- bits = 0;
- }
- _keyData = (const unsigned char*)b.buf();
- dassert(b.len() == dataSize()); // check datasize method is correct
- dassert((*_keyData & cNOTUSED) == 0);
-}
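
The NumberLong guard above (m = 2LL << 52, i.e. 2^53) reflects the 53-bit significand of
an IEEE double: at or beyond that magnitude, distinct long long values collapse to the
same double, so the key must fall back to traditional BSON storage. A minimal standalone
check of the boundary:

    long long m = 2LL << 52;                           // 2^53
    bool exact = (long long)(double)(m - 1) == m - 1;  // true: 2^53 - 1 is representable
    bool lossy = (double)(m + 1) == (double)m;         // true: 2^53 + 1 rounds back to 2^53
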
-
-BSONObj KeyV1::toBson() const {
- verify(_keyData != 0);
- if (!isCompactFormat())
- return bson();
-
- BSONObjBuilder b(512);
- const unsigned char* p = _keyData;
- while (1) {
- unsigned bits = *p++;
-
- switch (bits & 0x3f) {
- case cminkey:
- b.appendMinKey("");
- break;
- case cnull:
- b.appendNull("");
- break;
- case cfalse:
- b.appendBool("", false);
- break;
- case ctrue:
- b.appendBool("", true);
- break;
- case cmaxkey:
- b.appendMaxKey("");
- break;
- case cstring: {
- unsigned sz = *p++;
- // we build the element ourselves as we have to null-terminate it
- BufBuilder& bb = b.bb();
- bb.appendNum((char)String);
- bb.appendUChar(0); // fieldname ""
- bb.appendNum(sz + 1);
- bb.appendBuf(p, sz);
- bb.appendUChar(0); // null char at end of string
- p += sz;
- break;
- }
- case coid: {
- OID oid = OID::from(p);
- b.appendOID("", &oid);
- p += OID::kOIDSize;
- break;
- }
- case cbindata: {
- int len = binDataCodeToLength(*p);
- int subtype = (*p) & BinDataTypeMask;
- if (subtype & 0x8) {
- subtype = (subtype & 0x7) | 0x80;
- }
- b.appendBinData("", len, (BinDataType)subtype, ++p);
- p += len;
- break;
- }
- case cdate:
- b.appendDate(
- "",
- Date_t::fromMillisSinceEpoch(ConstDataView(reinterpret_cast<const char*>(p))
- .read<LittleEndian<long long>>()));
- p += 8;
- break;
- case cdouble:
- b.append(
- "",
- ConstDataView(reinterpret_cast<const char*>(p)).read<LittleEndian<double>>());
- p += sizeof(double);
- break;
- case cint:
- b.append("",
- static_cast<int>(ConstDataView(reinterpret_cast<const char*>(p))
- .read<LittleEndian<double>>()));
- p += sizeof(double);
- break;
- case clong:
- b.append("",
- static_cast<long long>(ConstDataView(reinterpret_cast<const char*>(p))
- .read<LittleEndian<double>>()));
- p += sizeof(double);
- break;
- default:
- verify(false);
- }
-
- if ((bits & cHASMORE) == 0)
- break;
- }
- return b.obj();
-}
-
-static int compare(const unsigned char*& l, const unsigned char*& r) {
- int lt = (*l & cCANONTYPEMASK);
- int rt = (*r & cCANONTYPEMASK);
- int x = lt - rt;
- if (x)
- return x;
-
- l++;
- r++;
-
- // same type
- switch (lt) {
- case cdouble: {
- double L = ConstDataView(reinterpret_cast<const char*>(l)).read<LittleEndian<double>>();
- double R = ConstDataView(reinterpret_cast<const char*>(r)).read<LittleEndian<double>>();
- if (L < R)
- return -1;
- if (L != R)
- return 1;
- l += 8;
- r += 8;
- break;
- }
- case cstring: {
- int lsz = *l;
- int rsz = *r;
- int common = min(lsz, rsz);
- l++;
- r++; // skip the size byte
- // use memcmp as we (will) allow zeros in UTF8 strings
- int res = memcmp(l, r, common);
- if (res)
- return res;
- // longer string is the greater one
- int diff = lsz - rsz;
- if (diff)
- return diff;
- l += lsz;
- r += lsz;
- break;
- }
- case cbindata: {
- int L = *l;
- int R = *r;
- int llen = binDataCodeToLength(L);
- int diff = L - R; // checks length and subtype simultaneously
- if (diff) {
- // unfortunately the nibbles are ordered the wrong way to compare subtype and length
- // in one check (we could bit-swap...)
- int rlen = binDataCodeToLength(R);
- if (llen != rlen)
- return llen - rlen;
- return diff;
- }
- // same length, same type
- l++;
- r++;
- int res = memcmp(l, r, llen);
- if (res)
- return res;
- l += llen;
- r += llen;
- break;
- }
- case cdate: {
- long long L =
- ConstDataView(reinterpret_cast<const char*>(l)).read<LittleEndian<long long>>();
- long long R =
- ConstDataView(reinterpret_cast<const char*>(r)).read<LittleEndian<long long>>();
- if (L < R)
- return -1;
- if (L > R)
- return 1;
- l += 8;
- r += 8;
- break;
- }
- case coid: {
- int res = memcmp(l, r, OID::kOIDSize);
- if (res)
- return res;
- l += OID::kOIDSize;
- r += OID::kOIDSize;
- break;
- }
- default:
- // all the others are a match -- e.g. null == null
- ;
- }
-
- return 0;
-}
-
-// at least one of 'this' and 'right' is in traditional BSON format
-int NOINLINE_DECL KeyV1::compareHybrid(const KeyV1& right, const Ordering& order) const {
- BSONObj L = toBson();
- BSONObj R = right.toBson();
- return L.woCompare(R, order, /*considerfieldname*/ false);
-}
-
-int KeyV1::woCompare(const KeyV1& right, const Ordering& order) const {
- const unsigned char* l = _keyData;
- const unsigned char* r = right._keyData;
-
- if ((*l | *r) == IsBSON) // we can only do this if cNOTUSED is maintained
- return compareHybrid(right, order);
-
- unsigned mask = 1;
- while (1) {
- char lval = *l;
- char rval = *r;
- {
- int x = compare(l, r); // updates l and r pointers
- if (x) {
- if (order.descending(mask))
- x = -x;
- return x;
- }
- }
-
- {
- int x = ((int)(lval & cHASMORE)) - ((int)(rval & cHASMORE));
- if (x)
- return x;
- if ((lval & cHASMORE) == 0)
- break;
- }
-
- mask <<= 1;
- }
-
- return 0;
-}
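
Both woCompare above and oldCompare earlier in this file walk the key with a
one-bit-per-field mask, flipping the comparison sign for descending fields. A standalone
sketch of that pattern, with int fields and a raw bitmask standing in for mongo::Ordering
(an assumption for illustration, not the shipped type):

    #include <vector>
    int compareKeys(const std::vector<int>& a, const std::vector<int>& b, unsigned descendingBits) {
        unsigned mask = 1;
        for (size_t i = 0; i < a.size() && i < b.size(); ++i, mask <<= 1) {
            int x = (a[i] < b[i]) ? -1 : (a[i] > b[i] ? 1 : 0);
            if (descendingBits & mask)
                x = -x;  // flip the per-field result for descending fields
            if (x)
                return x;
        }
        return 0;
    }
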
-
-static unsigned sizes[] = {0,
- 1, // cminkey=1,
- 1, // cnull=2,
- 0,
- 9, // cdouble=4,
- 0,
- 0, // cstring=6,
- 0,
- 13, // coid=8,
- 0,
- 1, // cfalse=10,
- 1, // ctrue=11,
- 9, // cdate=12,
- 0,
- 1, // cmaxkey=14,
- 0};
-
-inline unsigned sizeOfElement(const unsigned char* p) {
- unsigned type = *p & cCANONTYPEMASK;
- unsigned sz = sizes[type];
- if (sz == 0) {
- if (type == cstring) {
- sz = ((unsigned)p[1]) + 2;
- } else {
- verify(type == cbindata);
- sz = binDataCodeToLength(p[1]) + 2;
- }
- }
- return sz;
-}
-
-int KeyV1::dataSize() const {
- const unsigned char* p = _keyData;
- if (!isCompactFormat()) {
- return bson().objsize() + 1;
- }
-
- bool more;
- do {
- unsigned z = sizeOfElement(p);
- more = (*p & cHASMORE) != 0;
- p += z;
- } while (more);
- return p - _keyData;
-}
-
-bool KeyV1::woEqual(const KeyV1& right) const {
- const unsigned char* l = _keyData;
- const unsigned char* r = right._keyData;
-
- if ((*l | *r) == IsBSON) {
- return SimpleBSONObjComparator::kInstance.evaluate(toBson() == right.toBson());
- }
-
- while (1) {
- char lval = *l;
- char rval = *r;
- if ((lval & (cCANONTYPEMASK | cHASMORE)) != (rval & (cCANONTYPEMASK | cHASMORE)))
- return false;
- l++;
- r++;
- switch (lval & cCANONTYPEMASK) {
- case coid:
- if (ConstDataView(reinterpret_cast<const char*>(l))
- .read<LittleEndian<uint32_t>>() !=
- ConstDataView(reinterpret_cast<const char*>(r)).read<LittleEndian<uint32_t>>())
- return false;
- l += 4;
- r += 4;
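- // intentional fallthrough: this arm compares the first 4 of the OID's 12
- // bytes; the cdate arm below compares the remaining 8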
- case cdate:
- if (ConstDataView(reinterpret_cast<const char*>(l))
- .read<LittleEndian<unsigned long long>>() !=
- ConstDataView(reinterpret_cast<const char*>(r))
- .read<LittleEndian<unsigned long long>>())
- return false;
- l += 8;
- r += 8;
- break;
- case cdouble:
- if (ConstDataView(reinterpret_cast<const char*>(l)).read<LittleEndian<double>>() !=
- ConstDataView(reinterpret_cast<const char*>(r)).read<LittleEndian<double>>())
- return false;
- l += 8;
- r += 8;
- break;
- case cstring: {
- if (*l != *r)
- return false; // not same length
- unsigned sz = ((unsigned)*l) + 1;
- if (memcmp(l, r, sz))
- return false;
- l += sz;
- r += sz;
- break;
- }
- case cbindata: {
- if (*l != *r)
- return false; // len or subtype mismatch
- int len = binDataCodeToLength(*l) + 1;
- if (memcmp(l, r, len))
- return false;
- l += len;
- r += len;
- break;
- }
- case cminkey:
- case cnull:
- case cfalse:
- case ctrue:
- case cmaxkey:
- break;
- default:
- verify(false);
- }
- if ((lval & cHASMORE) == 0)
- break;
- }
- return true;
-}
-
-struct CmpUnitTest : public StartupTest {
- void run() {
- char a[2];
- char b[2];
- a[0] = -3;
- a[1] = 0;
- b[0] = 3;
- b[1] = 0;
- verify(strcmp(a, b) > 0 && memcmp(a, b, 2) > 0);
- }
-} cunittest;
-
-} // namespace mongo
diff --git a/src/mongo/db/storage/mmap_v1/btree/key.h b/src/mongo/db/storage/mmap_v1/btree/key.h
deleted file mode 100644
index 906ddcc621b..00000000000
--- a/src/mongo/db/storage/mmap_v1/btree/key.h
+++ /dev/null
@@ -1,167 +0,0 @@
-// @file key.h class(es) representing individual keys in a btree
-
-/**
-* Copyright (C) 2011 10gen Inc.
-*
-* This program is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Affero General Public License, version 3,
-* as published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU Affero General Public License for more details.
-*
-* You should have received a copy of the GNU Affero General Public License
-* along with this program. If not, see <http://www.gnu.org/licenses/>.
-*
-* As a special exception, the copyright holders give permission to link the
-* code of portions of this program with the OpenSSL library under certain
-* conditions as described in each individual source file and distribute
-* linked combinations including the program with the OpenSSL library. You
-* must comply with the GNU Affero General Public License in all respects for
-* all of the code used other than as permitted herein. If you modify file(s)
-* with this exception, you may extend this exception to your version of the
-* file(s), but you are not obligated to do so. If you do not wish to do so,
-* delete this exception statement from your version. If you delete this
-* exception statement from all source files in the program, then also delete
-* it in the license file.
-*/
-
-#pragma once
-
-#include "mongo/db/jsobj.h"
-#include "mongo/util/debug_util.h"
-
-namespace mongo {
-
-/** Key class for precomputing a small format index key that is denser than a traditional BSONObj.
-
- KeyBson is a legacy wrapper implementation for old BSONObj style keys for v:0 indexes.
-
- KeyV1 is the new implementation.
-*/
-class KeyBson /* "KeyV0" */ {
-public:
- KeyBson() {}
- explicit KeyBson(const char* keyData) : _o(keyData) {}
- explicit KeyBson(const BSONObj& obj) : _o(obj) {}
- int woCompare(const KeyBson& r, const Ordering& o) const;
- BSONObj toBson() const {
- return _o;
- }
- std::string toString() const {
- return _o.toString();
- }
- int dataSize() const {
- return _o.objsize();
- }
- const char* data() const {
- return _o.objdata();
- }
- BSONElement _firstElement() const {
- return _o.firstElement();
- }
- bool isCompactFormat() const {
- return false;
- }
- bool woEqual(const KeyBson& r) const;
- void assign(const KeyBson& rhs) {
- *this = rhs;
- }
- bool isValid() const {
- return true;
- }
-
-private:
- BSONObj _o;
-};
-
-class KeyV1Owned;
-
-// corresponding to BtreeData_V1
-class KeyV1 {
- // disallowed just to make people careful, since we don't own the buffer
- void operator=(const KeyV1&);
- // disallowed because it is not a great idea: the KeyV1Owned will likely go out of scope
- KeyV1(const KeyV1Owned&);
-
-public:
- KeyV1() {
- _keyData = 0;
- }
- ~KeyV1() {
- DEV _keyData = (const unsigned char*)1;
- }
-
- KeyV1(const KeyV1& rhs) : _keyData(rhs._keyData) {
- dassert(_keyData > (const unsigned char*)1);
- }
-
- // explicit version of operator= to be safe
- void assign(const KeyV1& rhs) {
- _keyData = rhs._keyData;
- }
-
- /** @param keyData can be a buffer containing data in either BSON format, OR in KeyV1 format.
- when BSON, we are just a wrapper
- */
- explicit KeyV1(const char* keyData) : _keyData((unsigned char*)keyData) {}
-
- int woCompare(const KeyV1& r, const Ordering& o) const;
- bool woEqual(const KeyV1& r) const;
- BSONObj toBson() const;
- std::string toString() const {
- return toBson().toString();
- }
-
- /** get the key data we want to store in the btree bucket */
- const char* data() const {
- return (const char*)_keyData;
- }
-
- /** @return size of data() */
- int dataSize() const;
-
- /** only used by geo, which always has bson keys */
- BSONElement _firstElement() const {
- return bson().firstElement();
- }
- bool isCompactFormat() const {
- return *_keyData != IsBSON;
- }
-
- bool isValid() const {
- return _keyData > (const unsigned char*)1;
- }
-
-protected:
- enum { IsBSON = 0xff };
- const unsigned char* _keyData;
- BSONObj bson() const {
- dassert(!isCompactFormat());
- return BSONObj((const char*)_keyData + 1);
- }
-
-private:
- int compareHybrid(const KeyV1& right, const Ordering& order) const;
-};
-
-class KeyV1Owned : public KeyV1 {
- void operator=(const KeyV1Owned&);
-
-public:
- /** @param obj a BSON object to be translated to KeyV1 format. If the object isn't
- representable in KeyV1 format (which happens, intentionally, at times)
- it will stay as bson herein.
- */
- KeyV1Owned(const BSONObj& obj);
-
- /** makes a copy (memcpy's the whole thing) */
- KeyV1Owned(const KeyV1& rhs);
-
-private:
- StackBufBuilder b;
- void traditional(const BSONObj& obj); // store as traditional bson not as compact format
-};
-};
diff --git a/src/mongo/db/storage/mmap_v1/catalog/hashtab.cpp b/src/mongo/db/storage/mmap_v1/catalog/hashtab.cpp
deleted file mode 100644
index df766917fac..00000000000
--- a/src/mongo/db/storage/mmap_v1/catalog/hashtab.cpp
+++ /dev/null
@@ -1,85 +0,0 @@
-/**
- * Copyright (C) 2009-2014 MongoDB Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kIndex
-
-#include "mongo/platform/basic.h"
-
-#include "mongo/db/storage/mmap_v1/catalog/hashtab.h"
-
-#include "mongo/util/log.h"
-
-namespace mongo {
-
-int NamespaceHashTable::_find(const Namespace& k, bool& found) const {
- found = false;
- int h = k.hash();
- int i = h % n;
- int start = i;
- int chain = 0;
- int firstNonUsed = -1;
- while (1) {
- if (!_nodes(i).inUse()) {
- if (firstNonUsed < 0)
- firstNonUsed = i;
- }
-
- if (_nodes(i).hash == h && _nodes(i).key == k) {
- if (chain >= 200)
- log() << "warning: hashtable " << _name << " long chain " << std::endl;
- found = true;
- return i;
- }
- chain++;
- i = (i + 1) % n;
- if (i == start) {
- // shouldn't get here / defensive for infinite loops
- log() << "error: hashtable " << _name << " is full n:" << n << std::endl;
- return -1;
- }
- if (chain >= maxChain) {
- if (firstNonUsed >= 0)
- return firstNonUsed;
- log() << "error: hashtable " << _name << " max chain reached:" << maxChain << std::endl;
- return -1;
- }
- }
-}
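
The loop above is open addressing with linear probing: start at hash % n, remember the
first unused slot, and give up after maxChain probes (5% of the table, per the constructor
below) or after wrapping all the way around. A standalone sketch of the same scheme over a
hypothetical int-keyed table (0 marking an unused slot), not the mongo types:

    #include <vector>
    int probeFind(const std::vector<int>& slots, int key, int maxChain) {
        const int n = static_cast<int>(slots.size());
        int i = key % n, start = i, chain = 0, firstFree = -1;
        while (true) {
            if (slots[i] == 0 && firstFree < 0)
                firstFree = i;       // remember the first free slot for insertion
            if (slots[i] == key)
                return i;            // found the key itself
            if (++chain >= maxChain)
                return firstFree;    // chain too long; may be -1 if no free slot seen
            i = (i + 1) % n;
            if (i == start)
                return -1;           // wrapped around: the table is full
        }
    }
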
-
-/* buf must be all zeroes on initialization. */
-NamespaceHashTable::NamespaceHashTable(void* buf, int buflen, const char* name)
- : _name(name), _buf(buf) {
- n = buflen / sizeof(Node);
- if ((n & 1) == 0) {
- n--;
- }
-
- maxChain = (int)(n * 0.05);
-}
-
-} // namespace mongo
diff --git a/src/mongo/db/storage/mmap_v1/catalog/hashtab.h b/src/mongo/db/storage/mmap_v1/catalog/hashtab.h
deleted file mode 100644
index f873e6a4d3a..00000000000
--- a/src/mongo/db/storage/mmap_v1/catalog/hashtab.h
+++ /dev/null
@@ -1,138 +0,0 @@
-/**
- * Copyright (C) 2009-2014 MongoDB Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#pragma once
-
-#include "mongo/base/static_assert.h"
-#include "mongo/db/operation_context.h"
-#include "mongo/db/storage/mmap_v1/catalog/namespace.h"
-#include "mongo/db/storage/mmap_v1/catalog/namespace_details.h"
-#include "mongo/stdx/functional.h"
-
-namespace mongo {
-
-/**
- * Simple, fixed size hash table used for namespace mapping (effectively the contents of the
- * MMAP V1 .ns file). Uses a contiguous block of memory, so you can put it in a memory mapped
- * file very easily.
- */
-class NamespaceHashTable {
- MONGO_DISALLOW_COPYING(NamespaceHashTable);
-
-public:
- typedef stdx::function<void(const Namespace& k, NamespaceDetails& v)> IteratorCallback;
-
-
- /* buf must be all zeroes on initialization. */
- NamespaceHashTable(void* buf, int buflen, const char* name);
-
- NamespaceDetails* get(const Namespace& k) const {
- bool found;
- int i = _find(k, found);
- if (found) {
- return &_nodes(i).value;
- }
-
- return 0;
- }
-
- void kill(OperationContext* opCtx, const Namespace& k) {
- bool found;
- int i = _find(k, found);
- if (i >= 0 && found) {
- Node* n = &_nodes(i);
- n = opCtx->recoveryUnit()->writing(n);
- n->key.kill();
- n->setUnused();
- }
- }
-
- /** returns false if too full */
- bool put(OperationContext* opCtx, const Namespace& k, const NamespaceDetails& value) {
- bool found;
- int i = _find(k, found);
- if (i < 0)
- return false;
-
- Node* n = opCtx->recoveryUnit()->writing(&_nodes(i));
- if (!found) {
- n->key = k;
- n->hash = k.hash();
- } else {
- invariant(n->hash == k.hash());
- }
-
- n->value = value;
- return true;
- }
-
- void iterAll(IteratorCallback callback) {
- for (int i = 0; i < n; i++) {
- if (_nodes(i).inUse()) {
- callback(_nodes(i).key, _nodes(i).value);
- }
- }
- }
-
-
-private:
-#pragma pack(1)
- struct Node {
- int hash;
- Namespace key;
- NamespaceDetails value;
-
- bool inUse() const {
- return hash != 0;
- }
-
- void setUnused() {
- hash = 0;
- }
- };
-#pragma pack()
-
- MONGO_STATIC_ASSERT(sizeof(Node) == 628);
-
-
- int _find(const Namespace& k, bool& found) const;
-
- Node& _nodes(int i) const {
- Node* nodes = (Node*)_buf;
- return nodes[i];
- }
-
-
- const char* _name;
- void* const _buf;
-
- int n; // number of hashtable buckets
- int maxChain;
-};
-
-} // namespace mongo
diff --git a/src/mongo/db/storage/mmap_v1/catalog/index_details.cpp b/src/mongo/db/storage/mmap_v1/catalog/index_details.cpp
deleted file mode 100644
index fa9093196f8..00000000000
--- a/src/mongo/db/storage/mmap_v1/catalog/index_details.cpp
+++ /dev/null
@@ -1,39 +0,0 @@
-// index_details.cpp
-
-/**
-* Copyright (C) 2008 10gen Inc.
-*
-* This program is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Affero General Public License, version 3,
-* as published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU Affero General Public License for more details.
-*
-* You should have received a copy of the GNU Affero General Public License
-* along with this program. If not, see <http://www.gnu.org/licenses/>.
-*
-* As a special exception, the copyright holders give permission to link the
-* code of portions of this program with the OpenSSL library under certain
-* conditions as described in each individual source file and distribute
-* linked combinations including the program with the OpenSSL library. You
-* must comply with the GNU Affero General Public License in all respects for
-* all of the code used other than as permitted herein. If you modify file(s)
-* with this exception, you may extend this exception to your version of the
-* file(s), but you are not obligated to do so. If you do not wish to do so,
-* delete this exception statement from your version. If you delete this
-* exception statement from all source files in the program, then also delete
-* it in the license file.
-*/
-
-#include "mongo/db/storage/mmap_v1/catalog/index_details.h"
-
-namespace mongo {
-
-void IndexDetails::_reset() {
- head.setInvalid();
- info.setInvalid();
-}
-}
diff --git a/src/mongo/db/storage/mmap_v1/catalog/index_details.h b/src/mongo/db/storage/mmap_v1/catalog/index_details.h
deleted file mode 100644
index 1ee5387c57c..00000000000
--- a/src/mongo/db/storage/mmap_v1/catalog/index_details.h
+++ /dev/null
@@ -1,70 +0,0 @@
-// index_details.h
-
-/**
-* Copyright (C) 2008 10gen Inc.
-*
-* This program is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Affero General Public License, version 3,
-* as published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU Affero General Public License for more details.
-*
-* You should have received a copy of the GNU Affero General Public License
-* along with this program. If not, see <http://www.gnu.org/licenses/>.
-*
-* As a special exception, the copyright holders give permission to link the
-* code of portions of this program with the OpenSSL library under certain
-* conditions as described in each individual source file and distribute
-* linked combinations including the program with the OpenSSL library. You
-* must comply with the GNU Affero General Public License in all respects for
-* all of the code used other than as permitted herein. If you modify file(s)
-* with this exception, you may extend this exception to your version of the
-* file(s), but you are not obligated to do so. If you do not wish to do so,
-* delete this exception statement from your version. If you delete this
-* exception statement from all source files in the program, then also delete
-* it in the license file.
-*/
-
-#pragma once
-
-#include "mongo/db/storage/mmap_v1/diskloc.h"
-
-namespace mongo {
-
-/* Details about a particular index. There is one of these effectively for each object in
- system.namespaces (although this also includes the head pointer, which is not in that
- collection).
-
- This is an internal part of the catalog. Nothing outside of the catalog should use this.
-
- ** MemoryMapped in NamespaceDetails ** (i.e., this is on disk data)
- */
-#pragma pack(1)
-struct IndexDetails {
- /**
- * btree head disk location
- */
- DiskLoc head;
-
- /* Location of index info object. Format:
-
- { name:"nameofindex", ns:"parentnsname", key: {keypattobject}
- [, unique: <bool>, background: <bool>, v:<version>]
- }
-
- This object is in the system.indexes collection. Note that since we
- have a pointer to the object here, the object in system.indexes MUST NEVER MOVE.
- */
- DiskLoc info;
-
- /**
- * makes head and info invalid
- */
- void _reset();
-};
-#pragma pack()
-
-} // namespace mongo
diff --git a/src/mongo/db/storage/mmap_v1/catalog/namespace.h b/src/mongo/db/storage/mmap_v1/catalog/namespace.h
deleted file mode 100644
index e7f2ba636a5..00000000000
--- a/src/mongo/db/storage/mmap_v1/catalog/namespace.h
+++ /dev/null
@@ -1,168 +0,0 @@
-/**
- * Copyright (C) 2017 MongoDB Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#pragma once
-
-#include <boost/filesystem/path.hpp>
-#include <cstring>
-#include <string>
-
-#include "mongo/base/string_data.h"
-#include "mongo/db/namespace_string.h"
-#include "mongo/util/assert_util.h"
-#include "mongo/util/mongoutils/str.h"
-
-namespace mongo {
-
-#pragma pack(1)
-/**
- * This is used for storing a namespace on disk in a fixed width form and should only be used for
- * that, not for passing internally.
- *
- * If you need to pass internally, please use NamespaceString.
- */
-class Namespace {
-public:
- Namespace(StringData ns) {
- *this = ns;
- }
-
- Namespace& operator=(StringData ns) {
- // We fill the remaining space with all zeroes here. As the full Namespace struct is in the
- // datafiles (the .ns files specifically), this keeps the files byte-for-byte deterministic
- // for a given sequence of operations, which makes testing and debugging the data files
- // easier.
- //
- // If profiling indicates this method is a significant bottleneck, we could have a version
- // we use for reads which does not fill with zeroes, and keep the zeroing behavior on
- // writes.
- memset(buf, 0, sizeof(buf));
- uassert(10080,
- str::stream() << "ns name " << ns << " (size: " << ns.size()
- << ") too long, max size is 127 bytes",
- ns.size() <= MaxNsLen);
- uassert(
- 17380, "ns name can't contain embedded '\0' byte", ns.find('\0') == std::string::npos);
- ns.copyTo(buf, true);
- return *this;
- }
-
- void kill() {
- buf[0] = 0x7f;
- }
-
- bool operator==(const char* r) const {
- return strcmp(buf, r) == 0;
- }
- bool operator==(const Namespace& r) const {
- return strcmp(buf, r.buf) == 0;
- }
- bool operator!=(const char* r) const {
- return strcmp(buf, r) != 0;
- }
- bool operator!=(const Namespace& r) const {
- return strcmp(buf, r.buf) != 0;
- }
-
- bool hasDollarSign() const {
- return strchr(buf, '$') != NULL;
- }
-
- /**
- * Value returned is always > 0
- */
- int hash() const {
- unsigned x = 0;
- const char* p = buf;
- while (*p) {
- x = x * 131 + *p;
- p++;
- }
- return (x & 0x7fffffff) | 0x8000000; // must be > 0
- }
-
- size_t size() const {
- return strlen(buf);
- }
-
- std::string toString() const {
- return buf;
- }
- operator std::string() const {
- return buf;
- }
-
- /**
- * NamespaceDetails::Extra was added after fact to allow chaining of data blocks to support more
- * than 10 indexes (more than 10 IndexDetails). It's a bit hacky because of this late addition
- * with backward file support.
- */
- std::string extraName(int i) const {
- char ex[] = "$extra";
- ex[5] += i;
- const std::string s = std::string(buf) + ex;
- massert(10348, "$extra: ns name too long", s.size() <= MaxNsLen);
- return s;
- }
-
- /**
- * Returns whether the namespace ends with "$extr...". When true, it represents an extra block,
- * not a normal NamespaceDetails block.
- */
- bool isExtra() const {
- const char* p = strstr(buf, "$extr");
- return p && p[5] &&
- p[6] == 0; //== 0 is important in case an index uses name "$extra_1" for example
- }
-
- enum MaxNsLenValue {
- // Maximum possible length of the name of any namespace, including special ones like $extra. This
- // includes room for the NUL byte so it can be used when sizing buffers.
- MaxNsLenWithNUL = 128,
-
- // MaxNsLenWithNUL excluding the NUL byte. Use this when comparing std::string lengths.
- MaxNsLen = MaxNsLenWithNUL - 1,
-
- // Maximum allowed length of the fully qualified namespace name of any real collection. Does not
- // include NUL so it can be directly compared to std::string lengths.
- MaxNsCollectionLen = MaxNsLen - 7 /*strlen(".$extra")*/,
- };
-
-private:
- char buf[MaxNsLenWithNUL];
-};
-#pragma pack()
-
-namespace {
-MONGO_STATIC_ASSERT(sizeof(Namespace) == 128);
-MONGO_STATIC_ASSERT(Namespace::MaxNsLenWithNUL == MaxDatabaseNameLen);
-MONGO_STATIC_ASSERT((int)Namespace::MaxNsLenWithNUL == (int)NamespaceString::MaxNsLenWithNUL);
-MONGO_STATIC_ASSERT((int)Namespace::MaxNsLen == (int)NamespaceString::MaxNsLen);
-MONGO_STATIC_ASSERT((int)Namespace::MaxNsCollectionLen == (int)NamespaceString::MaxNsCollectionLen);
-} // namespace
-} // namespace mongo
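
Two details of Namespace worth spelling out. hash() is a classic multiply-by-131 string
hash with bit 27 (0x8000000) OR'd in, which is what guarantees the documented "always > 0".
And extraName(i) advances the final letter of "$extra" by i, so, illustratively,
extraName(0) yields "foo.bar$extra" and extraName(1) yields "foo.bar$extrb". A standalone
restatement of the hash, outside the class purely for illustration:

    int nsHash(const char* s) {
        unsigned x = 0;
        for (const char* p = s; *p; ++p)
            x = x * 131 + *p;                 // same polynomial walk as Namespace::hash()
        return (x & 0x7fffffff) | 0x8000000;  // clear the sign bit, force nonzero via bit 27
    }
    // nsHash("test.foo") is deterministic and > 0, as NamespaceHashTable::_find() requires.
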
diff --git a/src/mongo/db/storage/mmap_v1/catalog/namespace_details.cpp b/src/mongo/db/storage/mmap_v1/catalog/namespace_details.cpp
deleted file mode 100644
index df75d2ba8aa..00000000000
--- a/src/mongo/db/storage/mmap_v1/catalog/namespace_details.cpp
+++ /dev/null
@@ -1,242 +0,0 @@
-/**
-* Copyright (C) 2008 10gen Inc.
-*
-* This program is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Affero General Public License, version 3,
-* as published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU Affero General Public License for more details.
-*
-* You should have received a copy of the GNU Affero General Public License
-* along with this program. If not, see <http://www.gnu.org/licenses/>.
-*
-* As a special exception, the copyright holders give permission to link the
-* code of portions of this program with the OpenSSL library under certain
-* conditions as described in each individual source file and distribute
-* linked combinations including the program with the OpenSSL library. You
-* must comply with the GNU Affero General Public License in all respects for
-* all of the code used other than as permitted herein. If you modify file(s)
-* with this exception, you may extend this exception to your version of the
-* file(s), but you are not obligated to do so. If you do not wish to do so,
-* delete this exception statement from your version. If you delete this
-* exception statement from all source files in the program, then also delete
-* it in the license file.
-*/
-
-#include "mongo/platform/basic.h"
-
-#include "mongo/db/storage/mmap_v1/catalog/namespace_details.h"
-
-#include <algorithm>
-#include <list>
-
-#include "mongo/base/counter.h"
-#include "mongo/db/catalog/collection.h"
-#include "mongo/db/catalog/collection_options.h"
-#include "mongo/db/clientcursor.h"
-#include "mongo/db/commands/server_status.h"
-#include "mongo/db/concurrency/locker.h"
-#include "mongo/db/db.h"
-#include "mongo/db/index_legacy.h"
-#include "mongo/db/json.h"
-#include "mongo/db/operation_context.h"
-#include "mongo/db/ops/delete.h"
-#include "mongo/db/ops/update.h"
-#include "mongo/db/storage/mmap_v1/catalog/namespace_index.h"
-#include "mongo/scripting/engine.h"
-#include "mongo/util/startup_test.h"
-
-namespace mongo {
-
-NamespaceDetails::NamespaceDetails(const DiskLoc& loc, bool capped) {
- MONGO_STATIC_ASSERT(sizeof(NamespaceDetails::Extra) <= sizeof(NamespaceDetails));
-
- /* be sure to initialize new fields here -- the struct doesn't default to zeroes the way we use it */
- firstExtent = lastExtent = capExtent = loc;
- stats.datasize = stats.nrecords = 0;
- lastExtentSize = 0;
- nIndexes = 0;
- isCapped = capped;
- maxDocsInCapped = 0x7fffffff; // no limit (value is for pre-v2.3.2 compatibility)
- paddingFactorOldDoNotUse = 1.0;
- systemFlagsOldDoNotUse = 0;
- userFlags = 0;
- capFirstNewRecord = DiskLoc();
- // Signal that we are on first allocation iteration through extents.
- capFirstNewRecord.setInvalid();
- // For capped case, signal that we are doing initial extent allocation.
- if (capped) {
- // WAS: cappedLastDelRecLastExtent().setInvalid();
- deletedListSmall[1].setInvalid();
- }
- verify(sizeof(_dataFileVersion) == 2);
- _dataFileVersion = 0;
- _indexFileVersion = 0;
- multiKeyIndexBits = 0;
- _reservedA = 0;
- _extraOffset = 0;
- indexBuildsInProgress = 0;
- memset(_reserved, 0, sizeof(_reserved));
-}
-
-NamespaceDetails::Extra* NamespaceDetails::allocExtra(OperationContext* opCtx,
- StringData ns,
- NamespaceIndex& ni,
- int nindexessofar) {
- // Namespace details must always be changed under an exclusive DB lock
- const NamespaceString nss(ns);
- invariant(opCtx->lockState()->isDbLockedForMode(nss.db(), MODE_X));
-
- int i = (nindexessofar - NIndexesBase) / NIndexesExtra;
- verify(i >= 0 && i <= 1);
-
- Namespace fullns(ns);
- Namespace extrans(fullns.extraName(i)); // throws AssertionException if ns name too long
-
- massert(10351, "allocExtra: extra already exists", ni.details(extrans) == 0);
-
- Extra temp;
- temp.init();
-
- ni.add_ns(opCtx, extrans, reinterpret_cast<NamespaceDetails*>(&temp));
- Extra* e = reinterpret_cast<NamespaceDetails::Extra*>(ni.details(extrans));
-
- long ofs = e->ofsFrom(this);
- if (i == 0) {
- verify(_extraOffset == 0);
- *opCtx->recoveryUnit()->writing(&_extraOffset) = ofs;
- verify(extra() == e);
- } else {
- Extra* hd = extra();
- verify(hd->next(this) == 0);
- hd->setNext(opCtx, ofs);
- }
- return e;
-}
-
-IndexDetails& NamespaceDetails::idx(int idxNo, bool missingExpected) {
- if (idxNo < NIndexesBase) {
- IndexDetails& id = _indexes[idxNo];
- return id;
- }
- Extra* e = extra();
- if (!e) {
- if (missingExpected)
- uasserted(13283, "Missing Extra");
- massert(14045, "missing Extra", e);
- }
- int i = idxNo - NIndexesBase;
- if (i >= NIndexesExtra) {
- e = e->next(this);
- if (!e) {
- if (missingExpected)
- uasserted(14823, "missing extra");
- massert(14824, "missing Extra", e);
- }
- i -= NIndexesExtra;
- }
- return e->details[i];
-}
-
-
-const IndexDetails& NamespaceDetails::idx(int idxNo, bool missingExpected) const {
- if (idxNo < NIndexesBase) {
- const IndexDetails& id = _indexes[idxNo];
- return id;
- }
- const Extra* e = extra();
- if (!e) {
- if (missingExpected)
- uasserted(17421, "Missing Extra");
- massert(17422, "missing Extra", e);
- }
- int i = idxNo - NIndexesBase;
- if (i >= NIndexesExtra) {
- e = e->next(this);
- if (!e) {
- if (missingExpected)
- uasserted(17423, "missing extra");
- massert(17424, "missing Extra", e);
- }
- i -= NIndexesExtra;
- }
- return e->details[i];
-}
-
-NamespaceDetails::IndexIterator::IndexIterator(const NamespaceDetails* _d,
- bool includeBackgroundInProgress) {
- d = _d;
- i = 0;
- n = d->nIndexes;
- if (includeBackgroundInProgress)
- n += d->indexBuildsInProgress;
-}
-
-// must be called when renaming a NS to fix up extra
-void NamespaceDetails::copyingFrom(OperationContext* opCtx,
- StringData thisns,
- NamespaceIndex& ni,
- NamespaceDetails* src) {
- _extraOffset = 0; // we are a copy -- the old value is wrong. fixing it up below.
- Extra* se = src->extra();
- int n = NIndexesBase;
- if (se) {
- Extra* e = allocExtra(opCtx, thisns, ni, n);
- while (1) {
- n += NIndexesExtra;
- e->copy(this, *se);
- se = se->next(src);
- if (se == 0)
- break;
- Extra* nxt = allocExtra(opCtx, thisns, ni, n);
- e->setNext(opCtx, nxt->ofsFrom(this));
- e = nxt;
- }
- verify(_extraOffset);
- }
-}
-
-NamespaceDetails* NamespaceDetails::writingWithoutExtra(OperationContext* opCtx) {
- return opCtx->recoveryUnit()->writing(this);
-}
-
-
-// XXX - this method should go away
-NamespaceDetails* NamespaceDetails::writingWithExtra(OperationContext* opCtx) {
- for (Extra* e = extra(); e; e = e->next(this)) {
- opCtx->recoveryUnit()->writing(e);
- }
- return writingWithoutExtra(opCtx);
-}
-
-void NamespaceDetails::setMaxCappedDocs(OperationContext* opCtx, long long max) {
- massert(16499,
- "max in a capped collection has to be < 2^31 or -1",
- CollectionOptions::validMaxCappedDocs(&max));
- maxDocsInCapped = max;
-}
-
-/* ------------------------------------------------------------------------- */
-
-
-int NamespaceDetails::_catalogFindIndexByName(OperationContext* opCtx,
- const Collection* coll,
- StringData name,
- bool includeBackgroundInProgress) const {
- IndexIterator i = ii(includeBackgroundInProgress);
- while (i.more()) {
- const BSONObj obj = coll->docFor(opCtx, i.next().info.toRecordId()).value();
- if (name == obj.getStringField("name"))
- return i.pos() - 1;
- }
- return -1;
-}
-
-void NamespaceDetails::Extra::setNext(OperationContext* opCtx, long ofs) {
- *opCtx->recoveryUnit()->writing(&_next) = ofs;
-}
-
-} // namespace mongo
diff --git a/src/mongo/db/storage/mmap_v1/catalog/namespace_details.h b/src/mongo/db/storage/mmap_v1/catalog/namespace_details.h
deleted file mode 100644
index cf82703a25d..00000000000
--- a/src/mongo/db/storage/mmap_v1/catalog/namespace_details.h
+++ /dev/null
@@ -1,254 +0,0 @@
-/**
- * Copyright (C) 2008 10gen Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#pragma once
-
-#include "mongo/base/static_assert.h"
-#include "mongo/db/namespace_string.h"
-#include "mongo/db/storage/mmap_v1/catalog/index_details.h"
-#include "mongo/db/storage/mmap_v1/catalog/namespace.h"
-#include "mongo/db/storage/mmap_v1/diskloc.h"
-
-namespace mongo {
-
-class Collection;
-class NamespaceIndex;
-class OperationContext;
-
-#pragma pack(1)
-/* NamespaceDetails : this is the "header" for a collection that has all its details.
- It's in the .ns file and this is a memory mapped region (thus the pack pragma above).
-*/
-class NamespaceDetails {
-public:
- enum { NIndexesMax = 64, NIndexesExtra = 30, NIndexesBase = 10 };
-
- // deleted lists -- linked lists of deleted records -- are placed in 'buckets' of various
- // sizes so you can look for a deleted record of about the right size. These buckets are
- // split into small and large groups for compatibility with old versions.
- static const int SmallBuckets = 18;
- static const int LargeBuckets = 8;
-
-
- /*-------- data fields, as present on disk : */
-
- DiskLoc firstExtent;
- DiskLoc lastExtent;
-
- /* NOTE: capped collections v1 override the meaning of deletedList.
- deletedList[0] points to a list of free records (DeletedRecord's) for all extents in
- the capped namespace.
- deletedList[1] points to the last record in the prev extent. When the "current extent"
- changes, this value is updated. !deletedList[1].isValid() when this value is not
- yet computed.
- */
- DiskLoc deletedListSmall[SmallBuckets];
- DiskLoc deletedListLegacyGrabBag; // old implementations put records of multiple sizes here.
-
- // ofs 168 (8 byte aligned)
- struct Stats {
- // datasize and nrecords MUST be adjacent; code assumes this!
- long long datasize; // this includes padding, but not record headers
- long long nrecords;
- } stats;
-
-
- int lastExtentSize;
-
- int nIndexes;
-
- // ofs 192
- IndexDetails _indexes[NIndexesBase];
-
-public:
- // ofs 352 (16 byte aligned)
- int isCapped; // there is wasted space here if I'm right (ERH)
-
- int maxDocsInCapped; // max # of objects for a capped table, -1 for inf.
-
- double paddingFactorOldDoNotUse;
- // ofs 368 (16)
- int systemFlagsOldDoNotUse; // things that the system sets/cares about
-
- DiskLoc capExtent; // the "current" extent we're writing to for a capped collection
- DiskLoc capFirstNewRecord;
-
- // NamespaceDetails version. So we can do backward compatibility in the future. See filever.h
- unsigned short _dataFileVersion;
- unsigned short _indexFileVersion;
-
- unsigned long long multiKeyIndexBits;
-
- // ofs 400 (16)
- unsigned long long _reservedA;
- long long _extraOffset; // where the $extra info is located (bytes relative to this)
-
-public:
- int indexBuildsInProgress; // Number of indexes currently being built
-
- int userFlags;
-
- DiskLoc deletedListLarge[LargeBuckets];
-
- // Think carefully before using this. We need at least 8 bytes reserved to leave room for a
- // DiskLoc pointing to more data (eg in a dummy MmapV1RecordHeader or Extent). There is still
- // _reservedA above, but these are the final two reserved 8-byte regions.
- char _reserved[8];
- /*-------- end data 496 bytes */
-public:
- explicit NamespaceDetails(const DiskLoc& loc, bool _capped);
-
- class Extra {
- long long _next;
-
- public:
- IndexDetails details[NIndexesExtra];
-
- private:
- unsigned reserved2;
- unsigned reserved3;
- Extra(const Extra&) {
- verify(false);
- }
- Extra& operator=(const Extra& r) {
- verify(false);
- return *this;
- }
-
- public:
- Extra() {}
- long ofsFrom(NamespaceDetails* d) {
- return ((char*)this) - ((char*)d);
- }
- void init() {
- memset(this, 0, sizeof(Extra));
- }
- Extra* next(const NamespaceDetails* d) const {
- if (_next == 0)
- return 0;
- return (Extra*)(((char*)d) + _next);
- }
- void setNext(OperationContext* opCtx, long ofs);
- void copy(NamespaceDetails* d, const Extra& e) {
- memcpy(this, &e, sizeof(Extra));
- _next = 0;
- }
- };
- Extra* extra() const {
- if (_extraOffset == 0)
- return 0;
- return (Extra*)(((char*)this) + _extraOffset);
- }
- /* add extra space for indexes when more than 10 */
- Extra* allocExtra(OperationContext* opCtx,
- StringData ns,
- NamespaceIndex& ni,
- int nindexessofar);
-
- void copyingFrom(OperationContext* opCtx,
- StringData thisns,
- NamespaceIndex& ni,
- NamespaceDetails* src); // must be called when renaming a NS to fix up extra
-
-public:
- void setMaxCappedDocs(OperationContext* opCtx, long long max);
-
- enum UserFlags {
- Flag_UsePowerOf2Sizes = 1 << 0,
- Flag_NoPadding = 1 << 1,
- };
-
- IndexDetails& idx(int idxNo, bool missingExpected = false);
- const IndexDetails& idx(int idxNo, bool missingExpected = false) const;
-
- class IndexIterator {
- public:
- // Note: returns the position of the next index to be returned by next().
- int pos() {
- return i;
- }
- bool more() {
- return i < n;
- }
- const IndexDetails& next() {
- return d->idx(i++);
- }
-
- private:
- friend class NamespaceDetails;
- int i, n;
- const NamespaceDetails* d;
- IndexIterator(const NamespaceDetails* _d, bool includeBackgroundInProgress);
- };
-
- IndexIterator ii(bool includeBackgroundInProgress = false) const {
- return IndexIterator(this, includeBackgroundInProgress);
- }
-
- /**
- * This fetches the IndexDetails for the next empty index slot. The caller must populate
- * returned object. This handles allocating extra index space, if necessary.
- */
- IndexDetails& getNextIndexDetails(OperationContext* opCtx, Collection* collection);
-
- NamespaceDetails* writingWithoutExtra(OperationContext* opCtx);
-
- /** Make all linked Extra objects writeable as well */
- NamespaceDetails* writingWithExtra(OperationContext* opCtx);
-
- /**
- * Returns the offset of the specified index name within the array of indexes. The owning
- * collection must be passed in to resolve the index record entries to objects.
- *
- * @return the index's position (>= 0) if the index name was found, -1 otherwise.
- */
- int _catalogFindIndexByName(OperationContext* opCtx,
- const Collection* coll,
- StringData name,
- bool includeBackgroundInProgress) const;
-
-private:
- /**
- * swaps all meta data for 2 indexes
- * a and b are 2 index ids, whose contents will be swapped
- * must have a lock on the entire collection to do this
- */
- void swapIndex(OperationContext* opCtx, int a, int b);
-
- friend class IndexCatalog;
- friend class IndexCatalogEntry;
-
- /** Update cappedLastDelRecLastExtent() after capExtent changed in cappedTruncateAfter() */
- void cappedTruncateLastDelUpdate();
- MONGO_STATIC_ASSERT(NIndexesMax <= NIndexesBase + NIndexesExtra * 2);
- MONGO_STATIC_ASSERT(NIndexesMax <= 64); // multiKey bits
- MONGO_STATIC_ASSERT(sizeof(NamespaceDetails::Extra) == 496);
-}; // NamespaceDetails
-MONGO_STATIC_ASSERT(sizeof(NamespaceDetails) == 496);
-#pragma pack()
-
-} // namespace mongo
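The header above leans on a pattern worth calling out: a #pragma pack(1) struct whose exact on-disk size is pinned by static asserts, so an accidental field change breaks the build rather than corrupting data files. Below is a minimal sketch of the same idea; the struct and its fields are made up for illustration.

#include <cstdint>

#pragma pack(1)
struct OnDiskHeader {
    int64_t firstExtent;  // 8 bytes
    int64_t lastExtent;   // 8 bytes
    int32_t flags;        // 4 bytes -- pack(1) suppresses any inserted padding
    char reserved[12];    // explicit reservation keeps the size stable
};
#pragma pack()

static_assert(sizeof(OnDiskHeader) == 32,
              "on-disk header layout changed; bump the file version instead");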
diff --git a/src/mongo/db/storage/mmap_v1/catalog/namespace_details_collection_entry.cpp b/src/mongo/db/storage/mmap_v1/catalog/namespace_details_collection_entry.cpp
deleted file mode 100644
index a61effdf205..00000000000
--- a/src/mongo/db/storage/mmap_v1/catalog/namespace_details_collection_entry.cpp
+++ /dev/null
@@ -1,488 +0,0 @@
-// namespace_details_collection_entry.cpp
-
-/**
-* Copyright (C) 2014 MongoDB Inc.
-*
-* This program is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Affero General Public License, version 3,
-* as published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU Affero General Public License for more details.
-*
-* You should have received a copy of the GNU Affero General Public License
-* along with this program. If not, see <http://www.gnu.org/licenses/>.
-*
-* As a special exception, the copyright holders give permission to link the
-* code of portions of this program with the OpenSSL library under certain
-* conditions as described in each individual source file and distribute
-* linked combinations including the program with the OpenSSL library. You
-* must comply with the GNU Affero General Public License in all respects for
-* all of the code used other than as permitted herein. If you modify file(s)
-* with this exception, you may extend this exception to your version of the
-* file(s), but you are not obligated to do so. If you do not wish to do so,
-* delete this exception statement from your version. If you delete this
-* exception statement from all source files in the program, then also delete
-* it in the license file.
-*/
-
-#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kStorage
-
-#include "mongo/db/storage/mmap_v1/catalog/namespace_details_collection_entry.h"
-
-#include "mongo/db/catalog/database.h"
-#include "mongo/db/catalog/database_holder.h"
-#include "mongo/db/catalog/uuid_catalog.h"
-#include "mongo/db/index/index_descriptor.h"
-#include "mongo/db/ops/update.h"
-#include "mongo/db/record_id.h"
-#include "mongo/db/storage/mmap_v1/catalog/namespace_details.h"
-#include "mongo/db/storage/mmap_v1/catalog/namespace_details_rsv1_metadata.h"
-#include "mongo/db/storage/mmap_v1/data_file.h"
-#include "mongo/db/storage/mmap_v1/mmap_v1_database_catalog_entry.h"
-#include "mongo/db/storage/record_store.h"
-#include "mongo/util/log.h"
-#include "mongo/util/startup_test.h"
-
-namespace mongo {
-
-using std::string;
-
-NamespaceDetailsCollectionCatalogEntry::NamespaceDetailsCollectionCatalogEntry(
- StringData ns,
- NamespaceDetails* details,
- RecordStore* namespacesRecordStore,
- RecordId namespacesRecordId,
- RecordStore* indexRecordStore,
- MMAPV1DatabaseCatalogEntry* db)
- : CollectionCatalogEntry(ns),
- _details(details),
- _namespacesRecordStore(namespacesRecordStore),
- _indexRecordStore(indexRecordStore),
- _db(db) {
- setNamespacesRecordId(nullptr, namespacesRecordId);
-}
-
-CollectionOptions NamespaceDetailsCollectionCatalogEntry::getCollectionOptions(
- OperationContext* opCtx) const {
- CollectionOptions options = _db->getCollectionOptions(opCtx, _namespacesRecordId);
-
- if (options.flagsSet) {
- if (options.flags != _details->userFlags) {
- warning() << "system.namespaces and NamespaceDetails disagree about userFlags."
- << " system.namespaces: " << options.flags
- << " NamespaceDetails: " << _details->userFlags;
- dassert(options.flags == _details->userFlags);
- }
- }
-
- // Fill in the actual flags from the NamespaceDetails.
- // Leaving flagsSet alone since it indicates whether the user actively set the flags.
- options.flags = _details->userFlags;
-
- return options;
-}
-
-int NamespaceDetailsCollectionCatalogEntry::getTotalIndexCount(OperationContext* opCtx) const {
- return _details->nIndexes + _details->indexBuildsInProgress;
-}
-
-int NamespaceDetailsCollectionCatalogEntry::getCompletedIndexCount(OperationContext* opCtx) const {
- return _details->nIndexes;
-}
-
-int NamespaceDetailsCollectionCatalogEntry::getMaxAllowedIndexes() const {
- return NamespaceDetails::NIndexesMax;
-}
-
-void NamespaceDetailsCollectionCatalogEntry::getAllIndexes(OperationContext* opCtx,
- std::vector<std::string>* names) const {
- NamespaceDetails::IndexIterator i = _details->ii(true);
- while (i.more()) {
- const IndexDetails& id = i.next();
- const BSONObj obj = _indexRecordStore->dataFor(opCtx, id.info.toRecordId()).toBson();
- names->push_back(obj.getStringField("name"));
- }
-}
-
-void NamespaceDetailsCollectionCatalogEntry::getReadyIndexes(
- OperationContext* opCtx, std::vector<std::string>* names) const {
- NamespaceDetails::IndexIterator i = _details->ii(true);
- while (i.more()) {
- const IndexDetails& id = i.next();
- const BSONObj obj = _indexRecordStore->dataFor(opCtx, id.info.toRecordId()).toBson();
- const char* idxName = obj.getStringField("name");
- if (isIndexReady(opCtx, StringData(idxName))) {
- names->push_back(idxName);
- }
- }
-}
-
-bool NamespaceDetailsCollectionCatalogEntry::isIndexMultikey(OperationContext* opCtx,
- StringData idxName,
- MultikeyPaths* multikeyPaths) const {
- // TODO SERVER-22727: Populate 'multikeyPaths' with path components that cause 'idxName' to be
- // multikey.
- int idxNo = _findIndexNumber(opCtx, idxName);
- invariant(idxNo >= 0);
- return isIndexMultikey(idxNo);
-}
-
-bool NamespaceDetailsCollectionCatalogEntry::isIndexMultikey(int idxNo) const {
- return (_details->multiKeyIndexBits & (((unsigned long long)1) << idxNo)) != 0;
-}
-
-bool NamespaceDetailsCollectionCatalogEntry::setIndexIsMultikey(
- OperationContext* opCtx, StringData indexName, const MultikeyPaths& multikeyPaths) {
- // TODO SERVER-22727: Store new path components from 'multikeyPaths' that cause 'indexName' to
- // be multikey.
- int idxNo = _findIndexNumber(opCtx, indexName);
- invariant(idxNo >= 0);
- const bool multikey = true;
- return setIndexIsMultikey(opCtx, idxNo, multikey);
-}
-
-bool NamespaceDetailsCollectionCatalogEntry::setIndexIsMultikey(OperationContext* opCtx,
- int idxNo,
- bool multikey) {
- unsigned long long mask = 1ULL << idxNo;
-
- if (multikey) {
- // Shortcut if the bit is already set correctly
- if (_details->multiKeyIndexBits & mask) {
- return false;
- }
-
- *opCtx->recoveryUnit()->writing(&_details->multiKeyIndexBits) |= mask;
- } else {
- // Shortcut if the bit is already set correctly
- if (!(_details->multiKeyIndexBits & mask)) {
- return false;
- }
-
- // Invert mask: all 1's except a 0 at the ith bit
- mask = ~mask;
- *opCtx->recoveryUnit()->writing(&_details->multiKeyIndexBits) &= mask;
- }
-
- return true;
-}
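A standalone sketch of the bitmask bookkeeping in setIndexIsMultikey() above -- one bit per index, with an early-out when the bit already holds the desired value -- minus the recovery-unit plumbing that makes the real write durable. setMultikeyBit is a hypothetical helper, not MongoDB API.

#include <cassert>

bool setMultikeyBit(unsigned long long& bits, int idxNo, bool multikey) {
    const unsigned long long mask = 1ULL << idxNo;
    if (multikey) {
        if (bits & mask) return false;    // already set: no write needed
        bits |= mask;
    } else {
        if (!(bits & mask)) return false; // already clear: no write needed
        bits &= ~mask;                    // all ones except bit idxNo
    }
    return true;
}

int main() {
    unsigned long long bits = 0;
    assert(setMultikeyBit(bits, 3, true));   // 0 -> 0b1000
    assert(!setMultikeyBit(bits, 3, true));  // no-op, reports no change
    assert(setMultikeyBit(bits, 3, false));  // back to 0
    return 0;
}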
-
-RecordId NamespaceDetailsCollectionCatalogEntry::getIndexHead(OperationContext* opCtx,
- StringData idxName) const {
- int idxNo = _findIndexNumber(opCtx, idxName);
- invariant(idxNo >= 0);
- return _details->idx(idxNo).head.toRecordId();
-}
-
-BSONObj NamespaceDetailsCollectionCatalogEntry::getIndexSpec(OperationContext* opCtx,
- StringData idxName) const {
- int idxNo = _findIndexNumber(opCtx, idxName);
- invariant(idxNo >= 0);
- const IndexDetails& id = _details->idx(idxNo);
- return _indexRecordStore->dataFor(opCtx, id.info.toRecordId()).toBson();
-}
-
-void NamespaceDetailsCollectionCatalogEntry::setIndexHead(OperationContext* opCtx,
- StringData idxName,
- const RecordId& newHead) {
- int idxNo = _findIndexNumber(opCtx, idxName);
- invariant(idxNo >= 0);
- *opCtx->recoveryUnit()->writing(&_details->idx(idxNo).head) = DiskLoc::fromRecordId(newHead);
-}
-
-bool NamespaceDetailsCollectionCatalogEntry::isIndexReady(OperationContext* opCtx,
- StringData idxName) const {
- int idxNo = _findIndexNumber(opCtx, idxName);
- invariant(idxNo >= 0);
- return idxNo < getCompletedIndexCount(opCtx);
-}
-
-KVPrefix NamespaceDetailsCollectionCatalogEntry::getIndexPrefix(OperationContext* opCtx,
- StringData indexName) const {
- return KVPrefix::kNotPrefixed;
-}
-
-int NamespaceDetailsCollectionCatalogEntry::_findIndexNumber(OperationContext* opCtx,
- StringData idxName) const {
- NamespaceDetails::IndexIterator i = _details->ii(true);
- while (i.more()) {
- const IndexDetails& id = i.next();
- int idxNo = i.pos() - 1;
- const BSONObj obj = _indexRecordStore->dataFor(opCtx, id.info.toRecordId()).toBson();
- if (idxName == obj.getStringField("name"))
- return idxNo;
- }
- return -1;
-}
-
-/* Removes a bit from a bit array by removing its slot entirely, rather than just
- clearing it. Note: this function does not work with x == 63 -- that is OK for now,
- but keep in mind that if the maximum number of indexes were ever extended to
- exactly 64, it would be a problem.
-*/
-unsigned long long removeAndSlideBit(unsigned long long b, int x) {
- unsigned long long tmp = b;
- return (tmp & ((((unsigned long long)1) << x) - 1)) | ((tmp >> (x + 1)) << x);
-}
-
-class IndexUpdateTest : public StartupTest {
-public:
- void run() {
- verify(removeAndSlideBit(1, 0) == 0);
- verify(removeAndSlideBit(2, 0) == 1);
- verify(removeAndSlideBit(2, 1) == 0);
- verify(removeAndSlideBit(255, 1) == 127);
- verify(removeAndSlideBit(21, 2) == 9);
- verify(removeAndSlideBit(0x4000000000000001ULL, 62) == 1);
- }
-} iu_unittest;
-
-Status NamespaceDetailsCollectionCatalogEntry::removeIndex(OperationContext* opCtx,
- StringData indexName) {
- int idxNo = _findIndexNumber(opCtx, indexName);
- if (idxNo < 0)
- return Status(ErrorCodes::NamespaceNotFound, "index not found to remove");
-
- RecordId infoLocation = _details->idx(idxNo).info.toRecordId();
-
- { // sanity check
- BSONObj info = _indexRecordStore->dataFor(opCtx, infoLocation).toBson();
- invariant(info["name"].String() == indexName);
- }
-
- { // drop the namespace
- string indexNamespace = IndexDescriptor::makeIndexNamespace(ns().ns(), indexName);
- Status status = _db->dropCollection(opCtx, indexNamespace);
- if (!status.isOK()) {
- return status;
- }
- }
-
- { // all info in the .ns file
- NamespaceDetails* d = _details->writingWithExtra(opCtx);
-
- // fix the _multiKeyIndexBits, by moving all bits above me down one
- d->multiKeyIndexBits = removeAndSlideBit(d->multiKeyIndexBits, idxNo);
-
- if (idxNo >= d->nIndexes)
- d->indexBuildsInProgress--;
- else
- d->nIndexes--;
-
- for (int i = idxNo; i < getTotalIndexCount(opCtx); i++)
- d->idx(i) = d->idx(i + 1);
-
- d->idx(getTotalIndexCount(opCtx)) = IndexDetails();
- }
-
- // Someone may be querying the system.indexes namespace directly, so we need to invalidate
- // its cursors.
- MMAPV1DatabaseCatalogEntry::invalidateSystemCollectionRecord(
- opCtx, NamespaceString(_db->name(), "system.indexes"), infoLocation);
-
- // remove from system.indexes
- _indexRecordStore->deleteRecord(opCtx, infoLocation);
-
- return Status::OK();
-}
-
-Status NamespaceDetailsCollectionCatalogEntry::prepareForIndexBuild(
- OperationContext* opCtx, const IndexDescriptor* desc, bool isBackgroundSecondaryBuild) {
- BSONObj spec = desc->infoObj();
- // 1) entry in system.indexes
- // TODO SERVER-30638: using timestamp 0 for these inserts.
- StatusWith<RecordId> systemIndexesEntry =
- _indexRecordStore->insertRecord(opCtx, spec.objdata(), spec.objsize(), Timestamp(), false);
- if (!systemIndexesEntry.isOK())
- return systemIndexesEntry.getStatus();
-
- // 2) NamespaceDetails mods
- IndexDetails* id;
- try {
- id = &_details->idx(getTotalIndexCount(opCtx), true);
- } catch (DBException&) {
- _details->allocExtra(opCtx, ns().ns(), _db->_namespaceIndex, getTotalIndexCount(opCtx));
- id = &_details->idx(getTotalIndexCount(opCtx), false);
- }
-
- const DiskLoc infoLoc = DiskLoc::fromRecordId(systemIndexesEntry.getValue());
- *opCtx->recoveryUnit()->writing(&id->info) = infoLoc;
- *opCtx->recoveryUnit()->writing(&id->head) = DiskLoc();
-
- opCtx->recoveryUnit()->writingInt(_details->indexBuildsInProgress) += 1;
-
- // 3) indexes entry in .ns file and system.namespaces
- _db->createNamespaceForIndex(opCtx, desc->indexNamespace());
-
- // TODO SERVER-22727: Create an entry for path-level multikey info when creating the new index.
-
- // Mark the collation feature as in use if the index has a non-simple collation.
- if (spec["collation"]) {
- _db->markCollationFeatureAsInUse(opCtx);
- }
-
- return Status::OK();
-}
-
-void NamespaceDetailsCollectionCatalogEntry::indexBuildSuccess(OperationContext* opCtx,
- StringData indexName) {
- int idxNo = _findIndexNumber(opCtx, indexName);
- fassert(17202, idxNo >= 0);
-
- // Make sure the newly created index is relocated to nIndexes, if it isn't already there
- if (idxNo != getCompletedIndexCount(opCtx)) {
- int toIdxNo = getCompletedIndexCount(opCtx);
-
- //_details->swapIndex( opCtx, idxNo, toIdxNo );
-
- // flip main meta data
- IndexDetails temp = _details->idx(idxNo);
- *opCtx->recoveryUnit()->writing(&_details->idx(idxNo)) = _details->idx(toIdxNo);
- *opCtx->recoveryUnit()->writing(&_details->idx(toIdxNo)) = temp;
-
- // flip multi key bits
- bool tempMultikey = isIndexMultikey(idxNo);
- setIndexIsMultikey(opCtx, idxNo, isIndexMultikey(toIdxNo));
- setIndexIsMultikey(opCtx, toIdxNo, tempMultikey);
-
- idxNo = toIdxNo;
- invariant((idxNo == _findIndexNumber(opCtx, indexName)));
- }
-
- opCtx->recoveryUnit()->writingInt(_details->indexBuildsInProgress) -= 1;
- opCtx->recoveryUnit()->writingInt(_details->nIndexes) += 1;
-
- invariant(isIndexReady(opCtx, indexName));
-}
-
-void NamespaceDetailsCollectionCatalogEntry::updateTTLSetting(OperationContext* opCtx,
- StringData idxName,
- long long newExpireSeconds) {
- int idx = _findIndexNumber(opCtx, idxName);
- invariant(idx >= 0);
-
- IndexDetails& indexDetails = _details->idx(idx);
-
- BSONObj obj = _indexRecordStore->dataFor(opCtx, indexDetails.info.toRecordId()).toBson();
- const BSONElement oldExpireSecs = obj.getField("expireAfterSeconds");
-
- // It is important that we set the new value in place: we are writing directly to
- // the mapped object, so we must not overwrite the value with a longer numeric type.
-
- char* nonConstPtr = const_cast<char*>(oldExpireSecs.value());
- switch (oldExpireSecs.type()) {
- case EOO:
- massert(16631, "index does not have an 'expireAfterSeconds' field", false);
- break;
- case NumberInt:
- *opCtx->recoveryUnit()->writing(reinterpret_cast<int*>(nonConstPtr)) = newExpireSeconds;
- break;
- case NumberDouble:
- *opCtx->recoveryUnit()->writing(reinterpret_cast<double*>(nonConstPtr)) =
- newExpireSeconds;
- break;
- case NumberLong:
- *opCtx->recoveryUnit()->writing(reinterpret_cast<long long*>(nonConstPtr)) =
- newExpireSeconds;
- break;
- default:
- massert(16632, "current 'expireAfterSeconds' is not a number", false);
- }
-}
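A sketch of the width-preserving in-place overwrite that updateTTLSetting() performs above. The stored element keeps its original numeric type, so the new value must be written with exactly that type's width; writing eight bytes over a four-byte int would clobber the bytes that follow. The buffer and enum below are stand-ins for the mapped record, not real BSON.

#include <cstdio>
#include <cstring>

enum class NumType { Int32, Int64, Double };

void writeInPlace(char* valuePtr, NumType storedType, long long newValue) {
    switch (storedType) {
        case NumType::Int32: {
            int v = static_cast<int>(newValue);       // must stay 4 bytes
            std::memcpy(valuePtr, &v, sizeof(v));
            break;
        }
        case NumType::Int64: {
            long long v = newValue;                   // 8 bytes
            std::memcpy(valuePtr, &v, sizeof(v));
            break;
        }
        case NumType::Double: {
            double v = static_cast<double>(newValue); // 8 bytes, different encoding
            std::memcpy(valuePtr, &v, sizeof(v));
            break;
        }
    }
}

int main() {
    char record[16] = {};
    writeInPlace(record, NumType::Int32, 3600);  // only bytes 0..3 are touched
    int out;
    std::memcpy(&out, record, sizeof(out));
    std::printf("expireAfterSeconds is now %d\n", out);
    return 0;
}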
-
-void NamespaceDetailsCollectionCatalogEntry::_updateSystemNamespaces(OperationContext* opCtx,
- const BSONObj& update) {
- if (!_namespacesRecordStore)
- return;
-
- RecordData entry = _namespacesRecordStore->dataFor(opCtx, _namespacesRecordId);
- const BSONObj newEntry = applyUpdateOperators(opCtx, entry.releaseToBson(), update);
-
- Status result = _namespacesRecordStore->updateRecord(
- opCtx, _namespacesRecordId, newEntry.objdata(), newEntry.objsize(), false, NULL);
-
- if (ErrorCodes::NeedsDocumentMove == result) {
- // TODO SERVER-30638: using timestamp 0 for these inserts.
- StatusWith<RecordId> newLocation = _namespacesRecordStore->insertRecord(
- opCtx, newEntry.objdata(), newEntry.objsize(), Timestamp(), false);
- fassert(40074, newLocation.getStatus().isOK());
-
- // Invalidate old namespace record
- MMAPV1DatabaseCatalogEntry::invalidateSystemCollectionRecord(
- opCtx, NamespaceString(_db->name(), "system.namespaces"), _namespacesRecordId);
-
- _namespacesRecordStore->deleteRecord(opCtx, _namespacesRecordId);
-
- setNamespacesRecordId(opCtx, newLocation.getValue());
- } else {
- fassert(17486, result.isOK());
- }
-}
-
-void NamespaceDetailsCollectionCatalogEntry::updateFlags(OperationContext* opCtx, int newValue) {
- NamespaceDetailsRSV1MetaData md(ns().ns(), _details);
- md.replaceUserFlags(opCtx, newValue);
- _updateSystemNamespaces(opCtx, BSON("$set" << BSON("options.flags" << newValue)));
-}
-
-bool NamespaceDetailsCollectionCatalogEntry::isEqualToMetadataUUID(OperationContext* opCtx,
- OptionalCollectionUUID uuid) {
- if (ns().coll() == "system.namespaces") {
- return true;
- }
- RecordData namespaceData;
- invariant(_namespacesRecordStore->findRecord(opCtx, _namespacesRecordId, &namespaceData));
-
- auto namespacesBson = namespaceData.releaseToBson();
- if (ns().coll() == "system.indexes") {
- return !uuid && (!namespacesBson["options"].isABSONObj() ||
- namespacesBson["options"].Obj()["uuid"].eoo());
- }
- auto optionsObj = namespacesBson["options"].Obj();
- return !optionsObj["uuid"].eoo() && UUID::parse(optionsObj["uuid"]).getValue() == uuid;
-}
-
-void NamespaceDetailsCollectionCatalogEntry::updateValidator(OperationContext* opCtx,
- const BSONObj& validator,
- StringData validationLevel,
- StringData validationAction) {
- _updateSystemNamespaces(
- opCtx,
- BSON("$set" << BSON("options.validator" << validator << "options.validationLevel"
- << validationLevel
- << "options.validationAction"
- << validationAction)));
-}
-
-void NamespaceDetailsCollectionCatalogEntry::setIsTemp(OperationContext* opCtx, bool isTemp) {
- _updateSystemNamespaces(opCtx, BSON("$set" << BSON("options.temp" << isTemp)));
-}
-
-
-void NamespaceDetailsCollectionCatalogEntry::setNamespacesRecordId(OperationContext* opCtx,
- RecordId newId) {
- if (newId.isNull()) {
- invariant(ns().coll() == "system.namespaces" || ns().coll() == "system.indexes");
- } else {
- // 'opCtx' is allowed to be null, but we don't need an OperationContext in MMAP, so that's
- // OK.
- auto namespaceEntry = _namespacesRecordStore->dataFor(opCtx, newId).releaseToBson();
- invariant(namespaceEntry["name"].String() == ns().ns());
-
- // Register RecordId change for rollback if we're not initializing.
- if (opCtx && !_namespacesRecordId.isNull()) {
- auto oldNamespacesRecordId = _namespacesRecordId;
- opCtx->recoveryUnit()->onRollback([=] { _namespacesRecordId = oldNamespacesRecordId; });
- }
- _namespacesRecordId = newId;
- }
-}
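A sketch of the rollback-registration idiom used in setNamespacesRecordId() above: copy the pre-image before mutating, and register a handler that restores it if the unit of work aborts. MiniRecoveryUnit is a toy stand-in for MongoDB's RecoveryUnit, assumed here only for illustration.

#include <functional>
#include <iostream>
#include <vector>

class MiniRecoveryUnit {
public:
    void onRollback(std::function<void()> fn) {
        _handlers.push_back(std::move(fn));
    }
    void abandon() {  // run handlers in reverse registration order
        for (auto it = _handlers.rbegin(); it != _handlers.rend(); ++it)
            (*it)();
        _handlers.clear();
    }

private:
    std::vector<std::function<void()>> _handlers;
};

int main() {
    long long recordId = 7;
    MiniRecoveryUnit ru;

    long long oldId = recordId;  // capture the pre-image by value
    ru.onRollback([&recordId, oldId] { recordId = oldId; });
    recordId = 42;               // speculative change

    ru.abandon();                // simulate a rollback
    std::cout << "recordId restored to " << recordId << "\n";  // prints 7
    return 0;
}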
-
-void NamespaceDetailsCollectionCatalogEntry::updateCappedSize(OperationContext* opCtx,
- long long size) {
- MONGO_UNREACHABLE;
-}
-}
diff --git a/src/mongo/db/storage/mmap_v1/catalog/namespace_details_collection_entry.h b/src/mongo/db/storage/mmap_v1/catalog/namespace_details_collection_entry.h
deleted file mode 100644
index 3c349a890cc..00000000000
--- a/src/mongo/db/storage/mmap_v1/catalog/namespace_details_collection_entry.h
+++ /dev/null
@@ -1,147 +0,0 @@
-// namespace_details_collection_entry.h
-
-#pragma once
-
-/**
-* Copyright (C) 2014 MongoDB Inc.
-*
-* This program is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Affero General Public License, version 3,
-* as published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU Affero General Public License for more details.
-*
-* You should have received a copy of the GNU Affero General Public License
-* along with this program. If not, see <http://www.gnu.org/licenses/>.
-*
-* As a special exception, the copyright holders give permission to link the
-* code of portions of this program with the OpenSSL library under certain
-* conditions as described in each individual source file and distribute
-* linked combinations including the program with the OpenSSL library. You
-* must comply with the GNU Affero General Public License in all respects for
-* all of the code used other than as permitted herein. If you modify file(s)
-* with this exception, you may extend this exception to your version of the
-* file(s), but you are not obligated to do so. If you do not wish to do so,
-* delete this exception statement from your version. If you delete this
-* exception statement from all source files in the program, then also delete
-* it in the license file.
-*/
-
-#include "mongo/base/string_data.h"
-#include "mongo/bson/bsonobj.h"
-#include "mongo/db/catalog/collection_catalog_entry.h"
-#include "mongo/db/server_options.h"
-#include "mongo/db/storage/mmap_v1/diskloc.h"
-
-namespace mongo {
-
-class NamespaceDetails;
-
-class MMAPV1DatabaseCatalogEntry;
-class RecordStore;
-class OperationContext;
-
-class NamespaceDetailsCollectionCatalogEntry : public CollectionCatalogEntry {
-public:
- NamespaceDetailsCollectionCatalogEntry(StringData ns,
- NamespaceDetails* details,
- RecordStore* namespacesRecordStore,
- RecordId namespacesRecordId,
- RecordStore* indexRecordStore,
- MMAPV1DatabaseCatalogEntry* db);
-
- ~NamespaceDetailsCollectionCatalogEntry() {}
-
- CollectionOptions getCollectionOptions(OperationContext* opCtx) const final;
-
- int getTotalIndexCount(OperationContext* opCtx) const final;
-
- int getCompletedIndexCount(OperationContext* opCtx) const final;
-
- int getMaxAllowedIndexes() const final;
-
- void getAllIndexes(OperationContext* opCtx, std::vector<std::string>* names) const final;
-
- void getReadyIndexes(OperationContext* opCtx, std::vector<std::string>* names) const final;
-
- BSONObj getIndexSpec(OperationContext* opCtx, StringData idxName) const final;
-
- bool isIndexMultikey(OperationContext* opCtx,
- StringData indexName,
- MultikeyPaths* multikeyPaths) const final;
- bool isIndexMultikey(int idxNo) const;
-
- bool setIndexIsMultikey(OperationContext* opCtx, int idxNo, bool multikey = true);
- bool setIndexIsMultikey(OperationContext* opCtx,
- StringData indexName,
- const MultikeyPaths& multikeyPaths) final;
-
- RecordId getIndexHead(OperationContext* opCtx, StringData indexName) const final;
-
- void setIndexHead(OperationContext* opCtx, StringData indexName, const RecordId& newHead) final;
-
- bool isIndexReady(OperationContext* opCtx, StringData indexName) const final;
-
- KVPrefix getIndexPrefix(OperationContext* opCtx, StringData indexName) const final;
-
- Status removeIndex(OperationContext* opCtx, StringData indexName) final;
-
- Status prepareForIndexBuild(OperationContext* opCtx,
- const IndexDescriptor* spec,
- bool isBackgroundSecondaryBuild) final;
-
- void indexBuildSuccess(OperationContext* opCtx, StringData indexName) final;
-
- void updateTTLSetting(OperationContext* opCtx,
- StringData idxName,
- long long newExpireSeconds) final;
-
- void updateFlags(OperationContext* opCtx, int newValue) final;
-
- bool isEqualToMetadataUUID(OperationContext* opCtx, OptionalCollectionUUID uuid);
-
- void updateValidator(OperationContext* opCtx,
- const BSONObj& validator,
- StringData validationLevel,
- StringData validationAction) final;
-
- void setIsTemp(OperationContext* opCtx, bool isTemp) final;
-
- void updateCappedSize(OperationContext* opCtx, long long size) final;
-
- // not part of interface, but available to my storage engine
-
- int _findIndexNumber(OperationContext* opCtx, StringData indexName) const;
-
- RecordId getNamespacesRecordId() {
- return _namespacesRecordId;
- }
-
- /**
- * 'opCtx' is only allowed to be null when called from the constructor.
- */
- void setNamespacesRecordId(OperationContext* opCtx, RecordId newId);
-
-private:
- NamespaceDetails* _details;
- RecordStore* _namespacesRecordStore;
-
- // Where this entry lives in the _namespacesRecordStore.
- RecordId _namespacesRecordId;
-
- RecordStore* _indexRecordStore;
- MMAPV1DatabaseCatalogEntry* _db;
-
- /**
- * Updates the entry for this namespace in '_namespacesRecordStore', updating
- * '_namespacesRecordId' if necessary.
- */
- void _updateSystemNamespaces(OperationContext* opCtx, const BSONObj& update);
-
- friend class MMAPV1DatabaseCatalogEntry;
-};
-}
diff --git a/src/mongo/db/storage/mmap_v1/catalog/namespace_details_rsv1_metadata.cpp b/src/mongo/db/storage/mmap_v1/catalog/namespace_details_rsv1_metadata.cpp
deleted file mode 100644
index 7d5f1805d68..00000000000
--- a/src/mongo/db/storage/mmap_v1/catalog/namespace_details_rsv1_metadata.cpp
+++ /dev/null
@@ -1,200 +0,0 @@
-// namespace_details_rsv1_metadata.cpp
-
-/**
- * Copyright (C) 2014 MongoDB Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#include "mongo/db/storage/mmap_v1/catalog/namespace_details_rsv1_metadata.h"
-
-#include "mongo/base/static_assert.h"
-#include "mongo/db/operation_context.h"
-
-namespace mongo {
-
-using std::unique_ptr;
-using std::numeric_limits;
-
-MONGO_STATIC_ASSERT(RecordStoreV1Base::Buckets ==
- NamespaceDetails::SmallBuckets + NamespaceDetails::LargeBuckets);
-
-NamespaceDetailsRSV1MetaData::NamespaceDetailsRSV1MetaData(StringData ns, NamespaceDetails* details)
- : _ns(ns.toString()), _details(details) {}
-
-const DiskLoc& NamespaceDetailsRSV1MetaData::capExtent() const {
- return _details->capExtent;
-}
-
-void NamespaceDetailsRSV1MetaData::setCapExtent(OperationContext* opCtx, const DiskLoc& loc) {
- *opCtx->recoveryUnit()->writing(&_details->capExtent) = loc;
-}
-
-const DiskLoc& NamespaceDetailsRSV1MetaData::capFirstNewRecord() const {
- return _details->capFirstNewRecord;
-}
-
-void NamespaceDetailsRSV1MetaData::setCapFirstNewRecord(OperationContext* opCtx,
- const DiskLoc& loc) {
- *opCtx->recoveryUnit()->writing(&_details->capFirstNewRecord) = loc;
-}
-
-bool NamespaceDetailsRSV1MetaData::capLooped() const {
- return _details->capFirstNewRecord.isValid();
-}
-
-long long NamespaceDetailsRSV1MetaData::dataSize() const {
- return _details->stats.datasize;
-}
-long long NamespaceDetailsRSV1MetaData::numRecords() const {
- return _details->stats.nrecords;
-}
-
-void NamespaceDetailsRSV1MetaData::incrementStats(OperationContext* opCtx,
- long long dataSizeIncrement,
- long long numRecordsIncrement) {
- // Durability TODO: recording this constantly could be annoyingly slow.
- NamespaceDetails::Stats* s = opCtx->recoveryUnit()->writing(&_details->stats);
- s->datasize += dataSizeIncrement;
- s->nrecords += numRecordsIncrement;
-}
-
-void NamespaceDetailsRSV1MetaData::setStats(OperationContext* opCtx,
- long long dataSize,
- long long numRecords) {
- NamespaceDetails::Stats* s = opCtx->recoveryUnit()->writing(&_details->stats);
- s->datasize = dataSize;
- s->nrecords = numRecords;
-}
-
-DiskLoc NamespaceDetailsRSV1MetaData::deletedListEntry(int bucket) const {
- invariant(bucket >= 0 && bucket < RecordStoreV1Base::Buckets);
- const DiskLoc head = (bucket < NamespaceDetails::SmallBuckets)
- ? _details->deletedListSmall[bucket]
- : _details->deletedListLarge[bucket - NamespaceDetails::SmallBuckets];
-
- if (head == DiskLoc(0, 0)) {
- // This will happen the first time we use a "large" bucket since they were previously
- // zero-initialized.
- return DiskLoc();
- }
-
- return head;
-}
-
-void NamespaceDetailsRSV1MetaData::setDeletedListEntry(OperationContext* opCtx,
- int bucket,
- const DiskLoc& loc) {
- DiskLoc* head = (bucket < NamespaceDetails::SmallBuckets)
- ? &_details->deletedListSmall[bucket]
- : &_details->deletedListLarge[bucket - NamespaceDetails::SmallBuckets];
- *opCtx->recoveryUnit()->writing(head) = loc;
-}
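The lookup above splits one logical bucket array across two physical arrays (the small buckets came first; the large ones were appended later for file-format compatibility). A sketch of the same index arithmetic, with illustrative element types:

#include <cassert>

constexpr int kSmallBuckets = 18;
constexpr int kLargeBuckets = 8;

int smallSlots[kSmallBuckets];
int largeSlots[kLargeBuckets];

// Map one logical bucket index onto whichever physical array holds it.
int* bucketSlot(int bucket) {
    assert(bucket >= 0 && bucket < kSmallBuckets + kLargeBuckets);
    return bucket < kSmallBuckets ? &smallSlots[bucket]
                                  : &largeSlots[bucket - kSmallBuckets];
}

int main() {
    *bucketSlot(2) = 42;                 // lands in smallSlots[2]
    *bucketSlot(kSmallBuckets + 1) = 7;  // lands in largeSlots[1]
    assert(smallSlots[2] == 42 && largeSlots[1] == 7);
    return 0;
}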
-
-DiskLoc NamespaceDetailsRSV1MetaData::deletedListLegacyGrabBag() const {
- return _details->deletedListLegacyGrabBag;
-}
-
-void NamespaceDetailsRSV1MetaData::setDeletedListLegacyGrabBag(OperationContext* opCtx,
- const DiskLoc& loc) {
- *opCtx->recoveryUnit()->writing(&_details->deletedListLegacyGrabBag) = loc;
-}
-
-void NamespaceDetailsRSV1MetaData::orphanDeletedList(OperationContext* opCtx) {
- for (int i = 0; i < RecordStoreV1Base::Buckets; i++) {
- setDeletedListEntry(opCtx, i, DiskLoc());
- }
- setDeletedListLegacyGrabBag(opCtx, DiskLoc());
-}
-
-const DiskLoc& NamespaceDetailsRSV1MetaData::firstExtent(OperationContext* opCtx) const {
- return _details->firstExtent;
-}
-
-void NamespaceDetailsRSV1MetaData::setFirstExtent(OperationContext* opCtx, const DiskLoc& loc) {
- *opCtx->recoveryUnit()->writing(&_details->firstExtent) = loc;
-}
-
-const DiskLoc& NamespaceDetailsRSV1MetaData::lastExtent(OperationContext* opCtx) const {
- return _details->lastExtent;
-}
-
-void NamespaceDetailsRSV1MetaData::setLastExtent(OperationContext* opCtx, const DiskLoc& loc) {
- *opCtx->recoveryUnit()->writing(&_details->lastExtent) = loc;
-}
-
-bool NamespaceDetailsRSV1MetaData::isCapped() const {
- return _details->isCapped;
-}
-
-bool NamespaceDetailsRSV1MetaData::isUserFlagSet(int flag) const {
- return _details->userFlags & flag;
-}
-
-int NamespaceDetailsRSV1MetaData::userFlags() const {
- return _details->userFlags;
-}
-
-bool NamespaceDetailsRSV1MetaData::setUserFlag(OperationContext* opCtx, int flag) {
- if ((_details->userFlags & flag) == flag)
- return false;
-
- opCtx->recoveryUnit()->writingInt(_details->userFlags) |= flag;
- return true;
-}
-
-bool NamespaceDetailsRSV1MetaData::clearUserFlag(OperationContext* opCtx, int flag) {
- if ((_details->userFlags & flag) == 0)
- return false;
-
- opCtx->recoveryUnit()->writingInt(_details->userFlags) &= ~flag;
- return true;
-}
-
-bool NamespaceDetailsRSV1MetaData::replaceUserFlags(OperationContext* opCtx, int flags) {
- if (_details->userFlags == flags)
- return false;
-
- opCtx->recoveryUnit()->writingInt(_details->userFlags) = flags;
- return true;
-}
-
-int NamespaceDetailsRSV1MetaData::lastExtentSize(OperationContext* opCtx) const {
- return _details->lastExtentSize;
-}
-
-void NamespaceDetailsRSV1MetaData::setLastExtentSize(OperationContext* opCtx, int newMax) {
- if (_details->lastExtentSize == newMax)
- return;
- opCtx->recoveryUnit()->writingInt(_details->lastExtentSize) = newMax;
-}
-
-long long NamespaceDetailsRSV1MetaData::maxCappedDocs() const {
- invariant(_details->isCapped);
- if (_details->maxDocsInCapped == 0x7fffffff)
- return numeric_limits<long long>::max();
- return _details->maxDocsInCapped;
-}
-}
diff --git a/src/mongo/db/storage/mmap_v1/catalog/namespace_details_rsv1_metadata.h b/src/mongo/db/storage/mmap_v1/catalog/namespace_details_rsv1_metadata.h
deleted file mode 100644
index 26f0a16803f..00000000000
--- a/src/mongo/db/storage/mmap_v1/catalog/namespace_details_rsv1_metadata.h
+++ /dev/null
@@ -1,102 +0,0 @@
-// namespace_details_rsv1_metadata.h
-
-/**
- * Copyright (C) 2014 MongoDB Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#pragma once
-
-#include <string>
-
-#include "mongo/base/string_data.h"
-#include "mongo/db/storage/mmap_v1/catalog/namespace_details.h"
-#include "mongo/db/storage/mmap_v1/record_store_v1_base.h"
-
-namespace mongo {
-
-class RecordStore;
-
-/*
- * NOTE: NamespaceDetails will become a plain struct;
- * all durability handling, etc., will move here.
- */
-class NamespaceDetailsRSV1MetaData : public RecordStoreV1MetaData {
-public:
- explicit NamespaceDetailsRSV1MetaData(StringData ns, NamespaceDetails* details);
-
- virtual ~NamespaceDetailsRSV1MetaData() {}
-
- virtual const DiskLoc& capExtent() const;
- virtual void setCapExtent(OperationContext* opCtx, const DiskLoc& loc);
-
- virtual const DiskLoc& capFirstNewRecord() const;
- virtual void setCapFirstNewRecord(OperationContext* opCtx, const DiskLoc& loc);
-
- virtual bool capLooped() const;
-
- virtual long long dataSize() const;
- virtual long long numRecords() const;
-
- virtual void incrementStats(OperationContext* opCtx,
- long long dataSizeIncrement,
- long long numRecordsIncrement);
-
- virtual void setStats(OperationContext* opCtx, long long dataSize, long long numRecords);
-
- virtual DiskLoc deletedListEntry(int bucket) const;
- virtual void setDeletedListEntry(OperationContext* opCtx, int bucket, const DiskLoc& loc);
-
- virtual DiskLoc deletedListLegacyGrabBag() const;
- virtual void setDeletedListLegacyGrabBag(OperationContext* opCtx, const DiskLoc& loc);
-
- virtual void orphanDeletedList(OperationContext* opCtx);
-
- virtual const DiskLoc& firstExtent(OperationContext* opCtx) const;
- virtual void setFirstExtent(OperationContext* opCtx, const DiskLoc& loc);
-
- virtual const DiskLoc& lastExtent(OperationContext* opCtx) const;
- virtual void setLastExtent(OperationContext* opCtx, const DiskLoc& loc);
-
- virtual bool isCapped() const;
-
- virtual bool isUserFlagSet(int flag) const;
- virtual int userFlags() const;
- virtual bool setUserFlag(OperationContext* opCtx, int flag);
- virtual bool clearUserFlag(OperationContext* opCtx, int flag);
- virtual bool replaceUserFlags(OperationContext* opCtx, int flags);
-
- virtual int lastExtentSize(OperationContext* opCtx) const;
- virtual void setLastExtentSize(OperationContext* opCtx, int newMax);
-
- virtual long long maxCappedDocs() const;
-
-private:
- std::string _ns;
- NamespaceDetails* _details;
- RecordStore* _namespaceRecordStore;
-};
-}
diff --git a/src/mongo/db/storage/mmap_v1/catalog/namespace_index.cpp b/src/mongo/db/storage/mmap_v1/catalog/namespace_index.cpp
deleted file mode 100644
index 90fce6f33f5..00000000000
--- a/src/mongo/db/storage/mmap_v1/catalog/namespace_index.cpp
+++ /dev/null
@@ -1,249 +0,0 @@
-// namespace_index.cpp
-
-/**
- * Copyright (C) 2013 10gen Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kStorage
-
-#include "mongo/platform/basic.h"
-
-#include "mongo/db/storage/mmap_v1/catalog/namespace_index.h"
-
-#include <boost/filesystem/operations.hpp>
-
-#include "mongo/db/operation_context.h"
-#include "mongo/db/storage/mmap_v1/catalog/hashtab.h"
-#include "mongo/db/storage/mmap_v1/catalog/namespace_details.h"
-#include "mongo/db/storage/mmap_v1/dur.h"
-#include "mongo/db/storage/mmap_v1/mmap_v1_options.h"
-#include "mongo/util/exit.h"
-#include "mongo/util/file.h"
-#include "mongo/util/log.h"
-#include "mongo/util/timer.h"
-
-namespace mongo {
-
-using std::endl;
-using std::list;
-using std::string;
-
-NamespaceIndex::NamespaceIndex(OperationContext* opCtx,
- const std::string& dir,
- const std::string& database)
- : _dir(dir), _database(database), _f(opCtx, MongoFile::Options::SEQUENTIAL), _ht(nullptr) {}
-
-NamespaceIndex::~NamespaceIndex() {}
-
-NamespaceDetails* NamespaceIndex::details(StringData ns) const {
- const Namespace n(ns);
- return details(n);
-}
-
-NamespaceDetails* NamespaceIndex::details(const Namespace& ns) const {
- return _ht->get(ns);
-}
-
-void NamespaceIndex::add_ns(OperationContext* opCtx,
- StringData ns,
- const DiskLoc& loc,
- bool capped) {
- NamespaceDetails details(loc, capped);
- add_ns(opCtx, ns, &details);
-}
-
-void NamespaceIndex::add_ns(OperationContext* opCtx,
- StringData ns,
- const NamespaceDetails* details) {
- Namespace n(ns);
- add_ns(opCtx, n, details);
-}
-
-void NamespaceIndex::add_ns(OperationContext* opCtx,
- const Namespace& ns,
- const NamespaceDetails* details) {
- const NamespaceString nss(ns.toString());
- invariant(opCtx->lockState()->isDbLockedForMode(nss.db(), MODE_X));
-
- massert(17315, "no . in ns", nsIsFull(nss.toString()));
-
- uassert(10081, "too many namespaces/collections", _ht->put(opCtx, ns, *details));
-}
-
-void NamespaceIndex::kill_ns(OperationContext* opCtx, StringData ns) {
- const NamespaceString nss(ns.toString());
- invariant(opCtx->lockState()->isDbLockedForMode(nss.db(), MODE_X));
-
- const Namespace n(ns);
- _ht->kill(opCtx, n);
-
- if (ns.size() <= Namespace::MaxNsCollectionLen) {
- // Larger namespace names don't have room for $extras so they can't exist. The code
- // below would cause an "$extra: ns too large" error and stacktrace to be printed to the
- // log even though everything is fine.
- for (int i = 0; i <= 1; i++) {
- try {
- Namespace extra(n.extraName(i));
- _ht->kill(opCtx, extra);
- } catch (DBException&) {
- LOG(3) << "caught exception in kill_ns" << endl;
- }
- }
- }
-}
-
-bool NamespaceIndex::pathExists() const {
- return boost::filesystem::exists(path());
-}
-
-boost::filesystem::path NamespaceIndex::path() const {
- boost::filesystem::path ret(_dir);
- if (storageGlobalParams.directoryperdb)
- ret /= _database;
- ret /= (_database + ".ns");
- return ret;
-}
-
-void NamespaceIndex::getCollectionNamespaces(list<string>* tofill) const {
- _ht->iterAll([tofill](const Namespace& k, NamespaceDetails& v) {
- if (!k.hasDollarSign() || k == "local.oplog.$main") {
- // We call out local.oplog.$main specifically, as it's the only "normal"
- // collection that has a $ in its name, so we make sure it gets added.
- tofill->push_back(k.toString());
- }
- });
-}
-
-void NamespaceIndex::maybeMkdir() const {
- if (!storageGlobalParams.directoryperdb)
- return;
- boost::filesystem::path dir(_dir);
- dir /= _database;
- if (!boost::filesystem::exists(dir))
- MONGO_ASSERT_ON_EXCEPTION_WITH_MSG(boost::filesystem::create_directory(dir),
- "create dir for db ");
-}
-
-void NamespaceIndex::init(OperationContext* opCtx) {
- invariant(!_ht.get());
-
- unsigned long long len = 0;
-
- const boost::filesystem::path nsPath = path();
- const std::string pathString = nsPath.string();
-
- void* p = 0;
-
- if (boost::filesystem::exists(nsPath)) {
- if (_f.open(opCtx, pathString)) {
- len = _f.length();
-
- if (len % (1024 * 1024) != 0) {
- StringBuilder sb;
- sb << "Invalid length: " << len << " for .ns file: " << pathString
- << ". Cannot open database";
-
- log() << sb.str();
- uassert(10079, sb.str(), len % (1024 * 1024) == 0);
- }
-
- p = _f.getView();
- }
- } else {
- uassert(ErrorCodes::IllegalOperation,
- "Cannot create a database in read-only mode.",
- !storageGlobalParams.readOnly);
-
- // use mmapv1GlobalOptions.lenForNewNsFiles, we are making a new database
- massert(10343,
- "bad mmapv1GlobalOptions.lenForNewNsFiles",
- mmapv1GlobalOptions.lenForNewNsFiles >= 1024 * 1024);
-
- maybeMkdir();
-
- unsigned long long l = mmapv1GlobalOptions.lenForNewNsFiles;
- log() << "allocating new ns file " << pathString << ", filling with zeroes..." << endl;
-
- Timer timer;
- {
- // Due to SERVER-15369 we need to explicitly write zero-bytes to the NS file.
- const unsigned long long kBlockSize = 1024 * 1024;
- invariant(l % kBlockSize == 0); // ns files can only be multiples of 1MB
- const std::vector<char> zeros(kBlockSize, 0);
-
- File file;
- file.open(pathString.c_str());
-
- massert(18825, str::stream() << "couldn't create file " << pathString, file.is_open());
-
- for (fileofs ofs = 0; ofs < l && !file.bad(); ofs += kBlockSize) {
- file.write(ofs, &zeros[0], kBlockSize);
- }
-
- if (file.bad()) {
- try {
- boost::filesystem::remove(pathString);
- } catch (const std::exception& e) {
- StringBuilder ss;
- ss << "error removing file: " << e.what();
- massert(18909, ss.str(), 0);
- }
- } else {
- file.fsync();
- }
-
- massert(18826, str::stream() << "failure writing file " << pathString, !file.bad());
- }
-
- if (_f.create(opCtx, pathString, l)) {
- // The writes done in this function must not be rolled back. This will leave the
- // file empty, but available for future use. That is why we go directly to the
- // global dur dirty list rather than going through the OperationContext.
- getDur().createdFile(pathString, l);
-
- // Commit the journal and all changes to disk so that even if exceptions occur
- // during subsequent initialization, we won't have uncommitted changes during file
- // close.
- getDur().commitNow(opCtx);
-
- len = l;
- invariant(len == mmapv1GlobalOptions.lenForNewNsFiles);
-
- p = _f.getView();
- }
-
- log() << "done allocating ns file " << pathString << ", "
- << "size: " << (len / 1024 / 1024) << "MB, "
- << "took " << static_cast<double>(timer.millis()) / 1000.0 << " seconds";
- }
-
- invariant(p, str::stream() << "error couldn't open file " << pathString << " terminating");
-
- invariant(len <= 0x7fffffff);
- _ht.reset(new NamespaceHashTable(p, (int)len, "namespace index"));
-}
-}
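A sketch of the pre-allocation strategy in init() above: explicitly write 1MB blocks of zeroes rather than seeking past the end, so the file is fully materialized on disk before it is memory-mapped (the explicit zero-writes are what SERVER-15369 called for). allocateZeroedFile is a hypothetical helper; the real code also fsyncs the file and journals its creation.

#include <fstream>
#include <vector>

bool allocateZeroedFile(const char* path, unsigned long long length) {
    const unsigned long long kBlockSize = 1024 * 1024;
    if (length % kBlockSize != 0)  // .ns files can only be multiples of 1MB
        return false;

    std::ofstream file(path, std::ios::binary | std::ios::trunc);
    if (!file)
        return false;

    const std::vector<char> zeros(kBlockSize, 0);
    for (unsigned long long ofs = 0; ofs < length && file.good(); ofs += kBlockSize)
        file.write(zeros.data(), kBlockSize);  // one zeroed block at a time

    file.flush();
    return file.good();
}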
diff --git a/src/mongo/db/storage/mmap_v1/catalog/namespace_index.h b/src/mongo/db/storage/mmap_v1/catalog/namespace_index.h
deleted file mode 100644
index 5b7766b4035..00000000000
--- a/src/mongo/db/storage/mmap_v1/catalog/namespace_index.h
+++ /dev/null
@@ -1,100 +0,0 @@
-// namespace_index.h
-
-/**
- * Copyright (C) 2013 10gen Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#pragma once
-
-#include <list>
-#include <string>
-
-#include "mongo/base/disallow_copying.h"
-#include "mongo/db/storage/mmap_v1/catalog/namespace.h"
-#include "mongo/db/storage/mmap_v1/diskloc.h"
-#include "mongo/db/storage/mmap_v1/durable_mapped_file.h"
-
-namespace mongo {
-
-class NamespaceDetails;
-class NamespaceHashTable;
-class OperationContext;
-
-/* NamespaceIndex is the ".ns" file you see in the data directory. It is the "system catalog"
- if you will: at least the core parts. (Additional info in system.* collections.)
-*/
-class NamespaceIndex {
- MONGO_DISALLOW_COPYING(NamespaceIndex);
-
-public:
- NamespaceIndex(OperationContext* opCtx, const std::string& dir, const std::string& database);
- ~NamespaceIndex();
-
- /**
- * Must be called before destruction.
- */
- void close(OperationContext* opCtx) {
- LockMongoFilesExclusive lock(opCtx);
- _f.close(opCtx);
- }
-
- /* returns true if the .ns file represented by this object exists on disk */
- bool pathExists() const;
-
- void init(OperationContext* opCtx);
-
- void add_ns(OperationContext* opCtx, StringData ns, const DiskLoc& loc, bool capped);
- void add_ns(OperationContext* opCtx, StringData ns, const NamespaceDetails* details);
- void add_ns(OperationContext* opCtx, const Namespace& ns, const NamespaceDetails* details);
-
- NamespaceDetails* details(StringData ns) const;
- NamespaceDetails* details(const Namespace& ns) const;
-
- void kill_ns(OperationContext* opCtx, StringData ns);
-
- bool allocated() const {
- return _ht.get() != 0;
- }
-
- void getCollectionNamespaces(std::list<std::string>* tofill) const;
-
- boost::filesystem::path path() const;
-
- unsigned long long fileLength() const {
- return _f.length();
- }
-
-private:
- void maybeMkdir() const;
-
- const std::string _dir;
- const std::string _database;
-
- DurableMappedFile _f;
- std::unique_ptr<NamespaceHashTable> _ht;
-};
-}
diff --git a/src/mongo/db/storage/mmap_v1/catalog/namespace_test.cpp b/src/mongo/db/storage/mmap_v1/catalog/namespace_test.cpp
deleted file mode 100644
index 85cd79be43b..00000000000
--- a/src/mongo/db/storage/mmap_v1/catalog/namespace_test.cpp
+++ /dev/null
@@ -1,68 +0,0 @@
-// namespace_test.cpp
-
-/**
-* Copyright (C) 2008 10gen Inc.
-*
-* This program is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Affero General Public License, version 3,
-* as published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU Affero General Public License for more details.
-*
-* You should have received a copy of the GNU Affero General Public License
-* along with this program. If not, see <http://www.gnu.org/licenses/>.
-*
-* As a special exception, the copyright holders give permission to link the
-* code of portions of this program with the OpenSSL library under certain
-* conditions as described in each individual source file and distribute
-* linked combinations including the program with the OpenSSL library. You
-* must comply with the GNU Affero General Public License in all respects for
-* all of the code used other than as permitted herein. If you modify file(s)
-* with this exception, you may extend this exception to your version of the
-* file(s), but you are not obligated to do so. If you do not wish to do so,
-* delete this exception statement from your version. If you delete this
-* exception statement from all source files in the program, then also delete
-* it in the license file.
-*/
-
-#include "mongo/unittest/unittest.h"
-
-#include "mongo/db/storage/mmap_v1/catalog/namespace.h"
-
-namespace mongo {
-
-using std::string;
-
-TEST(NamespaceTest, Basics) {
- Namespace foo("foo.bar");
- Namespace bar("bar.foo");
-
- ASSERT_EQUALS(foo.toString(), foo.toString());
- ASSERT_EQUALS(foo.hash(), foo.hash());
-
- ASSERT_NOT_EQUALS(foo.hash(), bar.hash());
-
- ASSERT(foo == foo);
- ASSERT(!(foo != foo));
- ASSERT(foo != bar);
- ASSERT(!(foo == bar));
-}
-
-TEST(NamespaceTest, ExtraName) {
- Namespace foo("foo.bar");
- ASSERT_FALSE(foo.isExtra());
-
- string str0 = foo.extraName(0);
- ASSERT_EQUALS("foo.bar$extra", str0);
- Namespace ex0(str0);
- ASSERT_TRUE(ex0.isExtra());
-
- string str1 = foo.extraName(1);
- ASSERT_EQUALS("foo.bar$extrb", str1);
- Namespace ex1(str1);
- ASSERT_TRUE(ex1.isExtra());
-}
-}
diff --git a/src/mongo/db/storage/mmap_v1/commit_notifier.cpp b/src/mongo/db/storage/mmap_v1/commit_notifier.cpp
deleted file mode 100644
index 697c2dd1cdc..00000000000
--- a/src/mongo/db/storage/mmap_v1/commit_notifier.cpp
+++ /dev/null
@@ -1,72 +0,0 @@
-/**
- * Copyright (C) 2016 MongoDB Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects
- * for all of the code used other than as permitted herein. If you modify
- * file(s) with this exception, you may extend this exception to your
- * version of the file(s), but you are not obligated to do so. If you do not
- * wish to do so, delete this exception statement from your version. If you
- * delete this exception statement from all source files in the program,
- * then also delete it in the license file.
- */
-
-#include "mongo/platform/basic.h"
-
-#include "mongo/db/storage/mmap_v1/commit_notifier.h"
-
-#include "mongo/util/assert_util.h"
-
-namespace mongo {
-
-CommitNotifier::CommitNotifier() = default;
-
-CommitNotifier::~CommitNotifier() {
- invariant(!_nWaiting);
-}
-
-CommitNotifier::When CommitNotifier::now() {
- stdx::lock_guard<stdx::mutex> lock(_mutex);
- return ++_lastReturned;
-}
-
-void CommitNotifier::waitFor(When e) {
- stdx::unique_lock<stdx::mutex> lock(_mutex);
- ++_nWaiting;
- while (_lastDone < e) {
- _condition.wait(lock);
- }
-}
-
-void CommitNotifier::awaitBeyondNow() {
- stdx::unique_lock<stdx::mutex> lock(_mutex);
- ++_nWaiting;
- When e = ++_lastReturned;
- while (_lastDone <= e) {
- _condition.wait(lock);
- }
-}
-
-void CommitNotifier::notifyAll(When e) {
- stdx::unique_lock<stdx::mutex> lock(_mutex);
- _lastDone = e;
- _nWaiting = 0;
- _condition.notify_all();
-}
-
-} // namespace mongo
diff --git a/src/mongo/db/storage/mmap_v1/commit_notifier.h b/src/mongo/db/storage/mmap_v1/commit_notifier.h
deleted file mode 100644
index bbb40a14576..00000000000
--- a/src/mongo/db/storage/mmap_v1/commit_notifier.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/**
- * Copyright (C) 2016 MongoDB Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects
- * for all of the code used other than as permitted herein. If you modify
- * file(s) with this exception, you may extend this exception to your
- * version of the file(s), but you are not obligated to do so. If you do not
- * wish to do so, delete this exception statement from your version. If you
- * delete this exception statement from all source files in the program,
- * then also delete it in the license file.
- */
-
-#pragma once
-
-#include "mongo/base/disallow_copying.h"
-#include "mongo/stdx/condition_variable.h"
-#include "mongo/stdx/mutex.h"
-
-namespace mongo {
-
-/**
- * Establishes a synchronization point between threads. N threads are waiters and one is the
- * notifier.
- */
-class CommitNotifier {
- MONGO_DISALLOW_COPYING(CommitNotifier);
-
-public:
- typedef unsigned long long When;
-
- CommitNotifier();
- ~CommitNotifier();
-
- When now();
-
- /**
-     * Awaits the next notifyAll() call by another thread. Notifications that precede this call are
- * ignored -- we are looking for a fresh event.
- */
- void waitFor(When e);
-
- /**
- * A bit faster than waitFor(now()).
- */
- void awaitBeyondNow();
-
- /**
- * May be called multiple times. Notifies all waiters.
- */
- void notifyAll(When e);
-
- /**
- * Returns how many threads are blocked in the waitFor/awaitBeyondNow calls.
- */
- unsigned nWaiting() const {
- return _nWaiting;
- }
-
-private:
- stdx::mutex _mutex;
- stdx::condition_variable _condition;
-
- When _lastDone{0};
- When _lastReturned{0};
- unsigned _nWaiting{0};
-};
-
-} // namespace mongo
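CommitNotifier is an epoch-based rendezvous: waiters block on an epoch, and a single notifier
publishes the last completed epoch. A minimal usage sketch against the interface above, assuming a
group-commit style producer; the instance and function names are illustrative, not from the
original tree:

#include "mongo/db/storage/mmap_v1/commit_notifier.h"

namespace {

mongo::CommitNotifier journalNotify;  // hypothetical instance

// Client side: block until a commit that starts strictly after this call completes.
void clientWaitForJournal() {
    journalNotify.awaitBeyondNow();
}

// Group-commit side: take an epoch before flushing, flush, then wake every
// thread waiting on epochs up to and including it.
void groupCommitOnce() {
    mongo::CommitNotifier::When when = journalNotify.now();
    // ... write the journal buffer and fsync it here ...
    journalNotify.notifyAll(when);
}

}  // namespace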
diff --git a/src/mongo/db/storage/mmap_v1/compress.cpp b/src/mongo/db/storage/mmap_v1/compress.cpp
deleted file mode 100644
index 8f8dce527ed..00000000000
--- a/src/mongo/db/storage/mmap_v1/compress.cpp
+++ /dev/null
@@ -1,57 +0,0 @@
-// @file compress.cpp
-
-/**
-* Copyright (C) 2012 10gen Inc.
-*
-* This program is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Affero General Public License, version 3,
-* as published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU Affero General Public License for more details.
-*
-* You should have received a copy of the GNU Affero General Public License
-* along with this program. If not, see <http://www.gnu.org/licenses/>.
-*
-* As a special exception, the copyright holders give permission to link the
-* code of portions of this program with the OpenSSL library under certain
-* conditions as described in each individual source file and distribute
-* linked combinations including the program with the OpenSSL library. You
-* must comply with the GNU Affero General Public License in all respects
-* for all of the code used other than as permitted herein. If you modify
-* file(s) with this exception, you may extend this exception to your
-* version of the file(s), but you are not obligated to do so. If you do not
-* wish to do so, delete this exception statement from your version. If you
-* delete this exception statement from all source files in the program,
-* then also delete it in the license file.
-*/
-
-#include "mongo/platform/basic.h"
-
-#include "mongo/db/storage/mmap_v1/compress.h"
-
-#include <snappy.h>
-
-namespace mongo {
-
-void rawCompress(const char* input,
- size_t input_length,
- char* compressed,
- size_t* compressed_length) {
- snappy::RawCompress(input, input_length, compressed, compressed_length);
-}
-
-size_t maxCompressedLength(size_t source_len) {
- return snappy::MaxCompressedLength(source_len);
-}
-
-size_t compress(const char* input, size_t input_length, std::string* output) {
- return snappy::Compress(input, input_length, output);
-}
-
-bool uncompress(const char* compressed, size_t compressed_length, std::string* uncompressed) {
- return snappy::Uncompress(compressed, compressed_length, uncompressed);
-}
-}
diff --git a/src/mongo/db/storage/mmap_v1/compress.h b/src/mongo/db/storage/mmap_v1/compress.h
deleted file mode 100644
index 8ff828a93a6..00000000000
--- a/src/mongo/db/storage/mmap_v1/compress.h
+++ /dev/null
@@ -1,46 +0,0 @@
-// @file compress.h
-
-/**
-* Copyright (C) 2012 10gen Inc.
-*
-* This program is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Affero General Public License, version 3,
-* as published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU Affero General Public License for more details.
-*
-* You should have received a copy of the GNU Affero General Public License
-* along with this program. If not, see <http://www.gnu.org/licenses/>.
-*
-* As a special exception, the copyright holders give permission to link the
-* code of portions of this program with the OpenSSL library under certain
-* conditions as described in each individual source file and distribute
-* linked combinations including the program with the OpenSSL library. You
-* must comply with the GNU Affero General Public License in all respects
-* for all of the code used other than as permitted herein. If you modify
-* file(s) with this exception, you may extend this exception to your
-* version of the file(s), but you are not obligated to do so. If you do not
-* wish to do so, delete this exception statement from your version. If you
-* delete this exception statement from all source files in the program,
-* then also delete it in the license file.
-*/
-
-#pragma once
-
-#include <string>
-
-namespace mongo {
-
-size_t compress(const char* input, size_t input_length, std::string* output);
-
-bool uncompress(const char* compressed, size_t compressed_length, std::string* uncompressed);
-
-size_t maxCompressedLength(size_t source_len);
-void rawCompress(const char* input,
- size_t input_length,
- char* compressed,
- size_t* compressed_length);
-}
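These declarations are thin pass-throughs to Snappy (see compress.cpp above), so a compress and
uncompress pair is a lossless round trip. A sketch under that assumption:

#include <cassert>
#include <string>

#include "mongo/db/storage/mmap_v1/compress.h"

int main() {
    const std::string input(8192, 'x');  // highly compressible payload

    std::string compressed;
    mongo::compress(input.data(), input.size(), &compressed);

    std::string restored;
    bool ok = mongo::uncompress(compressed.data(), compressed.size(), &restored);
    assert(ok && restored == input);
    return 0;
}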
diff --git a/src/mongo/db/storage/mmap_v1/data_file.cpp b/src/mongo/db/storage/mmap_v1/data_file.cpp
deleted file mode 100644
index 46af46c0a47..00000000000
--- a/src/mongo/db/storage/mmap_v1/data_file.cpp
+++ /dev/null
@@ -1,253 +0,0 @@
-// data_file.cpp
-
-/**
-* Copyright (C) 2013 10gen Inc.
-*
-* This program is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Affero General Public License, version 3,
-* as published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU Affero General Public License for more details.
-*
-* You should have received a copy of the GNU Affero General Public License
-* along with this program. If not, see <http://www.gnu.org/licenses/>.
-*
-* As a special exception, the copyright holders give permission to link the
-* code of portions of this program with the OpenSSL library under certain
-* conditions as described in each individual source file and distribute
-* linked combinations including the program with the OpenSSL library. You
-* must comply with the GNU Affero General Public License in all respects for
-* all of the code used other than as permitted herein. If you modify file(s)
-* with this exception, you may extend this exception to your version of the
-* file(s), but you are not obligated to do so. If you do not wish to do so,
-* delete this exception statement from your version. If you delete this
-* exception statement from all source files in the program, then also delete
-* it in the license file.
-*/
-
-#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kStorage
-
-#include "mongo/platform/basic.h"
-
-#include "mongo/db/storage/mmap_v1/data_file.h"
-
-#include <boost/filesystem/operations.hpp>
-#include <utility>
-#include <vector>
-
-#include "mongo/base/static_assert.h"
-#include "mongo/db/operation_context.h"
-#include "mongo/db/storage/mmap_v1/dur.h"
-#include "mongo/db/storage/mmap_v1/durable_mapped_file.h"
-#include "mongo/db/storage/mmap_v1/file_allocator.h"
-#include "mongo/db/storage/mmap_v1/mmap_v1_options.h"
-#include "mongo/util/log.h"
-
-namespace mongo {
-
-using std::endl;
-
-namespace {
-
-void data_file_check(void* _mb) {
- if (sizeof(char*) == 4) {
- uassert(10084,
- "can't map file memory - mongo requires 64 bit build for larger datasets",
- _mb != NULL);
- } else {
- uassert(10085, "can't map file memory", _mb != NULL);
- }
-}
-
-} // namespace
-
-
-MONGO_STATIC_ASSERT(DataFileHeader::HeaderSize == 8192);
-MONGO_STATIC_ASSERT(sizeof(static_cast<DataFileHeader*>(NULL)->data) == 4);
-MONGO_STATIC_ASSERT(sizeof(DataFileHeader) - sizeof(static_cast<DataFileHeader*>(NULL)->data) ==
- DataFileHeader::HeaderSize);
-
-
-int DataFile::maxSize() {
- if (sizeof(int*) == 4) {
- return 512 * 1024 * 1024;
- } else if (mmapv1GlobalOptions.smallfiles) {
- return 0x7ff00000 >> 2;
- } else {
- return 0x7ff00000;
- }
-}
-
-NOINLINE_DECL void DataFile::badOfs(int ofs) const {
- msgasserted(13440,
- str::stream() << "bad offset:" << ofs << " accessing file: " << mmf.filename()
- << ". See http://dochub.mongodb.org/core/data-recovery");
-}
-
-int DataFile::_defaultSize() const {
- int size;
-
- if (_fileNo <= 4) {
- size = (64 * 1024 * 1024) << _fileNo;
- } else {
- size = 0x7ff00000;
- }
-
- if (mmapv1GlobalOptions.smallfiles) {
- size = size >> 2;
- }
-
- return size;
-}
-
-/** @return Status::OK() if found and opened. If uninitialized (prealloc only), does not open. */
-Status DataFile::openExisting(OperationContext* opCtx, const char* filename) {
- invariant(_mb == 0);
-
- if (!boost::filesystem::exists(filename)) {
- return Status(ErrorCodes::InvalidPath, "DataFile::openExisting - file does not exist");
- }
-
- if (!mmf.open(opCtx, filename)) {
- return Status(ErrorCodes::InternalError, "DataFile::openExisting - mmf.open failed");
- }
-
- // The mapped view of the file should never be NULL if the open call above succeeded.
- _mb = mmf.getView();
- invariant(_mb);
-
- const uint64_t sz = mmf.length();
- invariant(sz <= 0x7fffffff);
- invariant(sz % 4096 == 0);
-
- if (sz < 64 * 1024 * 1024 && !mmapv1GlobalOptions.smallfiles) {
- if (sz >= 16 * 1024 * 1024 && sz % (1024 * 1024) == 0) {
- log() << "info openExisting file size " << sz
- << " but mmapv1GlobalOptions.smallfiles=false: " << filename << endl;
- } else {
- log() << "openExisting size " << sz << " less than minimum file size expectation "
- << filename << endl;
- verify(false);
- }
- }
-
- data_file_check(_mb);
- return Status::OK();
-}
-
-void DataFile::open(OperationContext* opCtx,
- const char* filename,
- int minSize,
- bool preallocateOnly) {
- long size = _defaultSize();
-
- while (size < minSize) {
- if (size < maxSize() / 2) {
- size *= 2;
- } else {
- size = maxSize();
- break;
- }
- }
-
- if (size > maxSize()) {
- size = maxSize();
- }
-
- invariant(size >= 64 * 1024 * 1024 || mmapv1GlobalOptions.smallfiles);
- invariant(size % 4096 == 0);
-
- if (preallocateOnly) {
- if (mmapv1GlobalOptions.prealloc) {
- FileAllocator::get()->requestAllocation(filename, size);
- }
- return;
- }
-
- {
- invariant(_mb == 0);
- unsigned long long sz = size;
- if (mmf.create(opCtx, filename, sz)) {
- _mb = mmf.getView();
- }
-
- invariant(sz <= 0x7fffffff);
- size = (int)sz;
- }
-
- data_file_check(_mb);
- header()->init(opCtx, _fileNo, size, filename);
-}
-
-void DataFile::flush(bool sync) {
- mmf.flush(sync);
-}
-
-DiskLoc DataFile::allocExtentArea(OperationContext* opCtx, int size) {
- // The header would be NULL if file open failed. However, if file open failed we should
- // never be entering here.
- invariant(header());
- invariant(size <= header()->unusedLength);
-
- int offset = header()->unused.getOfs();
-
- DataFileHeader* h = header();
- *opCtx->recoveryUnit()->writing(&h->unused) = DiskLoc(_fileNo, offset + size);
- opCtx->recoveryUnit()->writingInt(h->unusedLength) = h->unusedLength - size;
-
- return DiskLoc(_fileNo, offset);
-}
-
-// -------------------------------------------------------------------------------
-
-void DataFileHeader::init(OperationContext* opCtx,
- int fileno,
- int filelength,
- const char* filename) {
- if (uninitialized()) {
- DEV log() << "datafileheader::init initializing " << filename << " n:" << fileno << endl;
-
- massert(13640,
- str::stream() << "DataFileHeader looks corrupt at file open filelength:"
- << filelength
- << " fileno:"
- << fileno,
- filelength > 32768);
-
- // The writes done in this function must not be rolled back. If the containing
- // UnitOfWork rolls back it should roll back to the state *after* these writes. This
- // will leave the file empty, but available for future use. That is why we go directly
- // to the global dur dirty list rather than going through the RecoveryUnit.
- getDur().createdFile(filename, filelength);
-
- typedef std::pair<void*, unsigned> Intent;
- std::vector<Intent> intent;
- intent.push_back(std::make_pair(this, sizeof(DataFileHeader)));
- privateViews.makeWritable(this, sizeof(DataFileHeader));
- getDur().declareWriteIntents(intent);
-
- fileLength = filelength;
- version = DataFileVersion::defaultForNewFiles();
- unused.set(fileno, HeaderSize);
- unusedLength = fileLength - HeaderSize - 16;
- freeListStart.Null();
- freeListEnd.Null();
- } else {
- checkUpgrade(opCtx);
- }
-}
-
-void DataFileHeader::checkUpgrade(OperationContext* opCtx) {
- if (freeListStart == DiskLoc(0, 0)) {
- // we are upgrading from 2.4 to 2.6
- invariant(freeListEnd == DiskLoc(0, 0)); // both start and end should be (0,0) or real
- WriteUnitOfWork wunit(opCtx);
- *opCtx->recoveryUnit()->writing(&freeListStart) = DiskLoc();
- *opCtx->recoveryUnit()->writing(&freeListEnd) = DiskLoc();
- wunit.commit();
- }
-}
-}
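The sizing rules above compose simply: _defaultSize() starts files 0..4 at 64MB << fileNo and later
files at the ~2GB cap, and open() doubles that until the requested minimum fits, clamped to
maxSize(). A standalone sketch of the arithmetic, assuming a 64-bit build without --smallfiles (the
helper is illustrative, not part of the original code):

#include <algorithm>
#include <cassert>

long long chooseFileSize(int fileNo, long long minSize) {
    const long long maxSize = 0x7ff00000;  // DataFile::maxSize(), 64-bit case
    long long size = fileNo <= 4 ? (64LL * 1024 * 1024) << fileNo : maxSize;

    // Doubling loop from DataFile::open().
    while (size < minSize) {
        if (size < maxSize / 2) {
            size *= 2;
        } else {
            size = maxSize;
            break;
        }
    }
    return std::min(size, maxSize);
}

int main() {
    assert(chooseFileSize(0, 0) == 64LL * 1024 * 1024);   // first file: 64MB
    assert(chooseFileSize(1, 0) == 128LL * 1024 * 1024);  // second file: 128MB
    assert(chooseFileSize(0, 200LL * 1024 * 1024) == 256LL * 1024 * 1024);  // doubled until it fits
    return 0;
}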
diff --git a/src/mongo/db/storage/mmap_v1/data_file.h b/src/mongo/db/storage/mmap_v1/data_file.h
deleted file mode 100644
index 60dc095791e..00000000000
--- a/src/mongo/db/storage/mmap_v1/data_file.h
+++ /dev/null
@@ -1,264 +0,0 @@
-// data_file.h
-
-/**
-* Copyright (C) 2013 10gen Inc.
-*
-* This program is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Affero General Public License, version 3,
-* as published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU Affero General Public License for more details.
-*
-* You should have received a copy of the GNU Affero General Public License
-* along with this program. If not, see <http://www.gnu.org/licenses/>.
-*
-* As a special exception, the copyright holders give permission to link the
-* code of portions of this program with the OpenSSL library under certain
-* conditions as described in each individual source file and distribute
-* linked combinations including the program with the OpenSSL library. You
-* must comply with the GNU Affero General Public License in all respects for
-* all of the code used other than as permitted herein. If you modify file(s)
-* with this exception, you may extend this exception to your version of the
-* file(s), but you are not obligated to do so. If you do not wish to do so,
-* delete this exception statement from your version. If you delete this
-* exception statement from all source files in the program, then also delete
-* it in the license file.
-*/
-
-#pragma once
-
-#include "mongo/bson/util/builder.h"
-#include "mongo/db/storage/mmap_v1/diskloc.h"
-#include "mongo/db/storage/mmap_v1/durable_mapped_file.h"
-#include "mongo/platform/bits.h"
-
-namespace mongo {
-
-class OperationContext;
-
-#pragma pack(1)
-class DataFileVersion {
-public:
- DataFileVersion(uint32_t major, uint32_t minor) : _major(major), _minor(minor) {}
-
- static DataFileVersion defaultForNewFiles() {
- return DataFileVersion(kCurrentMajor, kIndexes24AndNewer | kMayHave30Freelist);
- }
-
- Status isCompatibleWithCurrentCode() const {
- if (_major != kCurrentMajor) {
- StringBuilder sb;
- sb << "The data files have major version " << _major
- << ", but this version of mongod only supports version " << kCurrentMajor;
- return {ErrorCodes::MustUpgrade, sb.str()};
- }
-
- uint32_t unrecognizedMinorBits = _minor & ~kUsedMinorFlagsMask;
- if (unrecognizedMinorBits) {
- StringBuilder sb;
- sb << "The data files use features not recognized by this version of mongod; the"
- " feature bits in positions [ ";
- bool firstIteration = true;
- while (unrecognizedMinorBits) {
- const int lowestSetBitPosition = countTrailingZeros64(unrecognizedMinorBits);
- if (!firstIteration) {
- sb << ", ";
- }
- sb << lowestSetBitPosition;
- unrecognizedMinorBits ^= (1 << lowestSetBitPosition);
- firstIteration = false;
- }
- sb << " ] aren't recognized by this version of mongod";
-
- return {ErrorCodes::MustUpgrade, sb.str()};
- }
-
- const uint32_t indexCleanliness = _minor & kIndexPluginMask;
- if (indexCleanliness != kIndexes24AndNewer && indexCleanliness != kIndexes22AndOlder) {
- StringBuilder sb;
- sb << "The data files have index plugin version " << indexCleanliness
- << ", but this version of mongod only supports versions " << kIndexes22AndOlder
- << " and " << kIndexes24AndNewer;
- return {ErrorCodes::MustUpgrade, sb.str()};
- }
-
- // We are compatible with either setting of kMayHave30Freelist.
-
- return Status::OK();
- }
-
- bool is24IndexClean() const {
- return (_minor & kIndexPluginMask) == kIndexes24AndNewer;
- }
- void setIs24IndexClean() {
- _minor = ((_minor & ~kIndexPluginMask) | kIndexes24AndNewer);
- }
-
- bool mayHave30Freelist() const {
- return _minor & kMayHave30Freelist;
- }
- void setMayHave30Freelist() {
- _minor |= kMayHave30Freelist;
- }
-
- bool getMayHaveCollationMetadata() const {
- return _minor & kMayHaveCollationMetadata;
- }
- void setMayHaveCollationMetadata() {
- _minor |= kMayHaveCollationMetadata;
- }
-
- uint32_t majorRaw() const {
- return _major;
- }
- uint32_t minorRaw() const {
- return _minor;
- }
-
-private:
- static const uint32_t kCurrentMajor = 4;
-
- // minor layout:
- // first 4 bits - index plugin cleanliness.
- // see IndexCatalog::_upgradeDatabaseMinorVersionIfNeeded for details
- // 5th bit - 1 if started with 3.0-style freelist implementation (SERVER-14081)
- // 6th bit - 1 if indexes or collections with a collation have been created.
- // 7th through 31st bit - reserved and must be set to 0.
- static const uint32_t kIndexPluginMask = 0xf;
- static const uint32_t kIndexes22AndOlder = 5;
- static const uint32_t kIndexes24AndNewer = 6;
-
- static const uint32_t kMayHave30Freelist = (1 << 4);
-
- static const uint32_t kMayHaveCollationMetadata = (1 << 5);
-
- // All set bits we know about are covered by this mask.
- static const uint32_t kUsedMinorFlagsMask =
- kIndexPluginMask | kMayHave30Freelist | kMayHaveCollationMetadata;
-
- uint32_t _major;
- uint32_t _minor;
-};
-
-// Note: Intentionally not defining relational operators for DataFileVersion as there is no
-// total ordering of all versions now that '_minor' is used as a bit vector.
-#pragma pack()
-
-/* a datafile - i.e. the "dbname.<#>" files :
-
- ----------------------
- DataFileHeader
- ----------------------
- Extent (for a particular namespace)
- MmapV1RecordHeader
- ...
- MmapV1RecordHeader (some chained for unused space)
- ----------------------
- more Extents...
- ----------------------
-*/
-#pragma pack(1)
-class DataFileHeader {
-public:
- DataFileVersion version;
- int fileLength;
- /**
- * unused is the portion of the file that doesn't belong to any allocated extents. -1 = no more
- */
- DiskLoc unused;
- int unusedLength;
- DiskLoc freeListStart;
- DiskLoc freeListEnd;
- char reserved[8192 - 4 * 4 - 8 * 3];
-
- char data[4]; // first extent starts here
-
- enum { HeaderSize = 8192 };
-
- bool uninitialized() const {
- return version.majorRaw() == 0;
- }
-
- void init(OperationContext* opCtx, int fileno, int filelength, const char* filename);
-
- void checkUpgrade(OperationContext* opCtx);
-
- bool isEmpty() const {
- return uninitialized() || (unusedLength == fileLength - HeaderSize - 16);
- }
-};
-#pragma pack()
-
-
-class DataFile {
-public:
- DataFile(OperationContext* opCtx, int fn) : _fileNo(fn), mmf(opCtx), _mb(NULL) {}
-
-    /** @return Status::OK() if found and opened. If uninitialized (prealloc only), does not open. */
- Status openExisting(OperationContext* opCtx, const char* filename);
-
- /** creates if DNE */
- void open(OperationContext* opCtx,
- const char* filename,
- int requestedDataSize = 0,
- bool preallocateOnly = false);
-
- /**
- * Must be called before destruction.
- */
- void close(OperationContext* opCtx) {
- LockMongoFilesExclusive lock(opCtx);
- mmf.close(opCtx);
- }
-
- DiskLoc allocExtentArea(OperationContext* opCtx, int size);
-
- DataFileHeader* getHeader() {
- return header();
- }
- const DataFileHeader* getHeader() const {
- return header();
- }
-
- HANDLE getFd() {
- return mmf.getFd();
- }
- unsigned long long length() const {
- return mmf.length();
- }
-
- /* return max size an extent may be */
- static int maxSize();
-
- /** fsync */
- void flush(bool sync);
-
-private:
- friend class MmapV1ExtentManager;
-
-
- void badOfs(int) const;
- int _defaultSize() const;
-
- void grow(DiskLoc dl, int size);
-
- char* p() const {
- return (char*)_mb;
- }
- DataFileHeader* header() {
- return static_cast<DataFileHeader*>(_mb);
- }
- const DataFileHeader* header() const {
- return static_cast<DataFileHeader*>(_mb);
- }
-
-
- const int _fileNo;
-
- DurableMappedFile mmf;
- void* _mb; // the memory mapped view
-};
-}
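The '_minor' field of DataFileVersion is a bit vector rather than an ordinal: the low four bits
select the index plugin version, bit 4 flags the 3.0-style freelist, and bit 5 flags collation
metadata. A standalone sketch of the decomposition, using the same constants as the header above:

#include <cassert>
#include <cstdint>

int main() {
    const uint32_t kIndexPluginMask = 0xf;
    const uint32_t kIndexes24AndNewer = 6;
    const uint32_t kMayHave30Freelist = 1 << 4;
    const uint32_t kMayHaveCollationMetadata = 1 << 5;

    // defaultForNewFiles(): minor = kIndexes24AndNewer | kMayHave30Freelist
    uint32_t minor = kIndexes24AndNewer | kMayHave30Freelist;

    assert((minor & kIndexPluginMask) == kIndexes24AndNewer);  // 2.4-clean indexes
    assert(minor & kMayHave30Freelist);                        // freelist flag set
    assert(!(minor & kMayHaveCollationMetadata));              // not set by default

    minor |= kMayHaveCollationMetadata;  // setMayHaveCollationMetadata()
    assert(minor & kMayHaveCollationMetadata);
    return 0;
}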
diff --git a/src/mongo/db/storage/mmap_v1/data_file_sync.cpp b/src/mongo/db/storage/mmap_v1/data_file_sync.cpp
deleted file mode 100644
index 975b1c3413e..00000000000
--- a/src/mongo/db/storage/mmap_v1/data_file_sync.cpp
+++ /dev/null
@@ -1,137 +0,0 @@
-/**
- * Copyright (C) 2014 MongoDB Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kStorage
-
-#include "mongo/platform/basic.h"
-
-#include "mongo/db/storage/mmap_v1/data_file_sync.h"
-
-#include "mongo/db/client.h"
-#include "mongo/db/commands/server_status_metric.h"
-#include "mongo/db/operation_context.h"
-#include "mongo/db/service_context.h"
-#include "mongo/db/storage/mmap_v1/dur_journal.h"
-#include "mongo/db/storage/mmap_v1/mmap.h"
-#include "mongo/db/storage/mmap_v1/mmap_v1_options.h"
-#include "mongo/db/storage/storage_options.h"
-#include "mongo/util/exit.h"
-#include "mongo/util/log.h"
-
-namespace mongo {
-
-using std::endl;
-
-DataFileSync dataFileSync;
-
-DataFileSync::DataFileSync()
- : ServerStatusSection("backgroundFlushing"), _total_time(0), _flushes(0), _last() {}
-
-void DataFileSync::run() {
- Client::initThread(name().c_str());
-
- if (storageGlobalParams.syncdelay == 0) {
- log() << "warning: --syncdelay 0 is not recommended and can have strange performance"
- << endl;
- } else if (storageGlobalParams.syncdelay == 1) {
- log() << "--syncdelay 1" << endl;
- } else if (storageGlobalParams.syncdelay != 60) {
- LOG(1) << "--syncdelay " << storageGlobalParams.syncdelay.load() << endl;
- }
- int time_flushing = 0;
- while (!globalInShutdownDeprecated()) {
- if (storageGlobalParams.syncdelay == 0) {
- // in case at some point we add an option to change at runtime
- sleepsecs(5);
- continue;
- }
-
- sleepmillis(
- (long long)std::max(0.0, (storageGlobalParams.syncdelay * 1000) - time_flushing));
-
- if (globalInShutdownDeprecated()) {
- // occasional issue trying to flush during shutdown when sleep interrupted
- break;
- }
-
- auto opCtx = cc().makeOperationContext();
- Date_t start = jsTime();
- StorageEngine* storageEngine = getGlobalServiceContext()->getStorageEngine();
-
- dur::notifyPreDataFileFlush();
- int numFiles = storageEngine->flushAllFiles(opCtx.get(), true);
- dur::notifyPostDataFileFlush();
-
- time_flushing = durationCount<Milliseconds>(jsTime() - start);
-
- _flushed(time_flushing);
-
- if (shouldLog(logger::LogSeverity::Debug(1)) || time_flushing >= 10000) {
- log() << "flushing mmaps took " << time_flushing << "ms "
- << " for " << numFiles << " files" << endl;
- }
- }
-}
-
-BSONObj DataFileSync::generateSection(OperationContext* opCtx,
- const BSONElement& configElement) const {
- if (!running()) {
- return BSONObj();
- }
-
- BSONObjBuilder b;
- b.appendNumber("flushes", _flushes);
- b.appendNumber("total_ms", _total_time);
- b.appendNumber("average_ms", (_flushes ? (_total_time / double(_flushes)) : 0.0));
- b.appendNumber("last_ms", _last_time);
- b.append("last_finished", _last);
- return b.obj();
-}
-
-void DataFileSync::_flushed(int ms) {
- _flushes++;
- _total_time += ms;
- _last_time = ms;
- _last = jsTime();
-}
-
-
-class MemJournalServerStatusMetric : public ServerStatusMetric {
-public:
- MemJournalServerStatusMetric() : ServerStatusMetric(".mem.mapped") {}
- virtual void appendAtLeaf(BSONObjBuilder& b) const {
- int m = MemoryMappedFile::totalMappedLengthInMB();
- b.appendNumber("mapped", m);
-
- if (storageGlobalParams.dur) {
- m *= 2;
- b.appendNumber("mappedWithJournal", m);
- }
- }
-} memJournalServerStatusMetric;
-}
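The pacing in DataFileSync::run() subtracts the previous flush's duration from the syncdelay budget
before sleeping, so flush starts stay roughly syncdelay seconds apart even when individual flushes
are slow. A sketch of just that arithmetic (the helper name is hypothetical):

#include <algorithm>
#include <cassert>

long long nextSleepMillis(double syncdelaySecs, int lastFlushMillis) {
    // Mirrors: sleepmillis((long long)std::max(0.0, (syncdelay * 1000) - time_flushing))
    return (long long)std::max(0.0, syncdelaySecs * 1000 - lastFlushMillis);
}

int main() {
    assert(nextSleepMillis(60.0, 0) == 60000);      // idle flush: sleep the full minute
    assert(nextSleepMillis(60.0, 12000) == 48000);  // a 12s flush eats into the budget
    assert(nextSleepMillis(60.0, 90000) == 0);      // an overlong flush: start again at once
    return 0;
}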
diff --git a/src/mongo/db/storage/mmap_v1/data_file_sync.h b/src/mongo/db/storage/mmap_v1/data_file_sync.h
deleted file mode 100644
index a26624f2c41..00000000000
--- a/src/mongo/db/storage/mmap_v1/data_file_sync.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/**
- * Copyright (C) 2014 MongoDB Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#pragma once
-
-#include "mongo/db/commands/server_status.h"
-#include "mongo/util/background.h"
-
-namespace mongo {
-
-/**
- * does background async flushes of mmapped files
- */
-class DataFileSync : public BackgroundJob, public ServerStatusSection {
-public:
- DataFileSync();
-
- virtual bool includeByDefault() const {
- return true;
- }
- virtual std::string name() const {
- return "DataFileSync";
- }
-
- void run();
-
- virtual BSONObj generateSection(OperationContext* opCtx,
- const BSONElement& configElement) const;
-
-private:
- void _flushed(int ms);
-
- long long _total_time;
- long long _flushes;
- int _last_time;
- Date_t _last;
-};
-
-extern DataFileSync dataFileSync;
-}
diff --git a/src/mongo/db/storage/mmap_v1/data_file_version_test.cpp b/src/mongo/db/storage/mmap_v1/data_file_version_test.cpp
deleted file mode 100644
index 40627007a19..00000000000
--- a/src/mongo/db/storage/mmap_v1/data_file_version_test.cpp
+++ /dev/null
@@ -1,123 +0,0 @@
-/**
- * Copyright (C) 2016 MongoDB Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#include "mongo/platform/basic.h"
-
-#include "mongo/db/storage/mmap_v1/data_file.h"
-
-#include "mongo/unittest/unittest.h"
-
-namespace mongo {
-namespace {
-
-TEST(DataFileVersionTest, DefaultForNewFilesIsCompatibleWithCurrentCode) {
- auto version = DataFileVersion::defaultForNewFiles();
- ASSERT_OK(version.isCompatibleWithCurrentCode());
-}
-
-TEST(DataFileVersionTest, CanSetIs24IndexClean) {
- const uint32_t major = 4;
- const uint32_t minor = 5;
- DataFileVersion version(major, minor);
- ASSERT_OK(version.isCompatibleWithCurrentCode());
-
- ASSERT_FALSE(version.is24IndexClean());
- version.setIs24IndexClean();
- ASSERT_TRUE(version.is24IndexClean());
-}
-
-TEST(DataFileVersionTest, CanSetMayHave30Freelist) {
- const uint32_t major = 4;
- const uint32_t minor = 5;
- DataFileVersion version(major, minor);
- ASSERT_OK(version.isCompatibleWithCurrentCode());
-
- ASSERT_FALSE(version.mayHave30Freelist());
- version.setMayHave30Freelist();
- ASSERT_TRUE(version.mayHave30Freelist());
-}
-
-TEST(DataFileVersionTest, CanSetMayHaveCollationMetadata) {
- auto version = DataFileVersion::defaultForNewFiles();
- ASSERT_OK(version.isCompatibleWithCurrentCode());
-
- ASSERT_FALSE(version.getMayHaveCollationMetadata());
- version.setMayHaveCollationMetadata();
- ASSERT_TRUE(version.getMayHaveCollationMetadata());
- ASSERT_OK(version.isCompatibleWithCurrentCode());
-}
-
-TEST(DataFileVersionTest, MustUpgradeWhenMajorVersionIsUnsupported) {
- const uint32_t major = 5;
- const uint32_t minor = 6;
- DataFileVersion version(major, minor);
- auto status = version.isCompatibleWithCurrentCode();
- ASSERT_EQ(ErrorCodes::MustUpgrade, status.code());
- ASSERT_EQ(
- "The data files have major version 5, but this version of mongod only supports version 4",
- status.reason());
-}
-
-TEST(DataFileVersionTest, MustUpgradeWhenSingleMinorFeatureBitIsUnrecognized) {
- const uint32_t major = 4;
- const uint32_t minor = 6 | (1 << 10);
- DataFileVersion version(major, minor);
- auto status = version.isCompatibleWithCurrentCode();
- ASSERT_EQ(ErrorCodes::MustUpgrade, status.code());
- ASSERT_EQ(
- "The data files use features not recognized by this version of mongod; the feature bits in"
- " positions [ 10 ] aren't recognized by this version of mongod",
- status.reason());
-}
-
-TEST(DataFileVersionTest, MustUpgradeWhenMultipleMinorFeatureBitsAreUnrecognized) {
- const uint32_t major = 4;
- const uint32_t minor = 6 | (1 << 10) | (1 << 14) | (1 << 15);
- DataFileVersion version(major, minor);
- auto status = version.isCompatibleWithCurrentCode();
- ASSERT_EQ(ErrorCodes::MustUpgrade, status.code());
- ASSERT_EQ(
- "The data files use features not recognized by this version of mongod; the feature bits in"
- " positions [ 10, 14, 15 ] aren't recognized by this version of mongod",
- status.reason());
-}
-
-TEST(DataFileVersionTest, MustUpgradeWhenIndexPluginVersionIsUnsupported) {
- const uint32_t major = 4;
- const uint32_t minor = 7;
- DataFileVersion version(major, minor);
- auto status = version.isCompatibleWithCurrentCode();
- ASSERT_EQ(ErrorCodes::MustUpgrade, status.code());
- ASSERT_EQ(
- "The data files have index plugin version 7, but this version of mongod only supports"
- " versions 5 and 6",
- status.reason());
-}
-
-} // namespace
-} // namespace mongo
diff --git a/src/mongo/db/storage/mmap_v1/diskloc.h b/src/mongo/db/storage/mmap_v1/diskloc.h
deleted file mode 100644
index 24ff75c7609..00000000000
--- a/src/mongo/db/storage/mmap_v1/diskloc.h
+++ /dev/null
@@ -1,222 +0,0 @@
-/**
-* Copyright (C) 2008 10gen Inc.
-*
-* This program is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Affero General Public License, version 3,
-* as published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU Affero General Public License for more details.
-*
-* You should have received a copy of the GNU Affero General Public License
-* along with this program. If not, see <http://www.gnu.org/licenses/>.
-*
-* As a special exception, the copyright holders give permission to link the
-* code of portions of this program with the OpenSSL library under certain
-* conditions as described in each individual source file and distribute
-* linked combinations including the program with the OpenSSL library. You
-* must comply with the GNU Affero General Public License in all respects for
-* all of the code used other than as permitted herein. If you modify file(s)
-* with this exception, you may extend this exception to your version of the
-* file(s), but you are not obligated to do so. If you do not wish to do so,
-* delete this exception statement from your version. If you delete this
-* exception statement from all source files in the program, then also delete
-* it in the license file.
-*/
-
-/* @file diskloc.h
-
- Storage subsystem management.
- Lays out our datafiles on disk, manages disk space.
-*/
-
-#pragma once
-
-#include <boost/functional/hash.hpp>
-#include <cstdint>
-
-#include "mongo/db/jsobj.h"
-#include "mongo/db/record_id.h"
-
-namespace mongo {
-
-template <class Version>
-class BtreeBucket;
-
-#pragma pack(1)
-/** Represents a disk location/offset on disk in a database. 64 bits.
-    It is assumed these will be passed around by value a lot, so don't do anything to make them
-    large (such as adding a virtual function).
- */
-class DiskLoc {
- // this will be volume, file #, etc. but is a logical value could be anything depending on
- // storage engine
- int _a;
- int ofs;
-
-public:
- enum SentinelValues {
-        /* note NullOfs is different. todo: clean up. see refs to NullOfs in code - its use is
-         * valid but confusing as-is outside the DiskLoc context. */
- NullOfs = -1,
-
- // Caps the number of files that may be allocated in a database, allowing about 32TB of
-        // data per db. Note that the DiskLoc and DiskLoc56Bit types support more files than
- // this value, as does the data storage format.
- MaxFiles = 16000,
-
- // How invalid DiskLocs are represented in RecordIds.
- InvalidRepr = -2LL,
- };
-
- DiskLoc(int a, int Ofs) : _a(a), ofs(Ofs) {}
- DiskLoc() {
- Null();
- }
-
- // Minimum allowed DiskLoc. No MmapV1RecordHeader may begin at this location because file and
- // extent headers must precede Records in a file.
- static DiskLoc min() {
- return DiskLoc(0, 0);
- }
-
- // Maximum allowed DiskLoc.
- // No MmapV1RecordHeader may begin at this location because the minimum size of a
- // MmapV1RecordHeader is larger than one byte. Also, the last bit is not able to be used
- // because mmapv1 uses that for "used".
- static DiskLoc max() {
- return DiskLoc(0x7fffffff, 0x7ffffffe);
- }
-
- bool questionable() const {
- return ofs < -1 || _a < -1 || _a > 524288;
- }
-
- bool isNull() const {
- return _a == -1;
- }
- DiskLoc& Null() {
- _a = -1;
-        /* note NullOfs is different. todo: clean up. see refs to NullOfs in code - its use is
-         * valid but confusing as-is outside the DiskLoc context. */
- ofs = 0;
- return *this;
- }
- void assertOk() const {
- verify(!isNull());
- }
- DiskLoc& setInvalid() {
- _a = -2;
- ofs = 0;
- return *this;
- }
- bool isValid() const {
- return _a != -2;
- }
-
- std::string toString() const {
- if (isNull())
- return "null";
- std::stringstream ss;
- ss << _a << ':' << std::hex << ofs;
- return ss.str();
- }
-
- BSONObj toBSONObj() const {
- return BSON("file" << _a << "offset" << ofs);
- }
-
- int a() const {
- return _a;
- }
-
- int& GETOFS() {
- return ofs;
- }
- int getOfs() const {
- return ofs;
- }
- void set(int a, int b) {
- _a = a;
- ofs = b;
- }
-
- void inc(int amt) {
- verify(!isNull());
- ofs += amt;
- }
-
- bool sameFile(DiskLoc b) {
- return _a == b._a;
- }
-
- bool operator==(const DiskLoc& b) const {
- return _a == b._a && ofs == b.ofs;
- }
- bool operator!=(const DiskLoc& b) const {
- return !(*this == b);
- }
- int compare(const DiskLoc& b) const {
- int x = _a - b._a;
- if (x)
- return x;
- return ofs - b.ofs;
- }
-
- static DiskLoc fromRecordId(RecordId id) {
- if (id.isNormal())
- return DiskLoc((id.repr() >> 32), uint32_t(id.repr()));
-
- if (id.isNull())
- return DiskLoc();
-
- if (id == RecordId::max())
- return DiskLoc::max();
-
- if (id == RecordId::min())
- return DiskLoc::min();
-
- dassert(id.repr() == InvalidRepr);
- return DiskLoc().setInvalid();
- }
-
- RecordId toRecordId() const {
- if (_a >= 0) {
- if (*this == DiskLoc::min())
- return RecordId::min();
-
- if (*this == DiskLoc::max())
- return RecordId::max();
-
- return RecordId(uint64_t(_a) << 32 | uint32_t(ofs));
- }
-
- if (isNull())
- return RecordId();
-
- dassert(!isValid());
- return RecordId(InvalidRepr);
- }
-};
-#pragma pack()
-
-inline bool operator<(const DiskLoc& lhs, const DiskLoc& rhs) {
-    return lhs.compare(rhs) < 0;
-}
-inline bool operator<=(const DiskLoc& lhs, const DiskLoc& rhs) {
-    return lhs.compare(rhs) <= 0;
-}
-inline bool operator>(const DiskLoc& lhs, const DiskLoc& rhs) {
-    return lhs.compare(rhs) > 0;
-}
-inline bool operator>=(const DiskLoc& lhs, const DiskLoc& rhs) {
-    return lhs.compare(rhs) >= 0;
-}
-
-inline std::ostream& operator<<(std::ostream& stream, const DiskLoc& loc) {
- return stream << loc.toString();
-}
-
-} // namespace mongo
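toRecordId() and fromRecordId() above pack a DiskLoc into a 64-bit RecordId with the file number in
the high 32 bits and the offset in the low 32. A standalone sketch of the packing:

#include <cassert>
#include <cstdint>

int main() {
    const int file = 7;
    const int ofs = 0x2000;

    // toRecordId(): RecordId(uint64_t(_a) << 32 | uint32_t(ofs))
    const uint64_t repr = (uint64_t(file) << 32) | uint32_t(ofs);

    // fromRecordId(): DiskLoc(id.repr() >> 32, uint32_t(id.repr()))
    assert(int(repr >> 32) == file);
    assert(int(uint32_t(repr)) == ofs);
    return 0;
}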
diff --git a/src/mongo/db/storage/mmap_v1/dur.cpp b/src/mongo/db/storage/mmap_v1/dur.cpp
deleted file mode 100644
index 835f4302647..00000000000
--- a/src/mongo/db/storage/mmap_v1/dur.cpp
+++ /dev/null
@@ -1,917 +0,0 @@
-/**
- * Copyright (C) 2009-2014 MongoDB Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-/*
- phases:
-
- PREPLOGBUFFER
-       we will build an output buffer ourselves and then use O_DIRECT
- we could be in read lock for this
- for very large objects write directly to redo log in situ?
- WRITETOJOURNAL
-       we could be unlocked (the main db lock that is...) for this, with sufficient care, but there
-       is some complexity: we have to handle falling behind, which would use too much ram (going
-       back into a read lock would suffice to stop that). for now (1.7.5/1.8.0) we are in read
-       lock, which is not ideal.
- WRITETODATAFILES
- actually write to the database data files in this phase. currently done by memcpy'ing the
- writes back to the non-private MMF. alternatively one could write to the files the
- traditional way; however the way our storage engine works that isn't any faster (actually
- measured a tiny bit slower).
- REMAPPRIVATEVIEW
- we could in a write lock quickly flip readers back to the main view, then stay in read lock
- and do our real remapping. with many files (e.g., 1000), remapping could be time consuming
- (several ms), so we don't want to be too frequent. there could be a slow down immediately
- after remapping as fresh copy-on-writes for commonly written pages will
- be required. so doing these remaps fractionally is helpful.
-
- mutexes:
-
- READLOCK dbMutex (big 'R')
- LOCK groupCommitMutex
- PREPLOGBUFFER()
- READLOCK mmmutex
- commitJob.reset()
- UNLOCK dbMutex // now other threads can write
- WRITETOJOURNAL()
- WRITETODATAFILES()
- UNLOCK mmmutex
- UNLOCK groupCommitMutex
-
-   every Nth groupCommit, we REMAPPRIVATEVIEW() at the end of the work. because of
- that we are in W lock for that groupCommit, which is nonideal of course.
-
- @see https://docs.google.com/drawings/edit?id=1TklsmZzm7ohIZkwgeK6rMvsdaR13KjtJYMsfLr175Zc
-*/
-
-#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kJournal
-
-#include "mongo/platform/basic.h"
-
-#include "mongo/db/storage/mmap_v1/dur.h"
-
-#include <iomanip>
-#include <utility>
-
-#include "mongo/base/static_assert.h"
-#include "mongo/db/client.h"
-#include "mongo/db/commands/server_status.h"
-#include "mongo/db/concurrency/lock_state.h"
-#include "mongo/db/operation_context.h"
-#include "mongo/db/storage/mmap_v1/aligned_builder.h"
-#include "mongo/db/storage/mmap_v1/commit_notifier.h"
-#include "mongo/db/storage/mmap_v1/dur_commitjob.h"
-#include "mongo/db/storage/mmap_v1/dur_journal.h"
-#include "mongo/db/storage/mmap_v1/dur_journal_writer.h"
-#include "mongo/db/storage/mmap_v1/dur_recover.h"
-#include "mongo/db/storage/mmap_v1/dur_stats.h"
-#include "mongo/db/storage/mmap_v1/durable_mapped_file.h"
-#include "mongo/db/storage/mmap_v1/mmap_v1_options.h"
-#include "mongo/db/storage/storage_options.h"
-#include "mongo/stdx/condition_variable.h"
-#include "mongo/stdx/mutex.h"
-#include "mongo/stdx/thread.h"
-#include "mongo/util/clock_source.h"
-#include "mongo/util/exit.h"
-#include "mongo/util/log.h"
-#include "mongo/util/timer.h"
-
-namespace mongo {
-
-using std::endl;
-using std::fixed;
-using std::hex;
-using std::set;
-using std::setprecision;
-using std::setw;
-using std::string;
-using std::stringstream;
-
-namespace dur {
-
-namespace {
-
-// Used to activate the flush thread
-stdx::mutex flushMutex;
-stdx::condition_variable flushRequested;
-
-// This is waited on for getlasterror acknowledgements. It means that data has been written to
-// the journal, but not necessarily applied to the shared view, so it is all right to
-// acknowledge the user operation, but NOT all right to delete the journal files for example.
-CommitNotifier commitNotify;
-
-// This is waited on for complete flush. It means that data has been both written to journal
-// and applied to the shared view, so it is allowed to delete the journal files. Used for
-// fsync:true, close DB, shutdown acknowledgements.
-CommitNotifier applyToDataFilesNotify;
-
-// When set, the flush thread will exit
-AtomicUInt32 shutdownRequested(0);
-
-enum {
- // How many commit cycles to do before considering doing a remap
- NumCommitsBeforeRemap = 10,
-
- // How many outstanding journal flushes should be allowed before applying writer back
- // pressure. Size of 1 allows two journal blocks to be in the process of being written -
- // one on the journal writer's buffer and one blocked waiting to be picked up.
- NumAsyncJournalWrites = 1,
-};
-
-// Remap loop state
-unsigned remapFileToStartAt;
-
-// How frequently to reset the durability statistics
-enum { DurStatsResetIntervalMillis = 3 * 1000 };
-
-// Size sanity checks
-MONGO_STATIC_ASSERT(UncommittedBytesLimit > BSONObjMaxInternalSize * 3);
-MONGO_STATIC_ASSERT(sizeof(void*) == 4 || UncommittedBytesLimit > BSONObjMaxInternalSize * 6);
-
-
-/**
- * MMAP V1 durability server status section.
- */
-class DurSSS : public ServerStatusSection {
-public:
- DurSSS() : ServerStatusSection("dur") {}
-
- virtual bool includeByDefault() const {
- return true;
- }
-
- virtual BSONObj generateSection(OperationContext* opCtx,
- const BSONElement& configElement) const {
- if (!getDur().isDurable()) {
- return BSONObj();
- }
-
- return dur::stats.asObj();
- }
-
-} durSSS;
-
-
-/**
- * A no-op durability interface. Used for the case when journaling is not enabled.
- */
-class NonDurableImpl : public DurableInterface {
-public:
- NonDurableImpl() {}
-
- // DurableInterface virtual methods
- virtual void* writingPtr(void* x, unsigned len) {
- return x;
- }
- virtual void declareWriteIntent(void*, unsigned) {}
- virtual void declareWriteIntents(const std::vector<std::pair<void*, unsigned>>& intents) {}
- virtual void createdFile(const std::string& filename, unsigned long long len) {}
- virtual bool waitUntilDurable() {
- return false;
- }
- virtual bool commitNow(OperationContext* opCtx) {
- return false;
- }
- virtual bool commitIfNeeded() {
- return false;
- }
- virtual void syncDataAndTruncateJournal(OperationContext* opCtx) {}
- virtual bool isDurable() const {
- return false;
- }
- virtual void closingFileNotification() {}
- virtual void commitAndStopDurThread(OperationContext* opCtx) {}
-};
-
-
-/**
- * The actual durability interface, when journaling is enabled.
- */
-class DurableImpl : public DurableInterface {
-public:
- DurableImpl() {}
-
- // DurableInterface virtual methods
- virtual void declareWriteIntents(const std::vector<std::pair<void*, unsigned>>& intents);
- virtual void createdFile(const std::string& filename, unsigned long long len);
- virtual bool waitUntilDurable();
- virtual bool commitNow(OperationContext* opCtx);
- virtual bool commitIfNeeded();
- virtual void syncDataAndTruncateJournal(OperationContext* opCtx);
- virtual bool isDurable() const {
- return true;
- }
- virtual void closingFileNotification();
- virtual void commitAndStopDurThread(OperationContext* opCtx);
-
- void start(ClockSource* cs, int64_t serverStartMs);
-
-private:
- stdx::thread _durThreadHandle;
-};
-
-
-/**
- * Diagnostic to check that the private view and the non-private view are in sync after
- * applying the journal changes. This function is very slow and only runs when paranoid checks
- * are enabled.
- *
- * Must be called under at least S flush lock to ensure that there are no concurrent writes
- * happening.
- */
-void debugValidateFileMapsMatch(const DurableMappedFile* mmf) {
- const unsigned char* p = (const unsigned char*)mmf->getView();
- const unsigned char* w = (const unsigned char*)mmf->view_write();
-
- // Ignore pre-allocated files that are not fully created yet
- if (!p || !w) {
- return;
- }
-
- if (memcmp(p, w, (unsigned)mmf->length()) == 0) {
- return;
- }
-
- unsigned low = 0xffffffff;
- unsigned high = 0;
-
- log() << "DurParanoid mismatch in " << mmf->filename();
-
- int logged = 0;
- unsigned lastMismatch = 0xffffffff;
-
- for (unsigned i = 0; i < mmf->length(); i++) {
- if (p[i] != w[i]) {
- if (lastMismatch != 0xffffffff && lastMismatch + 1 != i) {
- // Separate blocks of mismatches
- log() << std::endl;
- }
-
- lastMismatch = i;
-
- if (++logged < 60) {
- if (logged == 1) {
- // For .ns files to find offset in record
- log() << "ofs % 628 = 0x" << hex << (i % 628) << endl;
- }
-
- stringstream ss;
- ss << "mismatch ofs:" << hex << i << "\tfilemap:" << setw(2) << (unsigned)w[i]
- << "\tprivmap:" << setw(2) << (unsigned)p[i];
-
- if (p[i] > 32 && p[i] <= 126) {
- ss << '\t' << p[i];
- }
-
- log() << ss.str() << endl;
- }
-
- if (logged == 60) {
- log() << "..." << endl;
- }
-
- if (i < low)
- low = i;
- if (i > high)
- high = i;
- }
- }
-
- if (low != 0xffffffff) {
- std::stringstream ss;
- ss << "journal error warning views mismatch " << mmf->filename() << ' ' << hex << low
- << ".." << high << " len:" << high - low + 1;
-
- log() << ss.str() << endl;
- log() << "priv loc: " << (void*)(p + low) << ' ' << endl;
-
- severe() << "Written data does not match in-memory view. Missing WriteIntent?";
- MONGO_UNREACHABLE;
- }
-}
-
-
-/**
- * Main code of the remap private view function.
- */
-void remapPrivateViewImpl(OperationContext* opCtx, double fraction) {
- LOG(4) << "journal REMAPPRIVATEVIEW" << endl;
-
-// There is no way that the set of files can change while we are in this method, because
-// we hold the flush lock in X mode. For files to go away, a database needs to be dropped,
-// which means acquiring the flush lock in at least IX mode.
-//
-// However, the record fetcher logic unfortunately operates without any locks, and on
-// Windows and Solaris remap is not atomic, so there is a window where the record fetcher
-// might get an access violation. That's why we acquire the mongo files mutex here in X
-// mode and the record fetcher takes it in S mode (see MmapV1RecordFetcher for more
-// detail).
-//
-// See SERVER-5723 for performance improvement.
-// See SERVER-5680 to see why this code is necessary on Windows.
-// See SERVER-8795 to see why this code is necessary on Solaris.
-#if defined(_WIN32) || defined(__sun)
- LockMongoFilesExclusive lk(opCtx);
-#else
- LockMongoFilesShared lk(opCtx);
-#endif
-
- std::set<MongoFile*>& files = MongoFile::getAllFiles();
-
- const unsigned sz = files.size();
- if (sz == 0) {
- return;
- }
-
- unsigned ntodo = (unsigned)(sz * fraction);
- if (ntodo < 1)
- ntodo = 1;
- if (ntodo > sz)
- ntodo = sz;
-
- const set<MongoFile*>::iterator b = files.begin();
- const set<MongoFile*>::iterator e = files.end();
- set<MongoFile*>::iterator i = b;
-
- // Skip to our starting position as remembered from the last remap cycle
- for (unsigned x = 0; x < remapFileToStartAt; x++) {
- i++;
- if (i == e)
- i = b;
- }
-
- // Mark where to start on the next cycle
- const unsigned startedAt = remapFileToStartAt;
- remapFileToStartAt = (remapFileToStartAt + ntodo) % sz;
-
- Timer t;
-
- for (unsigned x = 0; x < ntodo; x++) {
- if ((*i)->isDurableMappedFile()) {
- DurableMappedFile* const mmf = (DurableMappedFile*)*i;
-
- // Sanity check that the contents of the shared and the private view match so we
- // don't end up overwriting data.
- if (mmapv1GlobalOptions.journalOptions & MMAPV1Options::JournalParanoid) {
- debugValidateFileMapsMatch(mmf);
- }
-
- if (mmf->willNeedRemap()) {
- mmf->remapThePrivateView(opCtx);
- }
-
- i++;
-
- if (i == e)
- i = b;
- }
- }
-
- LOG(3) << "journal REMAPPRIVATEVIEW done startedAt: " << startedAt << " n:" << ntodo << ' '
- << t.millis() << "ms";
-}
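The round-robin arithmetic above can be isolated: clamp sz * fraction to [1, sz] files per pass and advance a remembered start offset modulo the file count. A sketch under those assumptions (RemapCursor is illustrative; the real code walks a std::set<MongoFile*>):

    #include <algorithm>
    #include <cstddef>

    struct RemapCursor {
        size_t startAt = 0;  // plays the role of remapFileToStartAt

        // Returns how many files to remap this pass and remembers where the
        // next pass should begin, wrapping around the end of the file list.
        size_t advance(size_t sz, double fraction) {
            if (sz == 0)
                return 0;
            size_t ntodo = static_cast<size_t>(sz * fraction);
            ntodo = std::max<size_t>(1, std::min(ntodo, sz));  // clamp to [1, sz]
            startAt = (startAt + ntodo) % sz;
            return ntodo;
        }
    };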
-
-
-// One instance of each durability interface
-DurableImpl durableImpl;
-NonDurableImpl nonDurableImpl;
-
-// Notified when we commit to the journal.
-static JournalListener* journalListener = &NoOpJournalListener::instance;
-// Protects journalListener.
-static stdx::mutex journalListenerMutex;
-
-} // namespace
-
-
-// Declared in dur_preplogbuffer.cpp
-void PREPLOGBUFFER(JSectHeader& outHeader,
- AlignedBuilder& outBuffer,
- ClockSource* cs,
- int64_t serverStartMs);
-
-// Declared in dur_journal.cpp
-boost::filesystem::path getJournalDir();
-void preallocateFiles();
-
-// Forward declaration
-static void durThread(ClockSource* cs, int64_t serverStartMs);
-
-// Durability activity statistics
-Stats stats;
-
-// Reference to the write intents tracking object
-CommitJob commitJob;
-
-// Reference to the active durability interface
-DurableInterface* DurableInterface::_impl(&nonDurableImpl);
-
-
-//
-// Stats
-//
-
-Stats::Stats() : _currIdx(0) {}
-
-void Stats::reset() {
- // Seal the current metrics
- _stats[_currIdx]._durationMillis = _stats[_currIdx].getCurrentDurationMillis();
-
- // Use a new metric
- const unsigned newCurrIdx = (_currIdx + 1) % (sizeof(_stats) / sizeof(_stats[0]));
- _stats[newCurrIdx].reset();
-
- _currIdx = newCurrIdx;
-}
-
-BSONObj Stats::asObj() const {
- // Use the previous statistic
- const S& stats = _stats[(_currIdx - 1) % (sizeof(_stats) / sizeof(_stats[0]))];
-
- BSONObjBuilder builder;
- stats._asObj(&builder);
-
- return builder.obj();
-}
-
-void Stats::S::reset() {
- memset(this, 0, sizeof(*this));
- _startTimeMicros = curTimeMicros64();
-}
-
-std::string Stats::S::_CSVHeader() const {
- return "cmts\t jrnMB\t wrDFMB\t cIWLk\t early\t prpLgB\t wrToJ\t wrToDF\t rmpPrVw";
-}
-
-std::string Stats::S::_asCSV() const {
- stringstream ss;
- ss << setprecision(2) << _commits << '\t' << _journaledBytes / 1000000.0 << '\t'
- << _writeToDataFilesBytes / 1000000.0 << '\t' << _commitsInWriteLock << '\t' << 0 << '\t'
- << (unsigned)(_prepLogBufferMicros / 1000) << '\t'
- << (unsigned)(_writeToJournalMicros / 1000) << '\t'
- << (unsigned)(_writeToDataFilesMicros / 1000) << '\t'
- << (unsigned)(_remapPrivateViewMicros / 1000) << '\t' << (unsigned)(_commitsMicros / 1000)
- << '\t' << (unsigned)(_commitsInWriteLockMicros / 1000) << '\t';
-
- return ss.str();
-}
-
-void Stats::S::_asObj(BSONObjBuilder* builder) const {
- BSONObjBuilder& b = *builder;
- b << "commits" << _commits << "journaledMB" << _journaledBytes / 1000000.0
- << "writeToDataFilesMB" << _writeToDataFilesBytes / 1000000.0 << "compression"
- << _journaledBytes / (_uncompressedBytes + 1.0) << "commitsInWriteLock" << _commitsInWriteLock
- << "earlyCommits" << 0 << "timeMs"
- << BSON("dt" << _durationMillis << "prepLogBuffer" << (unsigned)(_prepLogBufferMicros / 1000)
- << "writeToJournal"
- << (unsigned)(_writeToJournalMicros / 1000)
- << "writeToDataFiles"
- << (unsigned)(_writeToDataFilesMicros / 1000)
- << "remapPrivateView"
- << (unsigned)(_remapPrivateViewMicros / 1000)
- << "commits"
- << (unsigned)(_commitsMicros / 1000)
- << "commitsInWriteLock"
- << (unsigned)(_commitsInWriteLockMicros / 1000));
-
- if (storageGlobalParams.journalCommitIntervalMs.load() != 0) {
- b << "journalCommitIntervalMs" << storageGlobalParams.journalCommitIntervalMs.load();
- }
-}
-
-
-//
-// DurableInterface
-//
-
-DurableInterface::DurableInterface() {}
-
-DurableInterface::~DurableInterface() {}
-
-
-//
-// DurableImpl
-//
-
-bool DurableImpl::commitNow(OperationContext* opCtx) {
- CommitNotifier::When when = commitNotify.now();
-
- AutoYieldFlushLockForMMAPV1Commit flushLockYield(opCtx->lockState());
-
-    // There is always just one waiter anyway
- flushRequested.notify_one();
-
- // commitNotify.waitFor ensures that whatever was scheduled for journaling before this
- // call has been persisted to the journal file. This does not mean that this data has been
- // applied to the shared view yet though, that's why we wait for applyToDataFilesNotify.
- applyToDataFilesNotify.waitFor(when);
-
- return true;
-}
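The ticketed wait in commitNow() can be modeled with a counter guarded by a condition variable: take a ticket covering everything scheduled so far, then block until a stage has advanced past it. A hedged sketch of one such stage (StageNotifier is illustrative, not the actual CommitNotifier):

    #include <condition_variable>
    #include <cstdint>
    #include <mutex>

    class StageNotifier {
    public:
        // Take a ticket covering everything scheduled so far.
        uint64_t now() {
            std::lock_guard<std::mutex> lk(_m);
            return ++_requested;
        }
        // The worker reports progress through this ticket number.
        void notifyUpTo(uint64_t n) {
            {
                std::lock_guard<std::mutex> lk(_m);
                _completed = n;
            }
            _cv.notify_all();
        }
        // Block until the stage has passed the given ticket.
        void waitFor(uint64_t when) {
            std::unique_lock<std::mutex> lk(_m);
            _cv.wait(lk, [&] { return _completed >= when; });
        }

    private:
        std::mutex _m;
        std::condition_variable _cv;
        uint64_t _requested = 0, _completed = 0;
    };

commitNow() chains two such stages: the ticket is taken from the journal stage, but the caller waits on the apply-to-data-files stage, which is what makes the data visible in the shared view.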
-
-bool DurableImpl::waitUntilDurable() {
- commitNotify.awaitBeyondNow();
- return true;
-}
-
-void DurableImpl::createdFile(const std::string& filename, unsigned long long len) {
- std::shared_ptr<DurOp> op(new FileCreatedOp(filename, len));
- commitJob.noteOp(op);
-}
-
-
-void DurableImpl::declareWriteIntents(const std::vector<std::pair<void*, unsigned>>& intents) {
- typedef std::vector<std::pair<void*, unsigned>> Intents;
- stdx::lock_guard<SimpleMutex> lk(commitJob.groupCommitMutex);
- for (Intents::const_iterator it(intents.begin()), end(intents.end()); it != end; ++it) {
- commitJob.note(it->first, it->second);
- }
-}
-
-bool DurableImpl::commitIfNeeded() {
- if (MONGO_likely(commitJob.bytes() < UncommittedBytesLimit)) {
- return false;
- }
-
- // Just wake up the flush thread
- flushRequested.notify_one();
- return true;
-}
-
-void DurableImpl::syncDataAndTruncateJournal(OperationContext* opCtx) {
- invariant(opCtx->lockState()->isW());
-
- // Once this returns, all the outstanding journal has been applied to the data files and
- // so it's safe to do the flushAll/journalCleanup below.
- commitNow(opCtx);
-
- // Flush the shared view to disk.
- MongoFile::flushAll(opCtx, true);
-
- // Once the shared view has been flushed, we do not need the journal files anymore.
- journalCleanup(true);
-
- // Double check post-conditions
- invariant(!haveJournalFiles());
-}
-
-void DurableImpl::closingFileNotification() {
- // File is closing while there are unwritten changes
- invariant(!commitJob.hasWritten(),
- "journal warning files are closing outside locks with writes pending");
-}
-
-void DurableImpl::commitAndStopDurThread(OperationContext* opCtx) {
- CommitNotifier::When when = commitNotify.now();
-
-    // There is always just one waiter anyway
- flushRequested.notify_one();
-
- // commitNotify.waitFor ensures that whatever was scheduled for journaling before this
- // call has been persisted to the journal file. This does not mean that this data has been
- // applied to the shared view yet though, that's why we wait for applyToDataFilesNotify.
- applyToDataFilesNotify.waitFor(when);
-
- // Flush the shared view to disk.
- MongoFile::flushAll(opCtx, true);
-
- // Once the shared view has been flushed, we do not need the journal files anymore.
- journalCleanup(true);
-
- // Double check post-conditions
- invariant(!haveJournalFiles());
-
- shutdownRequested.store(1);
-
- // Wait for the durability thread to terminate
- log() << "Terminating durability thread ...";
- _durThreadHandle.join();
-}
-
-void DurableImpl::start(ClockSource* cs, int64_t serverStartMs) {
- // Start the durability thread
- stdx::thread t(durThread, cs, serverStartMs);
- _durThreadHandle.swap(t);
-}
-
-
-/**
- * Remaps the private view from the shared view so that it does not consume too much
- * copy-on-write/swap space. Must only be called after the in-memory journal has been flushed
- * to disk and applied on top of the shared view.
- *
- * @param fraction Value in (0, 1] indicating what fraction of the memory to remap.
- * Remapping too much or too frequently incurs copy-on-write page fault cost.
- */
-static void remapPrivateView(OperationContext* opCtx, double fraction) {
- // Remapping private views must occur after WRITETODATAFILES otherwise we wouldn't see any
- // newly written data on reads.
- invariant(!commitJob.hasWritten());
-
- try {
- Timer t;
- remapPrivateViewImpl(opCtx, fraction);
- stats.curr()->_remapPrivateViewMicros += t.micros();
-
- LOG(4) << "remapPrivateView end";
- return;
- } catch (DBException& e) {
- severe() << "dbexception in remapPrivateView causing immediate shutdown: " << redact(e);
- } catch (std::ios_base::failure& e) {
- severe() << "ios_base exception in remapPrivateView causing immediate shutdown: "
- << redact(e.what());
- } catch (std::bad_alloc& e) {
- severe() << "bad_alloc exception in remapPrivateView causing immediate shutdown: "
- << redact(e.what());
- } catch (std::exception& e) {
- severe() << "exception in remapPrivateView causing immediate shutdown: "
- << redact(e.what());
- } catch (...) {
- severe() << "unknown exception in remapPrivateView causing immediate shutdown: ";
- }
-
- MONGO_UNREACHABLE;
-}
-
-
-/**
- * The main durability thread loop. There is a single instance of this function running.
- */
-static void durThread(ClockSource* cs, int64_t serverStartMs) {
- Client::initThread("durability");
-
- log() << "Durability thread started";
-
- bool samePartition = true;
- try {
- const std::string dbpathDir = boost::filesystem::path(storageGlobalParams.dbpath).string();
- samePartition = onSamePartition(getJournalDir().string(), dbpathDir);
- } catch (...) {
- }
-
- // Spawn the journal writer thread
- JournalWriter journalWriter(&commitNotify, &applyToDataFilesNotify, NumAsyncJournalWrites);
- journalWriter.start();
-
- // Used as an estimate of how much / how fast to remap
- uint64_t commitCounter(0);
- uint64_t estimatedPrivateMapSize(0);
- uint64_t remapLastTimestamp(0);
-
- while (shutdownRequested.loadRelaxed() == 0) {
- unsigned ms = storageGlobalParams.journalCommitIntervalMs.load();
- if (ms == 0) {
- ms = samePartition ? 100 : 30;
- }
-
- // +1 so it never goes down to zero
- const int64_t oneThird = (ms / 3) + 1;
-
- // Reset the stats based on the reset interval
- if (stats.curr()->getCurrentDurationMillis() > DurStatsResetIntervalMillis) {
- stats.reset();
- }
-
- try {
- stdx::unique_lock<stdx::mutex> lock(flushMutex);
-
- for (unsigned i = 0; i <= 2; i++) {
- if (stdx::cv_status::no_timeout ==
- flushRequested.wait_for(lock, Milliseconds(oneThird).toSystemDuration())) {
- // Someone forced a flush
- break;
- }
-
- if (commitNotify.nWaiting()) {
- // One or more getLastError j:true is pending
- break;
- }
-
- if (commitJob.bytes() > UncommittedBytesLimit / 2) {
- // The number of written bytes is growing
- break;
- }
- }
-
- // The commit logic itself
- LOG(4) << "groupCommit begin";
-
- Timer t;
-
- const ServiceContext::UniqueOperationContext opCtxPtr = cc().makeOperationContext();
- OperationContext& opCtx = *opCtxPtr;
- AutoAcquireFlushLockForMMAPV1Commit autoFlushLock(opCtx.lockState());
-
- // We need to snapshot the commitNumber after the flush lock has been obtained,
- // because at this point we know that we have a stable snapshot of the data.
- const CommitNotifier::When commitNumber(commitNotify.now());
-
- LOG(4) << "Processing commit number " << commitNumber;
-
- if (!commitJob.hasWritten()) {
- // We do not need the journal lock anymore. Free it here, for the really
- // unlikely possibility that the writeBuffer command below blocks.
- autoFlushLock.release();
-
-                // getlasterror request could have come after the data was already committed.
- // No need to call committingReset though, because we have not done any
- // writes (hasWritten == false).
- JournalWriter::Buffer* const buffer = journalWriter.newBuffer();
- buffer->setNoop();
- buffer->journalListenerToken = getJournalListener()->getToken();
-
- journalWriter.writeBuffer(buffer, commitNumber);
- } else {
- // This copies all the in-memory changes into the journal writer's buffer.
- JournalWriter::Buffer* const buffer = journalWriter.newBuffer();
- PREPLOGBUFFER(buffer->getHeader(), buffer->getBuilder(), cs, serverStartMs);
-
- estimatedPrivateMapSize += commitJob.bytes();
- commitCounter++;
-
- // Now that the write intents have been copied to the buffer, the commit job is
- // free to be reused. We need to reset the commit job's contents while under
- // the S flush lock, because otherwise someone might have done a write and this
- // would wipe out their changes without ever being committed.
- commitJob.committingReset();
-
- double systemMemoryPressurePercentage =
- ProcessInfo::getSystemMemoryPressurePercentage();
-
- // Now that the in-memory modifications have been collected, we can potentially
- // release the flush lock if remap is not necessary.
-                // When we remap due to memory pressure, we look at two criteria:
-                // 1. Whether the amount of 4k pages touched exceeds 512 MB,
-                //    a reasonable estimate of memory pressure on Linux.
-                // 2. Whether the amount of free memory on the machine is running low,
-                //    since #1 underestimates the memory pressure on Windows, which
-                //    commits in 64MB chunks.
- const bool shouldRemap = (estimatedPrivateMapSize >= UncommittedBytesLimit) ||
- (systemMemoryPressurePercentage > 0.0) ||
- (commitCounter % NumCommitsBeforeRemap == 0) ||
- (mmapv1GlobalOptions.journalOptions & MMAPV1Options::JournalAlwaysRemap);
-
- double remapFraction = 0.0;
-
- if (shouldRemap) {
- // We want to remap all private views about every 2 seconds. There could be
- // ~1000 views so we do a little each pass. There will be copy on write
- // faults after remapping, so doing a little bit at a time will avoid big
- // load spikes when the pages are touched.
- //
- // TODO: Instead of the time-based logic above, consider using ProcessInfo
- // and watching for getResidentSize to drop, which is more precise.
- remapFraction = (curTimeMicros64() - remapLastTimestamp) / 2000000.0;
-
- if (mmapv1GlobalOptions.journalOptions & MMAPV1Options::JournalAlwaysRemap) {
- remapFraction = 1;
- } else {
- // We don't want to get close to the UncommittedBytesLimit
- const double remapMemFraction =
- estimatedPrivateMapSize / ((double)UncommittedBytesLimit);
-
- remapFraction = std::max(remapMemFraction, remapFraction);
-
- remapFraction = std::max(systemMemoryPressurePercentage, remapFraction);
- }
- } else {
- LOG(4) << "Early release flush lock";
-
- // We will not be doing a remap so drop the flush lock. That way we will be
- // doing the journal I/O outside of lock, so other threads can proceed.
- invariant(!shouldRemap);
- autoFlushLock.release();
- }
-
- buffer->journalListenerToken = getJournalListener()->getToken();
- // Request async I/O to the journal. This may block.
- journalWriter.writeBuffer(buffer, commitNumber);
-
- // Data has now been written to the shared view. If remap was requested, we
- // would still be holding the S flush lock here, so just upgrade it and
- // perform the remap.
- if (shouldRemap) {
- // Need to wait for the previously scheduled journal writes to complete
- // before any remap is attempted.
- journalWriter.flush();
- journalWriter.assertIdle();
-
- // Upgrading the journal lock to flush stops all activity on the system,
- // because we will be remapping memory and we don't want readers to be
-                    // accessing it. Technically this step could be avoided on systems which
- // support atomic remap.
- autoFlushLock.upgradeFlushLockToExclusive();
- remapPrivateView(opCtxPtr.get(), remapFraction);
-
- autoFlushLock.release();
-
- // Reset the private map estimate outside of the lock
- estimatedPrivateMapSize = 0;
- remapLastTimestamp = curTimeMicros64();
-
- stats.curr()->_commitsInWriteLock++;
- stats.curr()->_commitsInWriteLockMicros += t.micros();
- }
- }
-
- stats.curr()->_commits++;
- stats.curr()->_commitsMicros += t.micros();
-
- LOG(4) << "groupCommit end";
- } catch (DBException& e) {
- severe() << "dbexception in durThread causing immediate shutdown: " << redact(e);
- MONGO_UNREACHABLE;
- } catch (std::ios_base::failure& e) {
- severe() << "ios_base exception in durThread causing immediate shutdown: "
- << redact(e.what());
- MONGO_UNREACHABLE;
- } catch (std::bad_alloc& e) {
- severe() << "bad_alloc exception in durThread causing immediate shutdown: "
- << redact(e.what());
- MONGO_UNREACHABLE;
- } catch (std::exception& e) {
- severe() << "exception in durThread causing immediate shutdown: " << redact(e.what());
- MONGO_UNREACHABLE;
- } catch (...) {
- severe() << "unhandled exception in durThread causing immediate shutdown";
- MONGO_UNREACHABLE;
- }
- }
-
- // Stops the journal thread and ensures everything was written
- invariant(!commitJob.hasWritten());
-
- journalWriter.flush();
- journalWriter.shutdown();
-
- log() << "Durability thread stopped";
-}
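The remap-fraction policy embedded in the loop above condenses to one function; every parameter below is a stand-in for the corresponding counter or option in the real thread:

    #include <algorithm>
    #include <cstdint>

    double computeRemapFraction(uint64_t nowMicros,
                                uint64_t lastRemapMicros,
                                uint64_t estimatedPrivateMapBytes,
                                uint64_t uncommittedBytesLimit,
                                double systemMemoryPressurePct,
                                bool alwaysRemap) {
        if (alwaysRemap)
            return 1.0;  // JournalAlwaysRemap
        // Aim to sweep all private views roughly every 2 seconds.
        double fraction = (nowMicros - lastRemapMicros) / 2000000.0;
        // Don't let the private map approach the uncommitted-bytes limit.
        fraction = std::max(fraction,
                            estimatedPrivateMapBytes / double(uncommittedBytesLimit));
        return std::max(fraction, systemMemoryPressurePct);
    }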
-
-
-/**
- * Invoked at server startup. Recovers the database by replaying journal files and then
- * starts the durability thread.
- */
-void startup(ClockSource* cs, int64_t serverStartMs) {
- if (!storageGlobalParams.dur) {
- return;
- }
-
- journalMakeDir(cs, serverStartMs);
-
- try {
- replayJournalFilesAtStartup();
- } catch (DBException& e) {
- severe() << "dbexception during recovery: " << redact(e);
- throw;
- } catch (std::exception& e) {
- severe() << "std::exception during recovery: " << redact(e.what());
- throw;
- } catch (...) {
- severe() << "exception during recovery";
- throw;
- }
-
- preallocateFiles();
-
- durableImpl.start(cs, serverStartMs);
- DurableInterface::_impl = &durableImpl;
-}
-
-void setJournalListener(JournalListener* jl) {
- stdx::unique_lock<stdx::mutex> lk(journalListenerMutex);
- journalListener = jl;
-}
-
-JournalListener* getJournalListener() {
- stdx::unique_lock<stdx::mutex> lk(journalListenerMutex);
- return journalListener;
-}
-
-} // namespace dur
-} // namespace mongo
diff --git a/src/mongo/db/storage/mmap_v1/dur.h b/src/mongo/db/storage/mmap_v1/dur.h
deleted file mode 100644
index 06b38255c25..00000000000
--- a/src/mongo/db/storage/mmap_v1/dur.h
+++ /dev/null
@@ -1,171 +0,0 @@
-/**
- * Copyright (C) 2009-2014 MongoDB Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#pragma once
-
-#include <string>
-#include <vector>
-
-#include "mongo/base/disallow_copying.h"
-#include "mongo/db/storage/journal_listener.h"
-
-namespace mongo {
-
-class ClockSource;
-class OperationContext;
-
-namespace dur {
-
-// a smaller limit is likely better on 32 bit
-const unsigned UncommittedBytesLimit = (sizeof(void*) == 4) ? 50 * 1024 * 1024 : 512 * 1024 * 1024;
-
-class DurableInterface {
- MONGO_DISALLOW_COPYING(DurableInterface);
-
-public:
- virtual ~DurableInterface();
-
- /**
- * Declare that a file has been created. Normally writes are applied only after journaling
- * for safety. But here the file is created first, and the journal will just replay the
- * creation if the create didn't happen due to a crash.
- */
- virtual void createdFile(const std::string& filename, unsigned long long len) = 0;
-
-    // Declare write intents. Use these methods to declare "I'm about to write to x and it
-    // should be logged for redo."
-    //
-    // Failure to declare write intents is checked in MONGO_CONFIG_DEBUG_BUILD mode by
-    // using a read-only mapped view (i.e., you'll segfault if the code is covered in that
-    // situation). The debug check doesn't verify that your length is correct though.
- virtual void declareWriteIntents(const std::vector<std::pair<void*, unsigned>>& intents) = 0;
-
- /** Wait for acknowledgement of the next group commit.
- @return true if --dur is on. There will be delay.
- @return false if --dur is off.
- */
- virtual bool waitUntilDurable() = 0;
-
- /** Commit immediately.
-
- Generally, you do not want to do this often, as highly granular committing may affect
- performance.
-
- Does not return until the commit is complete.
-
- You must be at least read locked when you call this. Ideally, you are not write locked
- and then read operations can occur concurrently.
-
- Do not use this. Use commitIfNeeded() instead.
-
- @return true if --dur is on.
-     @return false if --dur is off. (in which case there is no action)
- */
- virtual bool commitNow(OperationContext* opCtx) = 0;
-
-    /** Commit if enough bytes have been modified. The current threshold is
-        UncommittedBytesLimit (50MB on 32-bit builds, 512MB on 64-bit).
-
- The idea is that long running write operations that don't yield
- (like creating an index) can call this whenever the db is in a sane state and it will
- prevent commits from growing too large.
-     @return true if committed
- */
- virtual bool commitIfNeeded() = 0;
-
-
- /**
- * Called when a DurableMappedFile is closing. Asserts that there are no unwritten changes,
- * because that would mean journal replay on recovery would try to write to non-existent
- * files and fail.
- */
- virtual void closingFileNotification() = 0;
-
- /**
- * Invoked at clean shutdown time. Performs one last commit/flush and terminates the
- * flush thread.
- *
- * Must be called under the global X lock.
- */
- virtual void commitAndStopDurThread(OperationContext* opCtx) = 0;
-
- /**
- * Commits pending changes, flushes all changes to main data files, then removes the
- * journal.
- *
- * WARNING: Data *must* be in a crash-recoverable state when this is called and must
- * not be inside of a write unit of work.
- *
- * This is useful as a "barrier" to ensure that writes before this call will never go
- * through recovery and be applied to files that have had changes made after this call
- * applied.
- */
- virtual void syncDataAndTruncateJournal(OperationContext* opCtx) = 0;
-
- virtual bool isDurable() const = 0;
-
- static DurableInterface& getDur() {
- return *_impl;
- }
-
-protected:
- DurableInterface();
-
-private:
- friend void startup(ClockSource* cs, int64_t serverStartMs);
-
- static DurableInterface* _impl;
-};
-
-
-/**
- * Called during startup to startup the durability module.
- * Does nothing if storageGlobalParams.dur is false
- */
-void startup(ClockSource* cs, int64_t serverStartMs);
-
-// Sets a new JournalListener, which is used to alert the rest of the system about
-// journaled write progress.
-void setJournalListener(JournalListener* jl);
-
-// Expose the JournalListener, needed for the journal writer thread.
-JournalListener* getJournalListener();
-
-} // namespace dur
-
-
-/**
- * Provides a reference to the active durability interface.
- *
- * TODO: The only reason this is an inline function is that tests try to link it and fail if
- * the MMAP V1 engine is not included.
- */
-inline dur::DurableInterface& getDur() {
- return dur::DurableInterface::getDur();
-}
-
-} // namespace mongo
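A hypothetical caller following the declare-then-write contract documented in this header (updateRecord, the pointer, and the length are illustrative, not from the tree):

    #include <cstring>
    #include <utility>
    #include <vector>

    void updateRecord(char* privateViewPtr, unsigned len) {
        std::vector<std::pair<void*, unsigned>> intents;
        intents.emplace_back(privateViewPtr, len);
        mongo::getDur().declareWriteIntents(intents);  // "I'm about to write here"
        std::memset(privateViewPtr, 0, len);           // the write itself
        mongo::getDur().commitIfNeeded();              // commit early if over the byte limit
    }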
diff --git a/src/mongo/db/storage/mmap_v1/dur_commitjob.cpp b/src/mongo/db/storage/mmap_v1/dur_commitjob.cpp
deleted file mode 100644
index 6a8ca62f15d..00000000000
--- a/src/mongo/db/storage/mmap_v1/dur_commitjob.cpp
+++ /dev/null
@@ -1,119 +0,0 @@
-/* @file dur_commitjob.cpp */
-
-/**
-* Copyright (C) 2009 10gen Inc.
-*
-* This program is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Affero General Public License, version 3,
-* as published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU Affero General Public License for more details.
-*
-* You should have received a copy of the GNU Affero General Public License
-* along with this program. If not, see <http://www.gnu.org/licenses/>.
-*
-* As a special exception, the copyright holders give permission to link the
-* code of portions of this program with the OpenSSL library under certain
-* conditions as described in each individual source file and distribute
-* linked combinations including the program with the OpenSSL library. You
-* must comply with the GNU Affero General Public License in all respects for
-* all of the code used other than as permitted herein. If you modify file(s)
-* with this exception, you may extend this exception to your version of the
-* file(s), but you are not obligated to do so. If you do not wish to do so,
-* delete this exception statement from your version. If you delete this
-* exception statement from all source files in the program, then also delete
-* it in the license file.
-*/
-
-#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kStorage
-
-#include "mongo/platform/basic.h"
-
-#include "mongo/db/storage/mmap_v1/dur_commitjob.h"
-
-#include <iostream>
-
-#include "mongo/db/storage/mmap_v1/dur.h"
-#include "mongo/db/storage/mmap_v1/dur_stats.h"
-#include "mongo/db/storage/mmap_v1/durable_mapped_file.h"
-#include "mongo/util/log.h"
-#include "mongo/util/stacktrace.h"
-
-namespace mongo {
-
-using std::shared_ptr;
-using std::endl;
-using std::max;
-using std::min;
-
-namespace dur {
-
-void WriteIntent::absorb(const WriteIntent& other) {
- dassert(overlaps(other));
-
- void* newStart = min(start(), other.start());
- p = max(p, other.p);
- len = (char*)p - (char*)newStart;
-
- dassert(contains(other));
-}
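absorb() works in the end-pointer representation described in dur_commitjob.h below: p points one past the region and len runs backwards from it. A self-contained model with concrete addresses:

    #include <cassert>

    struct IntentModel {
        char* p;       // one past the end of the region
        unsigned len;  // region is [p - len, p)
        char* start() const {
            return p - len;
        }
    };

    int main() {
        char buf[64];
        IntentModel a{buf + 16, 16};  // covers [buf, buf + 16)
        IntentModel b{buf + 24, 16};  // covers [buf + 8, buf + 24), overlaps a
        // absorb: keep the max end, stretch len back to the min start.
        char* newStart = a.start() < b.start() ? a.start() : b.start();
        a.p = a.p > b.p ? a.p : b.p;
        a.len = unsigned(a.p - newStart);
        assert(a.start() == buf && a.p == buf + 24 && a.len == 24);
        return 0;
    }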
-
-
-CommitJob::CommitJob() : _hasWritten(false), _lastNotedPos(0), _bytes(0) {}
-
-CommitJob::~CommitJob() {}
-
-void CommitJob::noteOp(shared_ptr<DurOp> p) {
- stdx::lock_guard<SimpleMutex> lk(groupCommitMutex);
- _hasWritten = true;
- _durOps.push_back(p);
-}
-
-void CommitJob::note(void* p, int len) {
- _hasWritten = true;
-
- if (!_alreadyNoted.checkAndSet(p, len)) {
- // Remember intent. We will journal it in a bit.
- _insertWriteIntent(p, len);
-
- // Round off to page address (4KB).
- const size_t x = ((size_t)p) & ~0xfff;
-
- if (x != _lastNotedPos) {
- _lastNotedPos = x;
-
- // Add the full page amount
- _bytes += (len + 4095) & ~0xfff;
-
- if (_bytes > UncommittedBytesLimit * 3) {
- _complains++;
-
- // Throttle logging
- if (_complains < 100 || (curTimeMillis64() - _lastComplainMs >= 60000)) {
- _lastComplainMs = curTimeMillis64();
-
- warning() << "DR102 too much data written uncommitted (" << _bytes / 1000000.0
- << "MB)";
-
- if (_complains < 10 || _complains % 10 == 0) {
- printStackTrace();
- }
- }
- }
- }
- }
-}
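The bitmask arithmetic note() uses above, in isolation: round an address down to its 4KB page and charge a length up in whole pages:

    #include <cassert>
    #include <cstdint>

    int main() {
        uintptr_t addr = 0x12345678;
        uintptr_t page = addr & ~uintptr_t(0xfff);  // page containing the address
        assert(page == 0x12345000);
        unsigned len = 100;
        uintptr_t charged = (uintptr_t(len) + 4095) & ~uintptr_t(0xfff);  // round up
        assert(charged == 4096);
        return 0;
    }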
-
-void CommitJob::committingReset() {
- _hasWritten = false;
- _alreadyNoted.clear();
- _intents.clear();
- _durOps.clear();
- _bytes = 0;
-}
-
-} // namespace "dur"
-} // namespace "mongo"
diff --git a/src/mongo/db/storage/mmap_v1/dur_commitjob.h b/src/mongo/db/storage/mmap_v1/dur_commitjob.h
deleted file mode 100644
index 80d6cf900f5..00000000000
--- a/src/mongo/db/storage/mmap_v1/dur_commitjob.h
+++ /dev/null
@@ -1,224 +0,0 @@
-/**
- * Copyright (C) 2009-2014 MongoDB Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#pragma once
-
-
-#include "mongo/db/storage/mmap_v1/durop.h"
-#include "mongo/util/concurrency/mutex.h"
-
-namespace mongo {
-namespace dur {
-
-typedef std::vector<std::shared_ptr<DurOp>> DurOpsVector;
-
-/**
- * Declaration of an intent to write to a region of a memory mapped view. We store the end
- * rather than the start pointer to make operator < faster since that is heavily used in
- * set lookup.
- */
-struct WriteIntent {
- WriteIntent() : p(0) {}
- WriteIntent(void* a, unsigned b) : p((char*)a + b), len(b) {}
-
- void* start() const {
- return (char*)p - len;
- }
- void* end() const {
- return p;
- }
- unsigned length() const {
- return len;
- }
- bool operator<(const WriteIntent& rhs) const {
- return end() < rhs.end();
- }
-
- bool overlaps(const WriteIntent& rhs) const {
- return (start() <= rhs.end() && end() >= rhs.start());
- }
-
- bool contains(const WriteIntent& rhs) const {
- return (start() <= rhs.start() && end() >= rhs.end());
- }
-
- // merge into me:
- void absorb(const WriteIntent& other);
-
- friend std::ostream& operator<<(std::ostream& out, const WriteIntent& wi) {
- return (out << "p: " << wi.p << " end: " << wi.end() << " len: " << wi.len);
- }
-
-private:
- void* p; // intent to write up to p
- unsigned len; // up to this len
-};
-
-typedef std::vector<WriteIntent> WriteIntentsVector;
-
-
-/**
- * Bitmap to remember things we have already marked for journaling. False negatives are ok
- * if infrequent, since they only cost performance (the intent is noted again), not correctness.
- */
-template <int Prime>
-class Already {
- MONGO_DISALLOW_COPYING(Already);
-
-public:
- Already() {
- clear();
- }
-
- void clear() {
- memset(this, 0, sizeof(*this));
- }
-
- /**
- * Checks if we have Already recorded/indicated our write intent for this region of
- * memory and automatically upgrades the length if the length was shorter previously.
- *
- * @return true if already indicated.
- */
- bool checkAndSet(void* p, int len) {
- const unsigned x = hashPointer(p);
- std::pair<void*, int>& nd = nodes[x % Prime];
-
- if (nd.first == p) {
- if (nd.second < len) {
- nd.second = len;
- return false; // haven't indicated this len yet
- }
- return true; // already indicated
- }
-
- nd.first = p;
- nd.second = len;
- return false; // a new set
- }
-
-private:
- static unsigned hashPointer(void* v) {
- unsigned x = 0;
- unsigned char* p = (unsigned char*)&v;
- for (unsigned i = 0; i < sizeof(void*); i++) {
- x = x * 131 + p[i];
- }
- return x;
- }
-
- std::pair<void*, int> nodes[Prime];
-};
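Assuming the Already<> class above, the dedup behavior looks like this (a fragment, not a full program):

    Already<127> seen;
    char buf[256];
    bool a = seen.checkAndSet(buf, 64);   // false: new entry, intent must be noted
    bool b = seen.checkAndSet(buf, 64);   // true: already indicated, skip
    bool c = seen.checkAndSet(buf, 128);  // false: length grew, note it again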
-
-
-/**
- * Tracks all write operations on the private view so they can be journaled.
- */
-class CommitJob {
- MONGO_DISALLOW_COPYING(CommitJob);
-
-public:
- CommitJob();
- ~CommitJob();
-
- /**
- * Note an operation other than a "basic write".
- */
- void noteOp(std::shared_ptr<DurOp> p);
-
- /**
- * Record/note an intent to write.
- *
- * NOTE: Not thread safe. Requires the mutex to be locked.
- */
- void note(void* p, int len);
-
- /**
- * When this value is false we don't have to do any group commit.
- */
- bool hasWritten() const {
- return _hasWritten;
- }
-
- /**
- * We use the commitjob object over and over, calling committingReset() rather than
- * reconstructing.
- */
- void committingReset();
-
- /**
-     * We check how much has been written, and if it is getting to be a lot, we commit sooner.
- */
- size_t bytes() const {
- return _bytes;
- }
-
- /**
- * Sorts the internal list of write intents so that overlapping and duplicate items can be
-     * merged. We do the sort here so the caller receives a result that it must treat as
-     * const from its point of view.
- */
- const WriteIntentsVector& getIntentsSorted() {
- sort(_intents.begin(), _intents.end());
- return _intents;
- }
-
- const DurOpsVector& ops() const {
- return _durOps;
- }
-
- SimpleMutex groupCommitMutex;
-
-private:
- void _insertWriteIntent(void* p, int len) {
- _intents.push_back(WriteIntent(p, len));
- }
-
-
- // Whether we put write intents or durops
- bool _hasWritten;
-
- // Write intents along with a bitmask for whether we have already noted them
- Already<127> _alreadyNoted;
- WriteIntentsVector _intents;
-
- // All the ops other than basic writes
- DurOpsVector _durOps;
-
-    // Used to count the bytes used by the private map. Note that _lastNotedPos doesn't
-    // reset with each commit, but that is ok; we aren't being that precise.
- size_t _lastNotedPos;
- size_t _bytes;
-
- // Warning logging for large commits
- uint64_t _lastComplainMs;
- unsigned _complains;
-};
-
-} // namespace "dur"
-} // namespace "mongo"
diff --git a/src/mongo/db/storage/mmap_v1/dur_journal.cpp b/src/mongo/db/storage/mmap_v1/dur_journal.cpp
deleted file mode 100644
index bfb39a0bc6c..00000000000
--- a/src/mongo/db/storage/mmap_v1/dur_journal.cpp
+++ /dev/null
@@ -1,826 +0,0 @@
-// @file dur_journal.cpp writing to the writeahead logging journal
-
-/**
-* Copyright (C) 2010 10gen Inc.
-*
-* This program is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Affero General Public License, version 3,
-* as published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU Affero General Public License for more details.
-*
-* You should have received a copy of the GNU Affero General Public License
-* along with this program. If not, see <http://www.gnu.org/licenses/>.
-*
-* As a special exception, the copyright holders give permission to link the
-* code of portions of this program with the OpenSSL library under certain
-* conditions as described in each individual source file and distribute
-* linked combinations including the program with the OpenSSL library. You
-* must comply with the GNU Affero General Public License in all respects for
-* all of the code used other than as permitted herein. If you modify file(s)
-* with this exception, you may extend this exception to your version of the
-* file(s), but you are not obligated to do so. If you do not wish to do so,
-* delete this exception statement from your version. If you delete this
-* exception statement from all source files in the program, then also delete
-* it in the license file.
-*/
-
-#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kJournal
-
-#include "mongo/platform/basic.h"
-
-#include "mongo/db/storage/mmap_v1/dur_journal.h"
-
-#include <boost/filesystem.hpp>
-#include <boost/filesystem/operations.hpp>
-
-#include "mongo/base/init.h"
-#include "mongo/base/static_assert.h"
-#include "mongo/config.h"
-#include "mongo/db/client.h"
-#include "mongo/db/storage/mmap_v1/aligned_builder.h"
-#include "mongo/db/storage/mmap_v1/compress.h"
-#include "mongo/db/storage/mmap_v1/dur_journalformat.h"
-#include "mongo/db/storage/mmap_v1/dur_journalimpl.h"
-#include "mongo/db/storage/mmap_v1/dur_stats.h"
-#include "mongo/db/storage/mmap_v1/logfile.h"
-#include "mongo/db/storage/mmap_v1/mmap.h"
-#include "mongo/db/storage/mmap_v1/mmap_v1_options.h"
-#include "mongo/db/storage/mmap_v1/paths.h"
-#include "mongo/db/storage/storage_options.h"
-#include "mongo/platform/random.h"
-#include "mongo/util/checksum.h"
-#include "mongo/util/clock_source.h"
-#include "mongo/util/exit.h"
-#include "mongo/util/file.h"
-#include "mongo/util/hex.h"
-#include "mongo/util/log.h"
-#include "mongo/util/mongoutils/str.h"
-#include "mongo/util/progress_meter.h"
-#include "mongo/util/timer.h"
-
-using namespace mongoutils;
-
-namespace mongo {
-
-using std::endl;
-using std::hex;
-using std::string;
-
-class AlignedBuilder;
-
-namespace dur {
-// Rotate after reaching this data size in a journal (j._<n>) file
-// We use a smaller size for 32 bit as the journal is mmapped during recovery (only)
-// Note that if you take a set of datafiles, including journal files, from 32->64 bit or
-// vice-versa, it must work (and should, as-is).
-// --smallfiles makes the limit small.
-
-#if defined(MONGO_CONFIG_DEBUG_BUILD)
-unsigned long long DataLimitPerJournalFile = 128 * 1024 * 1024;
-#elif defined(__APPLE__)
-// assuming a developer box if OS X
-unsigned long long DataLimitPerJournalFile = 256 * 1024 * 1024;
-#else
-unsigned long long DataLimitPerJournalFile =
- (sizeof(void*) == 4) ? 256 * 1024 * 1024 : 1 * 1024 * 1024 * 1024;
-#endif
-
-MONGO_INITIALIZER(InitializeJournalingParams)(InitializerContext* context) {
- if (mmapv1GlobalOptions.smallfiles == true) {
- verify(dur::DataLimitPerJournalFile >= 128 * 1024 * 1024);
- dur::DataLimitPerJournalFile = 128 * 1024 * 1024;
- }
- return Status::OK();
-}
-
-MONGO_STATIC_ASSERT(sizeof(Checksum) == 16);
-MONGO_STATIC_ASSERT(sizeof(JHeader) == 8192);
-MONGO_STATIC_ASSERT(sizeof(JSectHeader) == 20);
-MONGO_STATIC_ASSERT(sizeof(JSectFooter) == 32);
-MONGO_STATIC_ASSERT(sizeof(JEntry) == 12);
-MONGO_STATIC_ASSERT(sizeof(LSNFile) == 88);
-
-bool usingPreallocate = false;
-
-void removeOldJournalFile(boost::filesystem::path p);
-
-boost::filesystem::path getJournalDir() {
- boost::filesystem::path p(storageGlobalParams.dbpath);
- p /= "journal";
- return p;
-}
-
-boost::filesystem::path lsnPath() {
- return getJournalDir() / "lsn";
-}
-
-/** this should be called when something really bad happens so that we can flag appropriately
-*/
-void journalingFailure(const char* msg) {
- /** todo:
- (1) don't log too much
- (2) make an indicator in the journal dir that something bad happened.
- (2b) refuse to do a recovery startup if that is there without manual override.
- */
- log() << "journaling failure/error: " << redact(msg) << endl;
- verify(false);
-}
-
-JSectFooter::JSectFooter() {
- memset(this, 0, sizeof(*this));
- sentinel = JEntry::OpCode_Footer;
-}
-
-JSectFooter::JSectFooter(const void* begin, int len) { // needs buffer to compute hash
- sentinel = JEntry::OpCode_Footer;
- reserved = 0;
- magic[0] = magic[1] = magic[2] = magic[3] = '\n';
-
- Checksum c;
- c.gen(begin, (unsigned)len);
- memcpy(hash, c.bytes, sizeof(hash));
-}
-
-bool JSectFooter::checkHash(const void* begin, int len) const {
- if (!magicOk()) {
- log() << "journal footer not valid" << endl;
- return false;
- }
- Checksum c;
- c.gen(begin, len);
- DEV log() << "checkHash len:" << len << " hash:" << toHex(hash, 16)
- << " current:" << toHex(c.bytes, 16) << endl;
- if (memcmp(hash, c.bytes, sizeof(hash)) == 0)
- return true;
- log() << "journal checkHash mismatch, got: " << toHex(c.bytes, 16)
- << " expected: " << toHex(hash, 16) << endl;
- return false;
-}
-
-namespace {
-std::unique_ptr<SecureRandom> mySecureRandom;
-stdx::mutex mySecureRandomMutex;
-int64_t getMySecureRandomNumber() {
- stdx::lock_guard<stdx::mutex> lk(mySecureRandomMutex);
- if (!mySecureRandom)
- mySecureRandom = SecureRandom::create();
- return mySecureRandom->nextInt64();
-}
-}
-
-JHeader::JHeader(string fname) {
- magic[0] = 'j';
- magic[1] = '\n';
- _version = CurrentVersion;
- memset(ts, 0, sizeof(ts));
- time_t t = time(0);
- strncpy(ts, time_t_to_String_short(t).c_str(), sizeof(ts) - 1);
- memset(dbpath, 0, sizeof(dbpath));
- strncpy(dbpath, fname.c_str(), sizeof(dbpath) - 1);
- {
- fileId = t & 0xffffffff;
- fileId |= static_cast<unsigned long long>(getMySecureRandomNumber()) << 32;
- }
- memset(reserved3, 0, sizeof(reserved3));
- txt2[0] = txt2[1] = '\n';
- n1 = n2 = n3 = n4 = '\n';
-}
-
-Journal j;
-
-const unsigned long long LsnShutdownSentinel = ~((unsigned long long)0);
-
-Journal::Journal() : _written(0), _nextFileNumber(0), _curLogFile(0), _curFileId(0) {
- _lastSeqNumberWrittenToSharedView.store(0);
- _preFlushTime.store(0);
- _lastFlushTime.store(0);
- _writeToLSNNeeded.store(false);
-}
-
-boost::filesystem::path Journal::getFilePathFor(int filenumber) const {
- boost::filesystem::path p(dir);
- p /= string(str::stream() << "j._" << filenumber);
- return p;
-}
-
-/** never throws
- @param anyFiles by default we only look at j._* files. If anyFiles is true, return true
- if there are any files in the journal directory. checkForUncleanShutdown() uses this to
- make sure that the journal directory is mounted.
- @return true if journal dir is not empty
-*/
-bool haveJournalFiles(bool anyFiles) {
- try {
- boost::filesystem::path jdir = getJournalDir();
- if (!boost::filesystem::exists(jdir))
- return false;
-
- for (boost::filesystem::directory_iterator i(jdir);
- i != boost::filesystem::directory_iterator();
- ++i) {
- string fileName = boost::filesystem::path(*i).leaf().string();
- if (anyFiles || str::startsWith(fileName, "j._"))
- return true;
- }
- } catch (const std::exception& e) {
- log() << "Unable to check for journal files due to: " << e.what() << endl;
- }
- return false;
-}
-
-/** throws */
-void removeJournalFiles() {
- log() << "removeJournalFiles" << endl;
- try {
- for (boost::filesystem::directory_iterator i(getJournalDir());
- i != boost::filesystem::directory_iterator();
- ++i) {
- string fileName = boost::filesystem::path(*i).leaf().string();
- if (str::startsWith(fileName, "j._")) {
- try {
- removeOldJournalFile(*i);
- } catch (std::exception& e) {
- log() << "couldn't remove " << fileName << ' ' << e.what() << endl;
- throw;
- }
- }
- }
- try {
- boost::filesystem::remove(lsnPath());
- } catch (...) {
- // std::exception details logged in catch below
- log() << "couldn't remove " << lsnPath().string() << endl;
- throw;
- }
- } catch (std::exception& e) {
- log() << "error removing journal files " << e.what() << endl;
- throw;
- }
- verify(!haveJournalFiles());
-
- flushMyDirectory(getJournalDir() /
- "file"); // flushes parent of argument (in this case journal dir)
-
- LOG(1) << "removeJournalFiles end" << endl;
-}
-
-/** at clean shutdown */
-bool okToCleanUp = false; // successful recovery would set this to true
-void Journal::cleanup(bool _log) {
- if (!okToCleanUp)
- return;
-
- if (_log)
- log() << "journalCleanup..." << endl;
- try {
- stdx::lock_guard<SimpleMutex> lk(_curLogFileMutex);
- closeCurrentJournalFile();
- removeJournalFiles();
- } catch (std::exception& e) {
- log() << "error couldn't remove journal file during shutdown " << e.what() << endl;
- throw;
- }
-}
-void journalCleanup(bool log) {
- j.cleanup(log);
-}
-
-bool _preallocateIsFaster() {
- bool faster = false;
- boost::filesystem::path p = getJournalDir() / "tempLatencyTest";
- if (boost::filesystem::exists(p)) {
- try {
- remove(p);
- } catch (const std::exception& e) {
- log() << "Unable to remove temporary file due to: " << e.what() << endl;
- }
- }
- try {
- AlignedBuilder b(8192);
- int millis[2];
- const int N = 50;
- for (int pass = 0; pass < 2; pass++) {
- LogFile f(p.string());
- Timer t;
- for (int i = 0; i < N; i++) {
- f.synchronousAppend(b.buf(), 8192);
- }
- millis[pass] = t.millis();
- // second time through, file exists and is prealloc case
- }
- int diff = millis[0] - millis[1];
- if (diff > 2 * N) {
- // at least 2ms faster for prealloc case?
- faster = true;
- log() << "preallocateIsFaster=true " << diff / (1.0 * N) << endl;
- }
- } catch (const std::exception& e) {
- log() << "info preallocateIsFaster couldn't run due to: " << e.what() << "; returning false"
- << endl;
- }
- if (boost::filesystem::exists(p)) {
- try {
- remove(p);
- } catch (const std::exception& e) {
- log() << "Unable to remove temporary file due to: " << e.what() << endl;
- }
- }
- return faster;
-}
-bool preallocateIsFaster() {
- Timer t;
- bool res = false;
- if (_preallocateIsFaster() && _preallocateIsFaster()) {
- // maybe system is just super busy at the moment? sleep a second to let it calm down.
-        // deciding to prealloc is a medium-big decision:
- sleepsecs(1);
- res = _preallocateIsFaster();
- }
- if (t.millis() > 3000)
- log() << "preallocateIsFaster check took " << t.millis() / 1000.0 << " secs" << endl;
- return res;
-}
-
-// throws
-void preallocateFile(boost::filesystem::path p, unsigned long long len) {
- if (exists(p))
- return;
-
- log() << "preallocating a journal file " << p.string() << endl;
-
- const unsigned BLKSZ = 1024 * 1024;
- verify(len % BLKSZ == 0);
-
- AlignedBuilder b(BLKSZ);
- memset((void*)b.buf(), 0, BLKSZ);
-
- ProgressMeter m(len, 3 /*secs*/, 10 /*hits between time check (once every 6.4MB)*/);
- m.setName("File Preallocator Progress");
-
- File f;
- f.open(p.string().c_str(), /*read-only*/ false, /*direct-io*/ false);
- verify(f.is_open());
- fileofs loc = 0;
- while (loc < len) {
- f.write(loc, b.buf(), BLKSZ);
- loc += BLKSZ;
- m.hit(BLKSZ);
- }
- verify(loc == len);
- f.fsync();
-}
-
-const int NUM_PREALLOC_FILES = 3;
-inline boost::filesystem::path preallocPath(int n) {
- verify(n >= 0);
- verify(n < NUM_PREALLOC_FILES);
- string fn = str::stream() << "prealloc." << n;
- return getJournalDir() / fn;
-}
-
-// throws
-void _preallocateFiles() {
- for (int i = 0; i < NUM_PREALLOC_FILES; i++) {
- boost::filesystem::path filepath = preallocPath(i);
-
- unsigned long long limit = DataLimitPerJournalFile;
- if (kDebugBuild && i == 1) {
-            // When moving 32->64, the prealloc files would be short. That is "ok", but we
-            // want to exercise that case, so we force it here when
-            // MONGO_CONFIG_DEBUG_BUILD is set by arbitrarily stopping prealloc at a
-            // low limit for one file. We also want to be able to change the constant
-            // in the future without a lot of work anyway.
- limit = 16 * 1024 * 1024;
- }
- preallocateFile(filepath, limit);
- }
-}
-
-void checkFreeSpace() {
- unsigned long long spaceNeeded =
- static_cast<unsigned long long>(3 * DataLimitPerJournalFile * 1.1); // add 10% for headroom
- unsigned long long freeSpace = File::freeSpace(getJournalDir().string());
- unsigned long long prealloced = 0;
- for (int i = 0; i < NUM_PREALLOC_FILES; i++) {
- boost::filesystem::path filepath = preallocPath(i);
- if (exists(filepath))
- prealloced += file_size(filepath);
- }
-
- if (freeSpace + prealloced < spaceNeeded) {
- log() << endl;
- error() << "Insufficient free space for journal files" << endl;
- log() << "Please make at least " << spaceNeeded / (1024 * 1024) << "MB available in "
- << getJournalDir().string() << " or use --smallfiles" << endl;
- log() << endl;
- uasserted(15926, "Insufficient free space for journals");
- }
-}
-
-void preallocateFiles() {
- if (!(mmapv1GlobalOptions.journalOptions & MMAPV1Options::JournalNoCheckSpace))
- checkFreeSpace();
-
- if (exists(preallocPath(0)) || // if enabled previously, keep using
- exists(preallocPath(1)) ||
- (mmapv1GlobalOptions.preallocj && preallocateIsFaster())) {
- usingPreallocate = true;
- try {
- _preallocateFiles();
- } catch (const std::exception& e) {
- log() << "warning caught exception (" << e.what() << ") in preallocateFiles, continuing"
- << endl;
- }
- }
- j.open();
-}
-
-void removeOldJournalFile(boost::filesystem::path p) {
- if (usingPreallocate) {
- try {
- for (int i = 0; i < NUM_PREALLOC_FILES; i++) {
- boost::filesystem::path filepath = preallocPath(i);
- if (!boost::filesystem::exists(filepath)) {
- // we can recycle this file into this prealloc file location
- boost::filesystem::path temppath = filepath.string() + ".temp";
- boost::filesystem::rename(p, temppath);
- {
- // zero the header
- File f;
- f.open(temppath.string().c_str(), false, false);
- char buf[8192];
- memset(buf, 0, 8192);
- f.write(0, buf, 8192);
- f.truncate(DataLimitPerJournalFile);
- f.fsync();
- }
- log() << "old journal file " << p.string() << " will be reused as "
- << filepath.string();
- boost::filesystem::rename(temppath, filepath);
- return;
- }
- }
- } catch (const std::exception& e) {
- log() << "warning exception in dur::removeOldJournalFile " << p.string() << ": "
- << e.what() << endl;
- // fall through and try to delete the file
- }
- }
-
- // already have 3 prealloc files, so delete this file
- try {
- log() << "old journal file will be removed: " << p.string() << endl;
- boost::filesystem::remove(p);
- } catch (const std::exception& e) {
- log() << "warning exception removing " << p.string() << ": " << e.what() << endl;
- }
-}
-
-// find a prealloc.<n> file, presumably to take and use
-boost::filesystem::path findPrealloced() {
- try {
- for (int i = 0; i < NUM_PREALLOC_FILES; i++) {
- boost::filesystem::path filepath = preallocPath(i);
- if (boost::filesystem::exists(filepath))
- return filepath;
- }
- } catch (const std::exception& e) {
- log() << "warning exception in dur::findPrealloced(): " << e.what() << endl;
- }
- return boost::filesystem::path();
-}
-
-/** assure journal/ dir exists. throws. call during startup. */
-void journalMakeDir(ClockSource* cs, int64_t serverStartMs) {
- j.init(cs, serverStartMs);
-
- boost::filesystem::path p = getJournalDir();
- j.dir = p.string();
- log() << "journal dir=" << j.dir << endl;
- if (!boost::filesystem::exists(j.dir)) {
- try {
- boost::filesystem::create_directory(j.dir);
- } catch (std::exception& e) {
- log() << "error creating directory " << j.dir << ' ' << e.what() << endl;
- throw;
- }
- }
-}
-
-void Journal::_open() {
- _curFileId = 0;
- verify(_curLogFile == 0);
- boost::filesystem::path fname = getFilePathFor(_nextFileNumber);
-
- // if we have a prealloced file, use it
- {
- boost::filesystem::path p = findPrealloced();
- if (!p.empty()) {
- try {
- {
- // JHeader::fileId must be updated before renaming to be race-safe
- LogFile f(p.string());
- JHeader h(p.string());
- AlignedBuilder b(8192);
- b.appendStruct(h);
- f.synchronousAppend(b.buf(), b.len());
- }
- boost::filesystem::rename(p, fname);
- } catch (const std::exception& e) {
- log() << "warning couldn't write to / rename file " << p.string() << ": "
- << e.what() << endl;
- }
- }
- }
-
- _curLogFile = new LogFile(fname.string());
- _nextFileNumber++;
- {
- JHeader h(fname.string());
- _curFileId = h.fileId;
- verify(_curFileId);
- AlignedBuilder b(8192);
- b.appendStruct(h);
- _curLogFile->synchronousAppend(b.buf(), b.len());
- }
-}
-
-void Journal::init(ClockSource* cs, int64_t serverStartMs) {
- verify(_curLogFile == 0);
- _clock = cs;
- _serverStartMs = serverStartMs;
-}
-
-void Journal::open() {
- stdx::lock_guard<SimpleMutex> lk(_curLogFileMutex);
- _open();
-}
-
-void LSNFile::set(unsigned long long x) {
- memset(this, 0, sizeof(*this));
- lsn = x;
- checkbytes = ~x;
-}
-
-/** Logs details of the situation and returns 0 if anything surprising is found in the LSNFile.
-    If something highly surprising is found, throws to abort.
-*/
-unsigned long long LSNFile::get() {
- uassert(13614,
- str::stream() << "unexpected version number of lsn file in journal/ directory got: "
- << ver,
- ver == 0);
- if (~lsn != checkbytes) {
- log() << "lsnfile not valid. recovery will be from log start. lsn: " << hex << lsn
- << " checkbytes: " << hex << checkbytes << endl;
- return 0;
- }
- return lsn;
-}
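The lsn/checkbytes pairing verified above is a simple complement check; a torn or zeroed write fails it and recovery falls back to replaying from the start of the log:

    #include <cassert>
    #include <cstdint>

    int main() {
        uint64_t lsn = 0xabcdef12u;
        uint64_t checkbytes = ~lsn;   // written alongside lsn by LSNFile::set()
        assert(~lsn == checkbytes);   // intact file
        uint64_t torn = 0;            // e.g. a crash mid-write left zeros
        assert(~torn != checkbytes);  // detected: treat lsn as 0
        return 0;
    }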
-
-/** called during recovery (the error message text below assumes that)
-*/
-unsigned long long journalReadLSN() {
- if (!exists(lsnPath())) {
- log() << "info no lsn file in journal/ directory" << endl;
- return 0;
- }
-
- try {
- // os can flush as it likes. if it flushes slowly, we will just do extra work on recovery.
- // however, given we actually close the file when writing, that seems unlikely.
- LSNFile L;
- File f;
- f.open(lsnPath().string().c_str());
- verify(f.is_open());
- if (f.len() == 0) {
- // this could be 'normal' if we crashed at the right moment
- log() << "info lsn file is zero bytes long" << endl;
- return 0;
- }
- f.read(0, (char*)&L, sizeof(L));
- unsigned long long lsn = L.get();
- return lsn;
- } catch (std::exception& e) {
- uasserted(13611,
- str::stream() << "can't read lsn file in journal directory : " << e.what());
- }
- return 0;
-}
-
-/** remember "last sequence number" to speed recoveries
- concurrency: called by durThread only.
-*/
-void Journal::updateLSNFile(unsigned long long lsnOfCurrentJournalEntry) {
- if (!_writeToLSNNeeded.load())
- return;
- _writeToLSNNeeded.store(false);
- try {
- // Don't read from _lastFlushTime again in this function since it may change.
- const uint64_t copyOfLastFlushTime = _lastFlushTime.load();
-
- // Only write an LSN that is older than the journal entry we are in the middle of writing.
- // If this trips, it means that _lastFlushTime got ahead of what is actually in the data
- // files because lsnOfCurrentJournalEntry includes data that hasn't yet been written to the
- // data files.
- if (copyOfLastFlushTime >= lsnOfCurrentJournalEntry) {
- severe() << "Attempting to update LSNFile to " << copyOfLastFlushTime
- << " which is not older than the current journal sequence number "
- << lsnOfCurrentJournalEntry;
- fassertFailed(34370);
- }
-
- // os can flush as it likes. if it flushes slowly, we will just do extra work on recovery.
- // however, given we actually close the file, that seems unlikely.
- File f;
- f.open(lsnPath().string().c_str());
- if (!f.is_open()) {
- // can get 0 if an i/o error
- log() << "warning: open of lsn file failed" << endl;
- return;
- }
- LOG(1) << "lsn set " << copyOfLastFlushTime << endl;
- LSNFile lsnf;
- lsnf.set(copyOfLastFlushTime);
- f.write(0, (char*)&lsnf, sizeof(lsnf));
- // do we want to fsync here? if we do it probably needs to be async so the durthread
- // is not delayed.
- } catch (std::exception& e) {
- log() << "warning: write to lsn file failed " << e.what() << endl;
- // keep running (ignore the error). recovery will be slow.
- }
-}
-
-namespace {
-stdx::mutex lastGeneratedSeqNumberMutex;
-uint64_t lastGeneratedSeqNumber = 0;
-}
-
-uint64_t generateNextSeqNumber(ClockSource* cs, int64_t serverStartMs) {
- const uint64_t now = cs->now().toMillisSinceEpoch() - serverStartMs;
-
- stdx::lock_guard<stdx::mutex> lock(lastGeneratedSeqNumberMutex);
- if (now > lastGeneratedSeqNumber) {
- lastGeneratedSeqNumber = now;
- } else {
- // Make sure we return unique monotonically increasing numbers.
- lastGeneratedSeqNumber++;
- }
- return lastGeneratedSeqNumber;
-}
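generateNextSeqNumber() reduces to "wall clock, but strictly increasing"; a condensed model:

    #include <algorithm>
    #include <cstdint>
    #include <mutex>

    class SeqGen {
    public:
        // Prefer the wall-clock value, but never repeat or go backwards.
        uint64_t next(uint64_t nowMs) {
            std::lock_guard<std::mutex> lk(_m);
            _last = std::max(nowMs, _last + 1);
            return _last;
        }

    private:
        std::mutex _m;
        uint64_t _last = 0;
    };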
-
-void setLastSeqNumberWrittenToSharedView(uint64_t seqNumber) {
- j._lastSeqNumberWrittenToSharedView.store(seqNumber);
-}
-
-void notifyPreDataFileFlush() {
- j._preFlushTime.store(j._lastSeqNumberWrittenToSharedView.load());
-}
-
-void notifyPostDataFileFlush() {
- j._lastFlushTime.store(j._preFlushTime.load());
- j._writeToLSNNeeded.store(true);
-}
-
-// call from within _curLogFileMutex
-void Journal::closeCurrentJournalFile() {
- if (!_curLogFile)
- return;
-
- JFile jf;
- jf.filename = _curLogFile->_name;
- jf.lastEventTimeMs = generateNextSeqNumber(_clock, _serverStartMs);
- _oldJournalFiles.push_back(jf);
-
- delete _curLogFile; // close
- _curLogFile = 0;
- _written = 0;
-}
-
-/** remove older journal files.
-    hold _curLogFileMutex (but not dbMutex) when calling
-*/
-void Journal::removeUnneededJournalFiles() {
- while (!_oldJournalFiles.empty()) {
- JFile f = _oldJournalFiles.front();
-
- // 'f.lastEventTimeMs' is the timestamp of the last thing in the journal file.
- // '_lastFlushTime' is the start time of the last successful flush of the data files to
- // disk. We can't delete this journal file until the last successful flush time is at least
- // 10 seconds after 'f.lastEventTimeMs'.
- if (f.lastEventTimeMs + ExtraKeepTimeMs < _lastFlushTime.load()) {
- // eligible for deletion
- boost::filesystem::path p(f.filename);
- removeOldJournalFile(p);
- } else {
- break;
- }
-
- _oldJournalFiles.pop_front();
- }
-}
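The retention rule above as a single predicate (deletable is a hypothetical name; extraKeepMs stands in for the 10-second ExtraKeepTimeMs margin described in the comment):

    #include <cstdint>

    bool deletable(uint64_t lastEventTimeMs, uint64_t lastFlushTime, uint64_t extraKeepMs) {
        return lastEventTimeMs + extraKeepMs < lastFlushTime;
    }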
-
-void Journal::_rotate(unsigned long long lsnOfCurrentJournalEntry) {
- if (globalInShutdownDeprecated() || !_curLogFile)
- return;
-
- j.updateLSNFile(lsnOfCurrentJournalEntry);
-
- if (_curLogFile && _written < DataLimitPerJournalFile)
- return;
-
- if (_curLogFile) {
- _curLogFile->truncate();
- closeCurrentJournalFile();
- removeUnneededJournalFiles();
- }
-
- try {
- Timer t;
- _open();
- int ms = t.millis();
- if (ms >= 200) {
- log() << "DR101 latency warning on journal file open " << ms << "ms" << endl;
- }
- } catch (std::exception& e) {
- log() << "warning exception opening journal file " << e.what() << endl;
- throw;
- }
-}
-
-/** Write (append) the buffer we have built to the journal and fsync it.
-    Called outside of the dbMutex lock as this could be slow.
-    @param uncompressed - a buffer that will be written to the journal after compression.
-    Does not return until the data is on disk.
-*/
-void WRITETOJOURNAL(const JSectHeader& h, const AlignedBuilder& uncompressed) {
- Timer t;
- j.journal(h, uncompressed);
- stats.curr()->_writeToJournalMicros += t.micros();
-}
-
-void Journal::journal(const JSectHeader& h, const AlignedBuilder& uncompressed) {
- static AlignedBuilder b(32 * 1024 * 1024);
- /* buffer to journal will be
- JSectHeader
- compressed operations
- JSectFooter
- */
- const unsigned headTailSize = sizeof(JSectHeader) + sizeof(JSectFooter);
- const unsigned max = maxCompressedLength(uncompressed.len()) + headTailSize;
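- // 'max' is a worst-case bound: maxCompressedLength() accounts for incompressible input
- // growing slightly under compression (a snappy-style bound, judging by
- // mmap_v1/compress.h), plus room for the uncompressed JSectHeader and JSectFooter.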
- b.reset(max);
-
- {
- dassert(h.sectionLen() == (unsigned)0xffffffff); // we will backfill later
- b.appendStruct(h);
- }
-
- size_t compressedLength = 0;
- rawCompress(uncompressed.buf(), uncompressed.len(), b.cur(), &compressedLength);
- verify(compressedLength < 0xffffffff);
- verify(compressedLength < max);
- b.skip(compressedLength);
-
- // footer
- unsigned L = 0xffffffff;
- {
- // pad to alignment, and set the total section length in the JSectHeader
- verify(0xffffe000 == (~(Alignment - 1)));
- unsigned lenUnpadded = b.len() + sizeof(JSectFooter);
- L = (lenUnpadded + Alignment - 1) & (~(Alignment - 1));
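- // Round up to the 8KB Alignment; e.g. lenUnpadded = 10000 pads to L = 16384. The
- // verify above holds because ~(8192 - 1) == 0xffffe000 as a 32-bit unsigned value.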
- dassert(L >= lenUnpadded);
-
- ((JSectHeader*)b.atOfs(0))->setSectionLen(lenUnpadded);
-
- JSectFooter f(b.buf(), b.len()); // computes checksum
- b.appendStruct(f);
- dassert(b.len() == lenUnpadded);
-
- b.skip(L - lenUnpadded);
- dassert(b.len() % Alignment == 0);
- }
-
- try {
- stdx::lock_guard<SimpleMutex> lk(_curLogFileMutex);
-
- // must already be open -- so that _curFileId is correct for previous buffer building
- verify(_curLogFile);
-
- stats.curr()->_uncompressedBytes += uncompressed.len();
- unsigned w = b.len();
- _written += w;
- verify(w <= L);
- stats.curr()->_journaledBytes += L;
- _curLogFile->synchronousAppend((const void*)b.buf(), L);
- _rotate(h.seqNumber);
- } catch (std::exception& e) {
- log() << "error exception in dur::journal " << e.what() << endl;
- throw;
- }
-}
-}
-}
diff --git a/src/mongo/db/storage/mmap_v1/dur_journal.h b/src/mongo/db/storage/mmap_v1/dur_journal.h
deleted file mode 100644
index e1da1b65818..00000000000
--- a/src/mongo/db/storage/mmap_v1/dur_journal.h
+++ /dev/null
@@ -1,100 +0,0 @@
-// @file dur_journal.h
-
-/**
-* Copyright (C) 2010 10gen Inc.
-*
-* This program is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Affero General Public License, version 3,
-* as published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU Affero General Public License for more details.
-*
-* You should have received a copy of the GNU Affero General Public License
-* along with this program. If not, see <http://www.gnu.org/licenses/>.
-*
-* As a special exception, the copyright holders give permission to link the
-* code of portions of this program with the OpenSSL library under certain
-* conditions as described in each individual source file and distribute
-* linked combinations including the program with the OpenSSL library. You
-* must comply with the GNU Affero General Public License in all respects for
-* all of the code used other than as permitted herein. If you modify file(s)
-* with this exception, you may extend this exception to your version of the
-* file(s), but you are not obligated to do so. If you do not wish to do so,
-* delete this exception statement from your version. If you delete this
-* exception statement from all source files in the program, then also delete
-* it in the license file.
-*/
-
-#pragma once
-
-#include <cstdint>
-
-namespace mongo {
-
-class AlignedBuilder;
-class ClockSource;
-class JSectHeader;
-
-namespace dur {
-
-/** true if it is ok to clean up journal files at termination. otherwise, journal files will be retained.
-*/
-extern bool okToCleanUp;
-
-/** called at termination after db files are closed & fsynced, and also after recovery.
- closes and removes journal files
- @param log report in the log that we are cleaning up, if we actually do any work
-*/
-void journalCleanup(bool log = false);
-
-/** assure journal/ dir exists. throws */
-void journalMakeDir(ClockSource* cs, int64_t serverStartMs);
-
-/**
- * Generates the next sequence number for use in the journal, guaranteed to be greater than all
- * prior sequence numbers.
- */
-uint64_t generateNextSeqNumber(ClockSource* cs, int64_t serverStartMs);
-
-/**
- * Informs the journaling system that all writes on or before the passed in sequence number have
- * been written to the data files' shared mmap view.
- */
-void setLastSeqNumberWrittenToSharedView(uint64_t seqNumber);
-
-/** flag that something has gone wrong during writing to the journal
- (not for recovery mode)
-*/
-void journalingFailure(const char* msg);
-
-/** read lsn from disk from the last run before doing recovery */
-unsigned long long journalReadLSN();
-
-/** never throws.
- @param anyFiles by default we only look at j._* files. If anyFiles is true, return true
- if there are any files in the journal directory. checkForUncleanShutdown() uses this to
- make sure that the journal directory is mounted.
- @return true if there are any journal files in the journal dir.
-*/
-bool haveJournalFiles(bool anyFiles = false);
-
-/**
- * Writes the specified uncompressed buffer to the journal.
- */
-void WRITETOJOURNAL(const JSectHeader& h, const AlignedBuilder& uncompressed);
-
-// in case disk controller buffers writes
-const long long ExtraKeepTimeMs = 10000;
-
-/**
- * Call these before (pre) and after (post) the datafiles are flushed to disk by the DataFileSync
- * thread. These should not be called for any other flushes.
- */
-void notifyPreDataFileFlush();
-void notifyPostDataFileFlush();
-} // namespace dur
-} // namespace mongo
diff --git a/src/mongo/db/storage/mmap_v1/dur_journal_writer.cpp b/src/mongo/db/storage/mmap_v1/dur_journal_writer.cpp
deleted file mode 100644
index 3b244c25006..00000000000
--- a/src/mongo/db/storage/mmap_v1/dur_journal_writer.cpp
+++ /dev/null
@@ -1,307 +0,0 @@
-/**
- * Copyright (C) 2015 MongoDB Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kJournal
-
-#include "mongo/platform/basic.h"
-
-#include "mongo/db/storage/mmap_v1/dur_journal_writer.h"
-
-#include "mongo/db/client.h"
-#include "mongo/db/storage/mmap_v1/dur.h"
-#include "mongo/db/storage/mmap_v1/dur_journal.h"
-#include "mongo/db/storage/mmap_v1/dur_recover.h"
-#include "mongo/db/storage/mmap_v1/dur_stats.h"
-#include "mongo/stdx/functional.h"
-#include "mongo/stdx/thread.h"
-#include "mongo/util/concurrency/idle_thread_block.h"
-#include "mongo/util/log.h"
-#include "mongo/util/timer.h"
-
-namespace mongo {
-namespace dur {
-
-namespace {
-
-/**
- * Apply the writes back to the non-private MMF after they are for certain in the journal.
- *
- * (1) TODO we don't need to write back everything on every group commit. We MUST write back
- * that which is going to be remapped on its private view - but that might not be all views.
- *
- * (2) TODO should we do this using N threads? Would be quite easy; see the Hackenberg paper,
- * tables 5 and 6. 2 threads might be a good balance.
- */
-void WRITETODATAFILES(OperationContext* opCtx,
- const JSectHeader& h,
- const AlignedBuilder& uncompressed) {
- Timer t;
-
- LOG(4) << "WRITETODATAFILES BEGIN";
-
- RecoveryJob::get().processSection(opCtx, &h, uncompressed.buf(), uncompressed.len(), NULL);
-
- const long long m = t.micros();
- stats.curr()->_writeToDataFilesMicros += m;
-
- setLastSeqNumberWrittenToSharedView(h.seqNumber);
-
- LOG(4) << "journal WRITETODATAFILES " << m / 1000.0 << "ms";
-}
-
-} // namespace
-
-
-/**
- * Used inside the journal writer thread to ensure that used buffers are cleaned up properly.
- */
-class BufferGuard {
- MONGO_DISALLOW_COPYING(BufferGuard);
-
-public:
- BufferGuard(JournalWriter::Buffer* buffer, JournalWriter::BufferQueue* bufferQueue)
- : _buffer(buffer), _bufferQueue(bufferQueue) {}
-
- ~BufferGuard() {
- // This buffer is done. Reset and remove it from the journal queue and put it on
- // the ready queue.
- _buffer->_reset();
-
- // This should never block. Otherwise we will stall the journaling pipeline
- // permanently and cause deadlock.
- invariant(_bufferQueue->count() < _bufferQueue->maxSize());
- _bufferQueue->push(_buffer);
- }
-
-private:
- // Buffer that this scoped object is managing. Owned until destruction time. Then, the
- // bufferQueue owns it.
- JournalWriter::Buffer* const _buffer;
-
- // Queue where the buffer should be returned to at destruction time. Not owned.
- JournalWriter::BufferQueue* const _bufferQueue;
-};
-
-
-//
-// JournalWriter
-//
-
-JournalWriter::JournalWriter(CommitNotifier* commitNotify,
- CommitNotifier* applyToDataFilesNotify,
- size_t numBuffers)
- : _commitNotify(commitNotify),
- _applyToDataFilesNotify(applyToDataFilesNotify),
- _shutdownRequested(false),
- _journalQueue(numBuffers),
- _lastCommitNumber(0),
- _readyQueue(numBuffers) {
- invariant(_journalQueue.maxSize() == _readyQueue.maxSize());
-}
-
-JournalWriter::~JournalWriter() {
- // Never close the journal writer with outstanding or unaccounted writes
- invariant(_journalQueue.empty());
- invariant(_readyQueue.empty());
-}
-
-void JournalWriter::start() {
- // Do not allow reuse
- invariant(!_shutdownRequested);
-
- // Pre-allocate the journal buffers and push them on the ready queue
- for (size_t i = 0; i < _readyQueue.maxSize(); i++) {
- _readyQueue.push(new Buffer(InitialBufferSizeBytes));
- }
-
- // Start the thread
- stdx::thread t([this] { _journalWriterThread(); });
- _journalWriterThreadHandle.swap(t);
-}
-
-void JournalWriter::shutdown() {
- // There is no reason to call shutdown multiple times
- invariant(!_shutdownRequested);
- _shutdownRequested = true;
-
- // Never terminate the journal writer with outstanding or unaccounted writes
- assertIdle();
-
- Buffer* const shutdownBuffer = newBuffer();
- shutdownBuffer->_setShutdown();
-
- // This will terminate the journal thread. No need to specify commit number, since we are
- // shutting down and nothing will be notified anyways.
- writeBuffer(shutdownBuffer, 0);
-
- // Ensure the journal thread has stopped and everything accounted for.
- _journalWriterThreadHandle.join();
- assertIdle();
-
- // Delete the buffers (this deallocates the journal buffer memory)
- while (!_readyQueue.empty()) {
- Buffer* const buffer = _readyQueue.blockingPop();
- delete buffer;
- }
-}
-
-void JournalWriter::assertIdle() {
- // All buffers are in the ready queue means there is nothing pending.
- invariant(_journalQueue.empty());
- invariant(_readyQueue.count() == _readyQueue.maxSize());
-}
-
-JournalWriter::Buffer* JournalWriter::newBuffer() {
- Buffer* const buffer = _readyQueue.blockingPop();
- buffer->_assertEmpty();
-
- return buffer;
-}
-
-void JournalWriter::writeBuffer(Buffer* buffer, CommitNotifier::When commitNumber) {
- invariant(buffer->_commitNumber == 0);
- invariant((commitNumber > _lastCommitNumber) || (buffer->_isShutdown && (commitNumber == 0)));
-
- buffer->_commitNumber = commitNumber;
-
- _journalQueue.push(buffer);
-}
-
-void JournalWriter::flush() {
- std::vector<Buffer*> buffers;
-
- // Pop the expected number of buffers from the ready queue. This will block until all
- // in-progress buffers have completed.
- for (size_t i = 0; i < _readyQueue.maxSize(); i++) {
- buffers.push_back(_readyQueue.blockingPop());
- }
-
- // Put them back in to restore the original state.
- for (size_t i = 0; i < buffers.size(); i++) {
- _readyQueue.push(buffers[i]);
- }
-}
-
-void JournalWriter::_journalWriterThread() {
- Client::initThread("journal writer");
-
- log() << "Journal writer thread started";
-
- try {
- while (true) {
- Buffer* const buffer = [&] {
- MONGO_IDLE_THREAD_BLOCK;
- return _journalQueue.blockingPop();
- }();
-
- BufferGuard bufferGuard(buffer, &_readyQueue);
-
- if (buffer->_isShutdown) {
- invariant(buffer->_builder.len() == 0);
-
- // The journal writer thread is terminating. Nothing to notify or write.
- break;
- }
-
- if (buffer->_isNoop) {
- invariant(buffer->_builder.len() == 0);
-
- // There's nothing to be written, but we still need to notify this commit number
- _commitNotify->notifyAll(buffer->_commitNumber);
- _applyToDataFilesNotify->notifyAll(buffer->_commitNumber);
- continue;
- }
-
- LOG(4) << "Journaling commit number " << buffer->_commitNumber << " (journal file "
- << buffer->_header.fileId << ", sequence " << buffer->_header.seqNumber
- << ", size " << buffer->_builder.len() << " bytes)";
-
- // This performs synchronous I/O to the journal file and will block.
- WRITETOJOURNAL(buffer->_header, buffer->_builder);
-
- // Data is now persisted in the journal, which is sufficient for acknowledging
- // durability.
- dur::getJournalListener()->onDurable(buffer->journalListenerToken);
- _commitNotify->notifyAll(buffer->_commitNumber);
-
- // Apply the journal entries on top of the shared view so that when flush is
- // requested it would write the latest.
- WRITETODATAFILES(cc().makeOperationContext().get(), buffer->_header, buffer->_builder);
-
- // Data is now persisted on the shared view, so notify any potential journal file
- // cleanup waiters.
- _applyToDataFilesNotify->notifyAll(buffer->_commitNumber);
- }
- } catch (const DBException& e) {
- severe() << "dbexception in journalWriterThread causing immediate shutdown: " << redact(e);
- MONGO_UNREACHABLE;
- } catch (const std::ios_base::failure& e) {
- severe() << "ios_base exception in journalWriterThread causing immediate shutdown: "
- << e.what();
- MONGO_UNREACHABLE;
- } catch (const std::bad_alloc& e) {
- severe() << "bad_alloc exception in journalWriterThread causing immediate shutdown: "
- << e.what();
- MONGO_UNREACHABLE;
- } catch (const std::exception& e) {
- severe() << "exception in journalWriterThread causing immediate shutdown: "
- << redact(e.what());
- MONGO_UNREACHABLE;
- } catch (...) {
- severe() << "unhandled exception in journalWriterThread causing immediate shutdown";
- MONGO_UNREACHABLE;
- }
-
- log() << "Journal writer thread stopped";
-}
-
-
-//
-// Buffer
-//
-
-JournalWriter::Buffer::Buffer(size_t initialSize)
- : _commitNumber(0), _isNoop(false), _isShutdown(false), _header(), _builder(initialSize) {}
-
-JournalWriter::Buffer::~Buffer() {
- _assertEmpty();
-}
-
-void JournalWriter::Buffer::_assertEmpty() {
- invariant(_commitNumber == 0);
- invariant(_builder.len() == 0);
-}
-
-void JournalWriter::Buffer::_reset() {
- _commitNumber = 0;
- _isNoop = false;
- _builder.reset();
-}
-
-} // namespace dur
-} // namespace mongo
diff --git a/src/mongo/db/storage/mmap_v1/dur_journal_writer.h b/src/mongo/db/storage/mmap_v1/dur_journal_writer.h
deleted file mode 100644
index de36e202f81..00000000000
--- a/src/mongo/db/storage/mmap_v1/dur_journal_writer.h
+++ /dev/null
@@ -1,200 +0,0 @@
-/**
- * Copyright (C) 2015 MongoDB Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#pragma once
-
-#include "mongo/base/disallow_copying.h"
-#include "mongo/db/storage/journal_listener.h"
-#include "mongo/db/storage/mmap_v1/aligned_builder.h"
-#include "mongo/db/storage/mmap_v1/commit_notifier.h"
-#include "mongo/db/storage/mmap_v1/dur_journalformat.h"
-#include "mongo/stdx/thread.h"
-#include "mongo/util/queue.h"
-
-namespace mongo {
-namespace dur {
-
-/**
- * Manages the thread and queues used for writing the journal to disk and notifying parties
- * that are waiting on the write concern.
- *
- * NOTE: Not thread-safe and must not be used from more than one thread.
- */
-class JournalWriter {
- MONGO_DISALLOW_COPYING(JournalWriter);
-
-public:
- /**
- * Stores the memory and the header for a complete journal buffer which is pending to be
- * written by the journal writer thread.
- */
- class Buffer {
- public:
- Buffer(size_t initialSize);
- ~Buffer();
-
- JSectHeader& getHeader() {
- return _header;
- }
- AlignedBuilder& getBuilder() {
- return _builder;
- }
-
- void setNoop() {
- _isNoop = true;
- }
-
- JournalListener::Token journalListenerToken;
-
- private:
- friend class BufferGuard;
- friend class JournalWriter;
-
-
- void _assertEmpty();
- void _reset();
- void _setShutdown() {
- _isShutdown = true;
- }
-
- // Specifies the commit number which flushing this buffer would notify. This value is
- // zero, if there is no data to be flushed or if the buffer is noop/shutdown.
- CommitNotifier::When _commitNumber;
-
- // Special buffer that's posted when there is nothing to be written to the journal,
- // but we want to order a notification so it happens after all other writes have
- // completed.
- bool _isNoop;
-
- // Special buffer that's posted when the receiving thread must terminate. This should
- // be the last entry posted to the queue and the commit number should be zero.
- bool _isShutdown;
-
- JSectHeader _header;
- AlignedBuilder _builder;
- };
-
-
- /**
- * Initializes the journal writer.
- *
- * @param commitNotify Notification object to be called after journal entries have been
- * written to disk. The caller retains ownership and the notify object must outlive
- * the journal writer object.
- * @param applyToDataFilesNotify Notification object to be called after journal entries
- * have been applied to the shared view. This means that if the shared view were to be
- * flushed at this point, the journal files before this point are not necessary. The
- * caller retains ownership and the notify object must outlive the journal writer
- * object.
- * @param numBuffers How many buffers to create to hold outstanding writes. If there are
- * more than this number of journal writes that have not completed, the write calls
- * will block.
- */
- JournalWriter(CommitNotifier* commitNotify,
- CommitNotifier* applyToDataFilesNotify,
- size_t numBuffers);
- ~JournalWriter();
-
- /**
- * Allocates buffer memory and starts the journal writer thread.
- */
- void start();
-
- /**
- * Terminates the journal writer thread and frees memory for the buffers. Must not be
- * called if there are any pending journal writes.
- */
- void shutdown();
-
- /**
- * Asserts that there are no pending journal writes.
- */
- void assertIdle();
-
- /**
- * Obtains a new empty buffer into which a journal entry should be written.
- *
- * This method may block if there are no free buffers.
- *
- * The caller does not own the buffer and needs to "return" it to the writer by calling
- * writeBuffer. Buffers with data on them should never be discarded until they are written.
- */
- Buffer* newBuffer();
-
- /**
- * Requests that the specified buffer be written asynchronously.
- *
- * This method may block if there are too many outstanding unwritten buffers.
- *
- * @param buffer Buffer entry to be written. The buffer object must not be used anymore
- * after it has been given to this function.
- * @param commitNumber What commit number to be notified once the buffer has been written
- * to disk.
- */
- void writeBuffer(Buffer* buffer, CommitNotifier::When commitNumber);
-
- /**
- * Ensures that all previously submitted write requests complete. This call is blocking.
- */
- void flush();
-
-private:
- friend class BufferGuard;
-
- typedef BlockingQueue<Buffer*> BufferQueue;
-
- // Start all buffers with 4MB of size
- enum { InitialBufferSizeBytes = 4 * 1024 * 1024 };
-
-
- void _journalWriterThread();
-
-
- // This gets notified as journal buffers are written. It is not owned and needs to outlive
- // the journal writer object.
- CommitNotifier* const _commitNotify;
-
- // This gets notified as journal buffers are done being applied to the shared view
- CommitNotifier* const _applyToDataFilesNotify;
-
- // Wraps and controls the journal writer thread
- stdx::thread _journalWriterThreadHandle;
-
- // Indicates that shutdown has been requested. Used for idempotency of the shutdown call.
- bool _shutdownRequested;
-
- // Queue of buffers, which need to be written by the journal writer thread
- BufferQueue _journalQueue;
- CommitNotifier::When _lastCommitNumber;
-
- // Queue of buffers, whose write has been completed by the journal writer thread.
- BufferQueue _readyQueue;
-};
-
-} // namespace dur
-} // namespace mongo
diff --git a/src/mongo/db/storage/mmap_v1/dur_journalformat.h b/src/mongo/db/storage/mmap_v1/dur_journalformat.h
deleted file mode 100644
index 964c0b79b9b..00000000000
--- a/src/mongo/db/storage/mmap_v1/dur_journalformat.h
+++ /dev/null
@@ -1,219 +0,0 @@
-// @file dur_journalformat.h The format of our journal files.
-
-/**
-* Copyright (C) 2010 10gen Inc.
-*
-* This program is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Affero General Public License, version 3,
-* as published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU Affero General Public License for more details.
-*
-* You should have received a copy of the GNU Affero General Public License
-* along with this program. If not, see <http://www.gnu.org/licenses/>.
-*
-* As a special exception, the copyright holders give permission to link the
-* code of portions of this program with the OpenSSL library under certain
-* conditions as described in each individual source file and distribute
-* linked combinations including the program with the OpenSSL library. You
-* must comply with the GNU Affero General Public License in all respects for
-* all of the code used other than as permitted herein. If you modify file(s)
-* with this exception, you may extend this exception to your version of the
-* file(s), but you are not obligated to do so. If you do not wish to do so,
-* delete this exception statement from your version. If you delete this
-* exception statement from all source files in the program, then also delete
-* it in the license file.
-*/
-
-#pragma once
-
-#include <sstream>
-#include <string>
-
-#include "mongo/util/assert_util.h"
-
-namespace mongo {
-
-namespace dur {
-
-const unsigned Alignment = 8192;
-
-#pragma pack(1)
-/** beginning header for a journal/j._<n> file
- there is nothing important in this header at this time, except perhaps the version #.
-*/
-struct JHeader {
- JHeader() {}
- JHeader(std::string fname);
-
- // "j\n". j means journal, then a linefeed, fwiw if you were to run "less" on the file or
- // something...
- char magic[2];
-
-// 0x4142 is ASCII-readable if you look at the file with head/less -- thus the starting values
-// were near that. simply incrementing the version # is safe on a forward basis.
-#if defined(_NOCOMPRESS)
- enum { CurrentVersion = 0x4148 };
-#else
- enum { CurrentVersion = 0x4149 };
-#endif
- unsigned short _version;
-
- // these are just for diagnostic ease (make header more useful as plain text)
- char n1; // '\n'
- char ts[20]; // ascii timestamp of file generation. for user reading, not used by code.
- char n2; // '\n'
- char dbpath[128]; // path/filename of this file for human reading and diagnostics. not used
- // by code.
- char n3, n4; // '\n', '\n'
-
- unsigned long long fileId; // unique identifier that will be in each JSectHeader.
- // important as we recycle preallocated files
-
- char reserved3[8026]; // 8KB total for the file header
- char txt2[2]; // "\n\n" at the end
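- // Size check (added note, not in the original header): 2 + 2 + 1 + 20 + 1 + 128 + 2 + 8
- // + 8026 + 2 == 8192 bytes under #pragma pack(1), exactly one Alignment unit.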
-
- bool versionOk() const {
- return _version == CurrentVersion;
- }
- bool valid() const {
- return magic[0] == 'j' && txt2[1] == '\n' && fileId;
- }
-};
-
-/** "Section" header. A section corresponds to a group commit.
- len is length of the entire section including header and footer.
- header and footer are not compressed, just the stuff in between.
-*/
-struct JSectHeader {
-private:
- unsigned _sectionLen; // unpadded length in bytes of the whole section
-public:
- unsigned long long
- seqNumber; // sequence number that can be used on recovery to not do too much work
- unsigned long long fileId; // matches JHeader::fileId
- unsigned sectionLen() const {
- return _sectionLen;
- }
-
- // we store the unpadded length so we can use that when we uncompress. to
- // get the true total size this must be rounded up to the Alignment.
- void setSectionLen(unsigned lenUnpadded) {
- _sectionLen = lenUnpadded;
- }
-
- unsigned sectionLenWithPadding() const {
- unsigned x = (sectionLen() + (Alignment - 1)) & (~(Alignment - 1));
- dassert(x % Alignment == 0);
- return x;
- }
-};
-
-/** an individual write operation within a group commit section. Either the entire section should
- be applied, or nothing. (We check the md5 for the whole section before doing anything on
- recovery.)
-*/
-struct JEntry {
- enum OpCodes {
- OpCode_Footer = 0xffffffff,
- OpCode_DbContext = 0xfffffffe,
- OpCode_FileCreated = 0xfffffffd,
- OpCode_DropDb = 0xfffffffc,
- OpCode_Min = 0xfffff000
- };
- union {
- unsigned
- len; // length in bytes of the data of the JEntry. does not include the JEntry header
- OpCodes opcode;
- };
-
- unsigned ofs; // offset in file
-
- // sentinel and masks for _fileNo
- enum {
- DotNsSuffix = 0x7fffffff, // ".ns" file
- LocalDbBit = 0x80000000 // assuming "local" db instead of using the JDbContext
- };
- int _fileNo; // high bit is set to indicate it should be the <dbpath>/local database
- // char data[len] follows
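- // Illustrative (not from the original source): _fileNo == 0x80000005 denotes file no. 5
- // of the "local" db; getFileNo() masks LocalDbBit off and returns 5.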
-
- const char* srcData() const {
- const int* i = &_fileNo;
- return (const char*)(i + 1);
- }
-
- int getFileNo() const {
- return _fileNo & (~LocalDbBit);
- }
- void setFileNo(int f) {
- _fileNo = f;
- }
- bool isNsSuffix() const {
- return getFileNo() == DotNsSuffix;
- }
-
- void setLocalDbContextBit() {
- _fileNo |= LocalDbBit;
- }
- bool isLocalDbContext() const {
- return _fileNo & LocalDbBit;
- }
- void clearLocalDbContextBit() {
- _fileNo = getFileNo();
- }
-
- static std::string suffix(int fileno) {
- if (fileno == DotNsSuffix)
- return "ns";
- std::stringstream ss;
- ss << fileno;
- return ss.str();
- }
-};
-
-/** group commit section footer. md5 is a key field. */
-struct JSectFooter {
- JSectFooter();
- JSectFooter(const void* begin, int len); // needs buffer to compute hash
- unsigned sentinel;
- unsigned char hash[16];
- unsigned long long reserved;
- char magic[4]; // "\n\n\n\n"
-
- /** used by recovery to see if buffer is valid
- @param begin the buffer
- @param len buffer len
- @return true if buffer looks valid
- */
- bool checkHash(const void* begin, int len) const;
-
- bool magicOk() const {
- return *((unsigned*)magic) == 0x0a0a0a0a;
- }
-};
-
-/** declares "the next entry(s) are for this database / file path prefix" */
-struct JDbContext {
- JDbContext() : sentinel(JEntry::OpCode_DbContext) {}
- const unsigned sentinel; // compare to JEntry::len -- zero is our sentinel
- // char dbname[];
-};
-
-/** "last sequence number" */
-struct LSNFile {
- unsigned ver;
- unsigned reserved2;
- unsigned long long lsn;
- unsigned long long checkbytes;
- unsigned long long reserved[8];
-
- void set(unsigned long long lsn);
- unsigned long long get();
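- // Added note (an assumption, not in the original header): set()/get() presumably use
- // 'checkbytes' as a redundant encoding of 'lsn' so that a torn write of this tiny file
- // can be detected and the stale LSN ignored on recovery.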
-};
-
-#pragma pack()
-}
-}
diff --git a/src/mongo/db/storage/mmap_v1/dur_journalimpl.h b/src/mongo/db/storage/mmap_v1/dur_journalimpl.h
deleted file mode 100644
index 9a4d22fa826..00000000000
--- a/src/mongo/db/storage/mmap_v1/dur_journalimpl.h
+++ /dev/null
@@ -1,130 +0,0 @@
-// @file dur_journalimpl.h
-
-/**
-* Copyright (C) 2010 10gen Inc.
-*
-* This program is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Affero General Public License, version 3,
-* as published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU Affero General Public License for more details.
-*
-* You should have received a copy of the GNU Affero General Public License
-* along with this program. If not, see <http://www.gnu.org/licenses/>.
-*
-* As a special exception, the copyright holders give permission to link the
-* code of portions of this program with the OpenSSL library under certain
-* conditions as described in each individual source file and distribute
-* linked combinations including the program with the OpenSSL library. You
-* must comply with the GNU Affero General Public License in all respects for
-* all of the code used other than as permitted herein. If you modify file(s)
-* with this exception, you may extend this exception to your version of the
-* file(s), but you are not obligated to do so. If you do not wish to do so,
-* delete this exception statement from your version. If you delete this
-* exception statement from all source files in the program, then also delete
-* it in the license file.
-*/
-
-#pragma once
-
-#include <boost/filesystem/path.hpp>
-
-#include "mongo/db/storage/mmap_v1/aligned_builder.h"
-#include "mongo/db/storage/mmap_v1/dur_journalformat.h"
-#include "mongo/db/storage/mmap_v1/logfile.h"
-#include "mongo/platform/atomic_word.h"
-#include "mongo/stdx/mutex.h"
-#include "mongo/util/concurrency/mutex.h"
-
-namespace mongo {
-
-class ClockSource;
-
-namespace dur {
-
-/** the writeahead journal for durability */
-class Journal {
-public:
- std::string dir; // set by journalMakeDir() during initialization
-
- Journal();
-
- /** call during startup by journalMakeDir() */
- void init(ClockSource* cs, int64_t serverStartMs);
-
- /** check if it is time to rotate files; assure a file is open.
- done separately from the journal() call as we can do this part
- outside of the lock.
- thread: durThread()
- */
- void rotate();
-
- /** append to the journal file
- */
- void journal(const JSectHeader& h, const AlignedBuilder& b);
-
- boost::filesystem::path getFilePathFor(int filenumber) const;
-
- void cleanup(bool log); // closes and removes journal files
-
- unsigned long long curFileId() const {
- return _curFileId;
- }
-
- void assureLogFileOpen() {
- stdx::lock_guard<SimpleMutex> lk(_curLogFileMutex);
- if (_curLogFile == 0)
- _open();
- }
-
- /** open a journal file to journal operations to. */
- void open();
-
-private:
- /** check if it is time to rotate files; assure a file is open.
- * internally called with every commit
- */
- void _rotate(unsigned long long lsnOfCurrentJournalEntry);
-
- void _open();
- void closeCurrentJournalFile();
- void removeUnneededJournalFiles();
-
- unsigned long long _written = 0; // bytes written so far to the current journal (log) file
- unsigned _nextFileNumber = 0;
-
- SimpleMutex _curLogFileMutex;
-
- LogFile* _curLogFile; // use _curLogFileMutex
- unsigned long long _curFileId; // current file id see JHeader::fileId
-
- struct JFile {
- std::string filename;
- unsigned long long lastEventTimeMs;
- };
-
- // files which have been closed but not unlinked (rotated out) yet
- // ordered oldest to newest
- std::list<JFile> _oldJournalFiles; // use _curLogFileMutex
-
- // lsn related
- friend void setLastSeqNumberWrittenToSharedView(uint64_t seqNumber);
- friend void notifyPreDataFileFlush();
- friend void notifyPostDataFileFlush();
- void updateLSNFile(unsigned long long lsnOfCurrentJournalEntry);
- // data <= this time is in the shared view
- AtomicUInt64 _lastSeqNumberWrittenToSharedView;
- // data <= this time was in the shared view when the last flush to start started
- AtomicUInt64 _preFlushTime;
- // data <= this time is fsynced in the datafiles (unless hard drive controller is caching)
- AtomicUInt64 _lastFlushTime;
- AtomicWord<bool> _writeToLSNNeeded;
-
- ClockSource* _clock;
- int64_t _serverStartMs;
-};
-}
-}
diff --git a/src/mongo/db/storage/mmap_v1/dur_preplogbuffer.cpp b/src/mongo/db/storage/mmap_v1/dur_preplogbuffer.cpp
deleted file mode 100644
index d31b883b9c7..00000000000
--- a/src/mongo/db/storage/mmap_v1/dur_preplogbuffer.cpp
+++ /dev/null
@@ -1,209 +0,0 @@
-/**
- * Copyright (C) 2009-2014 MongoDB Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-/*
- PREPLOGBUFFER
- we will build an output buffer ourselves and then use O_DIRECT
- we could be in read lock for this
- for very large objects write directly to redo log in situ?
- @see https://docs.google.com/drawings/edit?id=1TklsmZzm7ohIZkwgeK6rMvsdaR13KjtJYMsfLr175Zc
-*/
-
-#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kStorage
-
-#include "mongo/platform/basic.h"
-
-#include "mongo/db/storage/mmap_v1/aligned_builder.h"
-#include "mongo/db/storage/mmap_v1/dur_commitjob.h"
-#include "mongo/db/storage/mmap_v1/dur_journal.h"
-#include "mongo/db/storage/mmap_v1/dur_journalimpl.h"
-#include "mongo/db/storage/mmap_v1/dur_stats.h"
-#include "mongo/db/storage/mmap_v1/durable_mapped_file.h"
-#include "mongo/db/storage/storage_options.h"
-#include "mongo/stdx/thread.h"
-#include "mongo/util/clock_source.h"
-#include "mongo/util/log.h"
-#include "mongo/util/mongoutils/str.h"
-#include "mongo/util/stacktrace.h"
-#include "mongo/util/timer.h"
-
-namespace mongo {
-
-using std::endl;
-using std::min;
-using std::stringstream;
-
-namespace dur {
-
-extern Journal j;
-extern CommitJob commitJob;
-
-const RelativePath local = RelativePath::fromRelativePath("local");
-
-static DurableMappedFile* findMMF_inlock(void* ptr, size_t& ofs) {
- DurableMappedFile* f = privateViews.find_inlock(ptr, ofs);
- if (f == 0) {
- error() << "findMMF_inlock failed " << privateViews.numberOfViews_inlock() << endl;
-
- // we want a stack trace here; the assert below once failed to print a trace in the real
- // world - not sure why
- printStackTrace();
- stringstream ss;
- ss << "view pointer cannot be resolved " << std::hex << (size_t)ptr;
- journalingFailure(ss.str().c_str()); // asserts, which then abends
- }
- return f;
-}
-
-/** put the basic write operation into the buffer (bb) to be journaled */
-static void prepBasicWrite_inlock(AlignedBuilder& bb,
- const WriteIntent* i,
- RelativePath& lastDbPath) {
- size_t ofs = 1;
- DurableMappedFile* mmf = findMMF_inlock(i->start(), /*out*/ ofs);
-
- if (MONGO_unlikely(!mmf->willNeedRemap())) {
- // tag this mmf as needing a remap of its private view later.
- // usually it will already be dirty/already set, so we do the if above first
- // to avoid possibility of cpu cache line contention
- mmf->setWillNeedRemap();
- }
-
- // since we have already looked up the mmf, we go ahead and remember the write view location
- // so we don't have to find the DurableMappedFile again later in WRITETODATAFILES()
- //
- // this was for WRITETODATAFILES_Impl2 so commented out now
- //
- /*
- dassert( i->w_ptr == 0 );
- i->w_ptr = ((char*)mmf->view_write()) + ofs;
- */
-
- JEntry e;
- e.len = min(i->length(), (unsigned)(mmf->length() - ofs)); // don't write past end of file
- verify(ofs <= 0x80000000);
- e.ofs = (unsigned)ofs;
- e.setFileNo(mmf->fileSuffixNo());
-
- if (mmf->relativePath() == local) {
- e.setLocalDbContextBit();
- } else if (mmf->relativePath() != lastDbPath) {
- lastDbPath = mmf->relativePath();
- JDbContext c;
- bb.appendStruct(c);
- bb.appendStr(lastDbPath.toString());
- }
-
- bb.appendStruct(e);
- bb.appendBuf(i->start(), e.len);
-
- if (MONGO_unlikely(e.len != (unsigned)i->length())) {
- log() << "journal info splitting prepBasicWrite at boundary" << endl;
-
- // This only happens if we write to the last byte in a file and
- // the first byte in another file that is mapped adjacently. I
- // think most OSs leave at least a one-page gap between
- // mappings, but better to be safe.
-
- WriteIntent next((char*)i->start() + e.len, i->length() - e.len);
- prepBasicWrite_inlock(bb, &next, lastDbPath);
- }
-}
-
-/** basic write ops / write intents. note there is no particular order to these: if we have
- two writes to the same location during the group commit interval, it is likely
- (although not assured) that it is journaled here only once.
-*/
-static void prepBasicWrites(AlignedBuilder& bb, const std::vector<WriteIntent>& intents) {
- stdx::lock_guard<stdx::mutex> lk(privateViews._mutex());
-
- // Each time write intents switch to a different database we journal a JDbContext.
- // Switches will be rare as we sort by memory location first and we batch commit.
- RelativePath lastDbPath;
-
- invariant(!intents.empty());
-
- WriteIntent last;
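- // Added note: because the intents are sorted by start address (see above), a single
- // forward pass suffices -- each intent either overlaps 'last' and is absorbed into it,
- // or starts a new run, and every completed run is journaled exactly once below.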
- for (std::vector<WriteIntent>::const_iterator i = intents.begin(); i != intents.end(); i++) {
- if (i->start() < last.end()) {
- // overlaps
- last.absorb(*i);
- } else {
- // discontinuous
- if (i != intents.begin()) {
- prepBasicWrite_inlock(bb, &last, lastDbPath);
- }
-
- last = *i;
- }
- }
-
- prepBasicWrite_inlock(bb, &last, lastDbPath);
-}
-
-/** we will build an output buffer ourselves and then use O_DIRECT
- we could be in read lock for this
- caller handles locking
- @return partially populated sectheader and _ab set
-*/
-static void _PREPLOGBUFFER(JSectHeader& h,
- AlignedBuilder& bb,
- ClockSource* cs,
- int64_t serverStartMs) {
- // Add the JSectHeader
-
- // Invalidate the total length, we will fill it in later.
- h.setSectionLen(0xffffffff);
- h.seqNumber = generateNextSeqNumber(cs, serverStartMs);
- h.fileId = j.curFileId();
-
- // Ops other than basic writes (DurOp's) go first
- const std::vector<std::shared_ptr<DurOp>>& durOps = commitJob.ops();
- for (std::vector<std::shared_ptr<DurOp>>::const_iterator i = durOps.begin(); i != durOps.end();
- i++) {
- (*i)->serialize(bb);
- }
-
- // Write intents
- const std::vector<WriteIntent>& intents = commitJob.getIntentsSorted();
- if (!intents.empty()) {
- prepBasicWrites(bb, intents);
- }
-}
-
-void PREPLOGBUFFER(/*out*/ JSectHeader& outHeader,
- AlignedBuilder& outBuffer,
- ClockSource* cs,
- int64_t serverStartMs) {
- Timer t;
- j.assureLogFileOpen(); // so fileId is set
- _PREPLOGBUFFER(outHeader, outBuffer, cs, serverStartMs);
- stats.curr()->_prepLogBufferMicros += t.micros();
-}
-}
-}
diff --git a/src/mongo/db/storage/mmap_v1/dur_recover.cpp b/src/mongo/db/storage/mmap_v1/dur_recover.cpp
deleted file mode 100644
index 936766f0160..00000000000
--- a/src/mongo/db/storage/mmap_v1/dur_recover.cpp
+++ /dev/null
@@ -1,682 +0,0 @@
-// @file dur_recover.cpp crash recovery via the journal
-
-/**
-* Copyright (C) 2009 10gen Inc.
-*
-* This program is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Affero General Public License, version 3,
-* as published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU Affero General Public License for more details.
-*
-* You should have received a copy of the GNU Affero General Public License
-* along with this program. If not, see <http://www.gnu.org/licenses/>.
-*
-* As a special exception, the copyright holders give permission to link the
-* code of portions of this program with the OpenSSL library under certain
-* conditions as described in each individual source file and distribute
-* linked combinations including the program with the OpenSSL library. You
-* must comply with the GNU Affero General Public License in all respects for
-* all of the code used other than as permitted herein. If you modify file(s)
-* with this exception, you may extend this exception to your version of the
-* file(s), but you are not obligated to do so. If you do not wish to do so,
-* delete this exception statement from your version. If you delete this
-* exception statement from all source files in the program, then also delete
-* it in the license file.
-*/
-
-#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kJournal
-
-#include "mongo/platform/basic.h"
-
-#include "mongo/db/storage/mmap_v1/dur_recover.h"
-
-#include <cstring>
-#include <fcntl.h>
-#include <iomanip>
-#include <iostream>
-#include <sys/stat.h>
-
-#include "mongo/db/client.h"
-#include "mongo/db/concurrency/d_concurrency.h"
-#include "mongo/db/operation_context.h"
-#include "mongo/db/storage/mmap_v1/compress.h"
-#include "mongo/db/storage/mmap_v1/dur_commitjob.h"
-#include "mongo/db/storage/mmap_v1/dur_journal.h"
-#include "mongo/db/storage/mmap_v1/dur_journalformat.h"
-#include "mongo/db/storage/mmap_v1/dur_stats.h"
-#include "mongo/db/storage/mmap_v1/durable_mapped_file.h"
-#include "mongo/db/storage/mmap_v1/durop.h"
-#include "mongo/db/storage/mmap_v1/mmap_v1_options.h"
-#include "mongo/platform/strnlen.h"
-#include "mongo/util/bufreader.h"
-#include "mongo/util/checksum.h"
-#include "mongo/util/destructor_guard.h"
-#include "mongo/util/exit.h"
-#include "mongo/util/hex.h"
-#include "mongo/util/log.h"
-#include "mongo/util/mongoutils/str.h"
-#include "mongo/util/scopeguard.h"
-#include "mongo/util/startup_test.h"
-
-namespace mongo {
-
-using std::shared_ptr;
-using std::unique_ptr;
-using std::endl;
-using std::hex;
-using std::map;
-using std::pair;
-using std::setw;
-using std::string;
-using std::stringstream;
-using std::vector;
-
-/**
- * Thrown when a journal section is corrupt. This is considered OK as long as it occurs while
- * processing the last file. Processing stops at the first corrupt section.
- *
- * Any logging about the nature of the corruption should happen before throwing as this class
- * contains no data.
- */
-class JournalSectionCorruptException {};
-
-namespace dur {
-
-// The singleton recovery job object
-RecoveryJob& RecoveryJob::_instance = *(new RecoveryJob());
-
-
-void removeJournalFiles();
-boost::filesystem::path getJournalDir();
-
-
-struct ParsedJournalEntry { /*copyable*/
- ParsedJournalEntry() : e(0) {}
-
- // relative path of database for the operation.
- // might be a pointer into the mmapped journal file
- const char* dbName;
-
- // those are pointers into the memory mapped journal file
- const JEntry* e; // local db sentinel is already parsed out here into dbName
-
- // if not one of the two simple JEntry's above, this is the operation:
- std::shared_ptr<DurOp> op;
-};
-
-
-/**
- * Get journal filenames, in order. Throws if unexpected content found.
- */
-static void getFiles(boost::filesystem::path dir, vector<boost::filesystem::path>& files) {
- map<unsigned, boost::filesystem::path> m;
- for (boost::filesystem::directory_iterator i(dir); i != boost::filesystem::directory_iterator();
- ++i) {
- boost::filesystem::path filepath = *i;
- string fileName = boost::filesystem::path(*i).leaf().string();
- if (str::startsWith(fileName, "j._")) {
- unsigned u = str::toUnsigned(str::after(fileName, '_'));
- if (m.count(u)) {
- uasserted(13531,
- str::stream() << "unexpected files in journal directory " << dir.string()
- << " : "
- << fileName);
- }
- m.insert(pair<unsigned, boost::filesystem::path>(u, filepath));
- }
- }
- for (map<unsigned, boost::filesystem::path>::iterator i = m.begin(); i != m.end(); ++i) {
- if (i != m.begin() && m.count(i->first - 1) == 0) {
- uasserted(13532,
- str::stream() << "unexpected file in journal directory " << dir.string()
- << " : "
- << boost::filesystem::path(i->second).leaf().string()
- << " : can't find its preceding file");
- }
- files.push_back(i->second);
- }
-}
-
-/** read through the memory mapped data of a journal file (journal/j._<n> file)
- throws
-*/
-class JournalSectionIterator {
- MONGO_DISALLOW_COPYING(JournalSectionIterator);
-
-public:
- JournalSectionIterator(const JSectHeader& h,
- const void* compressed,
- unsigned compressedLen,
- bool doDurOpsRecovering)
- : _h(h), _lastDbName(0), _doDurOps(doDurOpsRecovering) {
- verify(doDurOpsRecovering);
-
- if (!uncompress((const char*)compressed, compressedLen, &_uncompressed)) {
- // We check the checksum before we uncompress, but this may still fail as the
- // checksum isn't foolproof.
- log() << "couldn't uncompress journal section" << endl;
- throw JournalSectionCorruptException();
- }
-
- const char* p = _uncompressed.c_str();
- verify(compressedLen == _h.sectionLen() - sizeof(JSectFooter) - sizeof(JSectHeader));
-
- _entries = unique_ptr<BufReader>(new BufReader(p, _uncompressed.size()));
- }
-
- // We work with the uncompressed buffer when doing a WRITETODATAFILES (for speed)
- JournalSectionIterator(const JSectHeader& h, const void* p, unsigned len)
- : _entries(new BufReader((const char*)p, len)), _h(h), _lastDbName(0), _doDurOps(false) {}
-
- bool atEof() const {
- return _entries->atEof();
- }
-
- unsigned long long seqNumber() const {
- return _h.seqNumber;
- }
-
- /** get the next entry from the log. this function parses and combines JDbContext and JEntry's.
- * throws on premature end of section.
- */
- void next(ParsedJournalEntry& e) {
- unsigned lenOrOpCode{};
- _entries->read(lenOrOpCode);
-
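- // Added note: values above JEntry::OpCode_Min are opcodes (footer, db context, or a
- // DurOp); smaller nonzero values are the data length of a basic JEntry write.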
- if (lenOrOpCode > JEntry::OpCode_Min) {
- switch (lenOrOpCode) {
- case JEntry::OpCode_Footer: {
- verify(false);
- }
-
- case JEntry::OpCode_FileCreated:
- case JEntry::OpCode_DropDb: {
- e.dbName = 0;
- std::shared_ptr<DurOp> op = DurOp::read(lenOrOpCode, *_entries);
- if (_doDurOps) {
- e.op = op;
- }
- return;
- }
-
- case JEntry::OpCode_DbContext: {
- _lastDbName = (const char*)_entries->pos();
- const unsigned limit = _entries->remaining();
- const unsigned len = strnlen(_lastDbName, limit);
- if (_lastDbName[len] != '\0') {
- log() << "problem processing journal file during recovery";
- throw JournalSectionCorruptException();
- }
-
- _entries->skip(len + 1); // skip '\0' too
- _entries->read(lenOrOpCode); // read this for the fall through
- }
- // fall through as a basic operation always follows jdbcontext, and we don't have
- // anything to return yet
-
- default:
- // fall through
- ;
- }
- }
-
- // JEntry - a basic write
- verify(lenOrOpCode && lenOrOpCode < JEntry::OpCode_Min);
- _entries->rewind(4);
- e.e = (JEntry*)_entries->skip(sizeof(JEntry));
- e.dbName = e.e->isLocalDbContext() ? "local" : _lastDbName;
- verify(e.e->len == lenOrOpCode);
- _entries->skip(e.e->len);
- }
-
-
-private:
- unique_ptr<BufReader> _entries;
- const JSectHeader _h;
- const char* _lastDbName; // pointer into the mmapped journal file
- const bool _doDurOps;
- string _uncompressed;
-};
-
-
-static string fileName(const char* dbName, int fileNo) {
- stringstream ss;
- ss << dbName << '.';
- verify(fileNo >= 0);
- if (fileNo == JEntry::DotNsSuffix)
- ss << "ns";
- else
- ss << fileNo;
-
- // relative name -> full path name
- boost::filesystem::path full(storageGlobalParams.dbpath);
- full /= ss.str();
- return full.string();
-}
-
-
-RecoveryJob::RecoveryJob()
- : _recovering(false),
- _lastDataSyncedFromLastRun(0),
- _lastSeqSkipped(0),
- _appliedAnySections(false) {}
-
-#pragma warning(push)
-// C4722: 'mongo::dur::RecoveryJob::~RecoveryJob': destructor never returns, potential memory leak
-#pragma warning(disable : 4722)
-RecoveryJob::~RecoveryJob() {
- invariant(!"RecoveryJob is intentionally leaked with a bare call to operator new()");
-}
-#pragma warning(pop)
-
-void RecoveryJob::close(OperationContext* opCtx) {
- stdx::lock_guard<stdx::mutex> lk(_mx);
- _close(opCtx);
-}
-
-void RecoveryJob::_close(OperationContext* opCtx) {
- MongoFile::flushAll(opCtx, true);
- LockMongoFilesExclusive lock(opCtx);
- for (auto& durFile : _mmfs) {
- durFile->close(opCtx);
- }
- _mmfs.clear();
-}
-
-RecoveryJob::Last::Last(OperationContext* opCtx) : _opCtx(opCtx), mmf(NULL), fileNo(-1) {
- // Make sure the files list does not change from underneath
- LockMongoFilesShared::assertAtLeastReadLocked(opCtx);
-}
-
-DurableMappedFile* RecoveryJob::Last::newEntry(const dur::ParsedJournalEntry& entry,
- RecoveryJob& rj) {
- int num = entry.e->getFileNo();
- if (num == fileNo && entry.dbName == dbName)
- return mmf;
-
- string fn = fileName(entry.dbName, num);
- MongoFile* file;
- {
- MongoFileFinder finder(_opCtx); // must release lock before creating new DurableMappedFile
- file = finder.findByPath(fn);
- }
-
- if (file) {
- verify(file->isDurableMappedFile());
- mmf = (DurableMappedFile*)file;
- } else {
- if (!rj._recovering) {
- log() << "journal error applying writes, file " << fn << " is not open" << endl;
- verify(false);
- }
- std::shared_ptr<DurableMappedFile> sp(new DurableMappedFile(_opCtx));
- verify(sp->open(_opCtx, fn));
- rj._mmfs.push_back(sp);
- mmf = sp.get();
- }
-
- // we do this last so that if an exception is thrown, we don't leave incorrect state behind
- dbName = entry.dbName;
- fileNo = num;
- return mmf;
-}
-
-void RecoveryJob::write(Last& last, const ParsedJournalEntry& entry) {
- // TODO(mathias): look into making some of these dasserts
- verify(entry.e);
- verify(entry.dbName);
-
- DurableMappedFile* mmf = last.newEntry(entry, *this);
-
- if ((entry.e->ofs + entry.e->len) <= mmf->length()) {
- verify(mmf->view_write());
- verify(entry.e->srcData());
-
- void* dest = (char*)mmf->view_write() + entry.e->ofs;
- memcpy(dest, entry.e->srcData(), entry.e->len);
- stats.curr()->_writeToDataFilesBytes += entry.e->len;
- } else {
- massert(13622, "Trying to write past end of file in WRITETODATAFILES", _recovering);
- }
-}
-
-void RecoveryJob::applyEntry(Last& last, const ParsedJournalEntry& entry, bool apply, bool dump) {
- if (entry.e) {
- if (dump) {
- stringstream ss;
- ss << " BASICWRITE " << setw(20) << entry.dbName << '.';
- if (entry.e->isNsSuffix())
- ss << "ns";
- else
- ss << setw(2) << entry.e->getFileNo();
- ss << ' ' << setw(6) << entry.e->len << ' '
- << /*hex << setw(8) << (size_t) fqe.srcData << dec <<*/
- " " << redact(hexdump(entry.e->srcData(), entry.e->len));
- log() << ss.str() << endl;
- }
- if (apply) {
- write(last, entry);
- }
- } else if (entry.op) {
- // a DurOp subclass operation
- if (dump) {
- log() << " OP " << redact(entry.op->toString()) << endl;
- }
- if (apply) {
- if (entry.op->needFilesClosed()) {
- _close(last.opCtx()); // locked in processSection
- }
- entry.op->replay();
- }
- }
-}
-
-void RecoveryJob::applyEntries(OperationContext* opCtx, const vector<ParsedJournalEntry>& entries) {
- const bool apply = (mmapv1GlobalOptions.journalOptions & MMAPV1Options::JournalScanOnly) == 0;
- const bool dump = (mmapv1GlobalOptions.journalOptions & MMAPV1Options::JournalDumpJournal);
-
- if (dump) {
- log() << "BEGIN section" << endl;
- }
-
- Last last(opCtx);
- for (vector<ParsedJournalEntry>::const_iterator i = entries.begin(); i != entries.end(); ++i) {
- applyEntry(last, *i, apply, dump);
- }
-
- if (dump) {
- log() << "END section" << endl;
- }
-}
-
-void RecoveryJob::processSection(OperationContext* opCtx,
- const JSectHeader* h,
- const void* p,
- unsigned len,
- const JSectFooter* f) {
- LockMongoFilesShared lkFiles(opCtx); // for RecoveryJob::Last
- stdx::lock_guard<stdx::mutex> lk(_mx);
-
- if (_recovering) {
- // Check the footer checksum before doing anything else.
- verify(((const char*)h) + sizeof(JSectHeader) == p);
- if (!f->checkHash(h, len + sizeof(JSectHeader))) {
- log() << "journal section checksum doesn't match";
- throw JournalSectionCorruptException();
- }
-
- static uint64_t numJournalSegmentsSkipped = 0;
- static const uint64_t kMaxSkippedSectionsToLog = 10;
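- // Added note: sections older than the recorded LSN by more than ExtraKeepTimeMs are
- // skipped; sections inside that slack window are re-applied, since the disk controller
- // may have buffered the corresponding data-file writes. Re-applying is safe because
- // journal entries are raw byte-range writes and thus idempotent.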
- if (_lastDataSyncedFromLastRun > h->seqNumber + ExtraKeepTimeMs) {
- if (_appliedAnySections) {
- severe() << "Journal section sequence number " << h->seqNumber
- << " is lower than the threshold for applying ("
- << h->seqNumber + ExtraKeepTimeMs
- << ") but we have already applied some journal sections. This implies a "
- << "corrupt journal file.";
- fassertFailed(34369);
- }
-
- if (++numJournalSegmentsSkipped < kMaxSkippedSectionsToLog) {
- log() << "recover skipping application of section seq:" << h->seqNumber
- << " < lsn:" << _lastDataSyncedFromLastRun << endl;
- } else if (numJournalSegmentsSkipped == kMaxSkippedSectionsToLog) {
- log() << "recover skipping application of section more..." << endl;
- }
- _lastSeqSkipped = h->seqNumber;
- return;
- }
-
- if (!_appliedAnySections) {
- _appliedAnySections = true;
- if (numJournalSegmentsSkipped >= kMaxSkippedSectionsToLog) {
- // Log the last skipped section's sequence number if it hasn't been logged before.
- log() << "recover final skipped journal section had sequence number "
- << _lastSeqSkipped;
- }
- log() << "recover applying initial journal section with sequence number "
- << h->seqNumber;
- }
- }
-
- unique_ptr<JournalSectionIterator> i;
- if (_recovering) {
- i = unique_ptr<JournalSectionIterator>(new JournalSectionIterator(*h, p, len, _recovering));
- } else {
- i = unique_ptr<JournalSectionIterator>(
- new JournalSectionIterator(*h, /*after header*/ p, /*w/out header*/ len));
- }
-
- // we use a static so that we don't have to reallocate every time through. occasionally we
- // go back to a small allocation so that if there was a growth spike it won't stick forever.
- static vector<ParsedJournalEntry> entries;
- entries.clear();
- /** TEMP uncomment
- RARELY OCCASIONALLY {
- if( entries.capacity() > 2048 ) {
- entries.shrink_to_fit();
- entries.reserve(2048);
- }
- }
- */
-
- // first read all entries to make sure this section is valid
- ParsedJournalEntry e;
- while (!i->atEof()) {
- i->next(e);
- entries.push_back(e);
- }
-
- // got all the entries for one group commit. apply them:
- applyEntries(opCtx, entries);
-}
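
The two-phase structure above (verify the section footer's checksum and parse every entry before applying anything) is what makes a torn final section recoverable. A minimal, self-contained sketch of that validate-then-apply pattern, using toy types and a toy checksum rather than the MMAPv1 ones:

    #include <cstdint>
    #include <stdexcept>
    #include <vector>

    struct Entry {
        uint32_t checksum;
        int32_t payload;
    };

    static uint32_t toyChecksum(const Entry& e) {
        return static_cast<uint32_t>(e.payload) * 2654435761u;  // toy hash, not the real one
    }

    // Validate every entry of a section before applying any of it, so a torn
    // or corrupt section is rejected atomically.
    void applySection(const std::vector<Entry>& section, std::vector<int32_t>& state) {
        for (const Entry& e : section)
            if (toyChecksum(e) != e.checksum)
                throw std::runtime_error("journal section checksum doesn't match");
        for (const Entry& e : section)
            state.push_back(e.payload);  // apply phase: validation can no longer fail
    }
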
-
-/** apply a specific journal file, that is already mmap'd
- @param p start of the memory mapped file
- @return true if this is detected to be the last file (ends abruptly)
-*/
-bool RecoveryJob::processFileBuffer(OperationContext* opCtx, const void* p, unsigned len) {
- try {
- unsigned long long fileId;
- BufReader br(p, len);
-
- {
- // read file header
- JHeader h;
- std::memset(&h, 0, sizeof(h));
-
- br.read(h);
-
- if (!h.valid()) {
- log() << "Journal file header invalid. This could indicate corruption, or "
- << "an unclean shutdown while writing the first section in a journal "
- << "file.";
- throw JournalSectionCorruptException();
- }
-
- if (!h.versionOk()) {
- log() << "journal file version number mismatch got:" << hex << h._version
- << " expected:" << hex << (unsigned)JHeader::CurrentVersion
- << ". if you have just upgraded, recover with old version of mongod, "
- "terminate cleanly, then upgrade."
- << endl;
-                // Not using JournalSectionCorruptException as we don't want to ignore
-                // journal files on upgrade.
- uasserted(13536, str::stream() << "journal version number mismatch " << h._version);
- }
- fileId = h.fileId;
- if (mmapv1GlobalOptions.journalOptions & MMAPV1Options::JournalDumpJournal) {
- log() << "JHeader::fileId=" << fileId << endl;
- }
- }
-
- // read sections
- while (!br.atEof()) {
- JSectHeader h;
- std::memset(&h, 0, sizeof(h));
-
- br.peek(h);
- if (h.fileId != fileId) {
- if (kDebugBuild ||
- (mmapv1GlobalOptions.journalOptions & MMAPV1Options::JournalDumpJournal)) {
- log() << "Ending processFileBuffer at differing fileId want:" << fileId
- << " got:" << h.fileId << endl;
- log() << " sect len:" << h.sectionLen() << " seqnum:" << h.seqNumber << endl;
- }
- return true;
- }
- unsigned slen = h.sectionLen();
- unsigned dataLen = slen - sizeof(JSectHeader) - sizeof(JSectFooter);
- const char* hdr = (const char*)br.skip(h.sectionLenWithPadding());
- const char* data = hdr + sizeof(JSectHeader);
- const char* footer = data + dataLen;
- processSection(
- opCtx, (const JSectHeader*)hdr, data, dataLen, (const JSectFooter*)footer);
-
- // ctrl c check
- uassert(ErrorCodes::Interrupted,
- "interrupted during journal recovery",
- !globalInShutdownDeprecated());
- }
- } catch (const DBException& ex) {
- if (ex.code() != ErrorCodes::Overflow)
- throw; // Only ignore errors related to the file abruptly ending.
-
- if (mmapv1GlobalOptions.journalOptions & MMAPV1Options::JournalDumpJournal)
- log() << "ABRUPT END" << endl;
- return true; // abrupt end
- } catch (const JournalSectionCorruptException&) {
- if (mmapv1GlobalOptions.journalOptions & MMAPV1Options::JournalDumpJournal)
- log() << "ABRUPT END" << endl;
- return true; // abrupt end
- }
-
- return false; // non-abrupt end
-}
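
processFileBuffer relies on the reader to distinguish corruption from simple truncation: a read past the end of the mapped file surfaces as ErrorCodes::Overflow and is treated as a legitimate abrupt end of the last journal file. A hedged sketch of such a bounds-checked reader; this is a stand-in, not BufReader's actual implementation:

    #include <cstddef>
    #include <cstring>
    #include <stdexcept>

    class BoundedReader {
    public:
        BoundedReader(const void* p, size_t len)
            : _pos(static_cast<const char*>(p)), _end(_pos + len) {}

        template <typename T>
        void read(T& out) {
            if (static_cast<size_t>(_end - _pos) < sizeof(T))
                throw std::out_of_range("overflow");  // plays the role of ErrorCodes::Overflow
            std::memcpy(&out, _pos, sizeof(T));
            _pos += sizeof(T);
        }

        bool atEof() const {
            return _pos == _end;
        }

    private:
        const char* _pos;
        const char* _end;
    };
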
-
-/** apply a specific journal file */
-bool RecoveryJob::processFile(OperationContext* opCtx, boost::filesystem::path journalfile) {
- log() << "recover " << journalfile.string() << endl;
-
- try {
- if (boost::filesystem::file_size(journalfile.string()) == 0) {
- log() << "recover info " << journalfile.string() << " has zero length" << endl;
- return true;
- }
- } catch (...) {
-        // If something weird happens, like a permissions problem, keep going so that the
-        // massert below can (presumably) report the failure.
- log() << "recover exception checking filesize" << endl;
- }
-
- MemoryMappedFile f{opCtx, MongoFile::Options::READONLY | MongoFile::Options::SEQUENTIAL};
- ON_BLOCK_EXIT([&f, &opCtx] {
- LockMongoFilesExclusive lock(opCtx);
- f.close(opCtx);
- });
- void* p = f.map(opCtx, journalfile.string().c_str());
- massert(13544, str::stream() << "recover error couldn't open " << journalfile.string(), p);
- return processFileBuffer(opCtx, p, (unsigned)f.length());
-}
-
-/** @param files all the j._0 style files we need to apply for recovery */
-void RecoveryJob::go(OperationContext* opCtx, vector<boost::filesystem::path>& files) {
- log() << "recover begin" << endl;
- LockMongoFilesExclusive lkFiles(opCtx); // for RecoveryJob::Last
- _recovering = true;
-
- // load the last sequence number synced to the datafiles on disk before the last crash
- _lastDataSyncedFromLastRun = journalReadLSN();
- log() << "recover lsn: " << _lastDataSyncedFromLastRun << endl;
-
- for (unsigned i = 0; i != files.size(); ++i) {
- bool abruptEnd = processFile(opCtx, files[i]);
- if (abruptEnd && i + 1 < files.size()) {
- log() << "recover error: abrupt end to file " << files[i].string()
- << ", yet it isn't the last journal file" << endl;
- close(opCtx);
- uasserted(13535, "recover abrupt journal file end");
- }
- }
-
- if (_lastSeqSkipped && !_appliedAnySections) {
- log() << "recover journal replay completed without applying any sections. "
- << "This can happen if there were no writes after the last fsync of the data files. "
- << "Last skipped sections had sequence number " << _lastSeqSkipped;
- }
-
- close(opCtx);
-
- if (mmapv1GlobalOptions.journalOptions & MMAPV1Options::JournalScanOnly) {
- uasserted(13545,
- str::stream() << "--durOptions " << (int)MMAPV1Options::JournalScanOnly
- << " (scan only) specified");
- }
-
- log() << "recover cleaning up" << endl;
- removeJournalFiles();
- log() << "recover done" << endl;
- okToCleanUp = true;
- _recovering = false;
-}
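
The per-section skip decision made in processSection, driven by the LSN loaded here, can be summarized in isolation. A toy sketch with hypothetical names; the margin mirrors ExtraKeepTimeMs, and re-applying borderline sections is assumed safe because journal entries are physical writes:

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    struct Section {
        uint64_t seqNumber;
    };

    // Count the sections recovery would apply: anything at or above the last
    // synced LSN, minus a safety margin, is (re-)applied; older sections are
    // skipped because the data files already contain their effects.
    size_t countSectionsToApply(const std::vector<Section>& sections,
                                uint64_t lastDataSyncedLsn,
                                uint64_t extraKeepMs) {
        size_t applied = 0;
        for (const Section& s : sections) {
            if (lastDataSyncedLsn > s.seqNumber + extraKeepMs)
                continue;  // already durable in the data files
            ++applied;
        }
        return applied;
    }
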
-
-void _recover(OperationContext* opCtx) {
- verify(storageGlobalParams.dur);
-
- boost::filesystem::path p = getJournalDir();
- if (!exists(p)) {
- log() << "directory " << p.string()
- << " does not exist, there will be no recovery startup step" << endl;
- okToCleanUp = true;
- return;
- }
-
- vector<boost::filesystem::path> journalFiles;
- getFiles(p, journalFiles);
-
- if (journalFiles.empty()) {
- log() << "recover : no journal files present, no recovery needed" << endl;
- okToCleanUp = true;
- return;
- }
-
- RecoveryJob::get().go(opCtx, journalFiles);
-}
-
-/** recover from a crash
- called during startup
- throws on error
-*/
-void replayJournalFilesAtStartup() {
- // we use a lock so that exitCleanly will wait for us
- // to finish (or at least to notice what is up and stop)
- auto opCtx = cc().makeOperationContext();
- Lock::GlobalWrite lk(opCtx.get());
-
- _recover(opCtx.get()); // throws on interruption
-}
-
-struct BufReaderY {
- int a, b;
-};
-class BufReaderUnitTest : public StartupTest {
-
-public:
- void run() {
- BufReader r((void*)"abcdabcdabcd", 12);
- char x;
- BufReaderY y;
- r.read(x); // cout << x; // a
- verify(x == 'a');
- r.read(y);
- r.read(x);
- verify(x == 'b');
- }
-} brunittest;
-
-} // namespace dur
-} // namespace mongo
diff --git a/src/mongo/db/storage/mmap_v1/dur_recover.h b/src/mongo/db/storage/mmap_v1/dur_recover.h
deleted file mode 100644
index 79ce0b03e5d..00000000000
--- a/src/mongo/db/storage/mmap_v1/dur_recover.h
+++ /dev/null
@@ -1,119 +0,0 @@
-// @file dur_recover.h durability support
-
-/**
-* Copyright (C) 2009 10gen Inc.
-*
-* This program is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Affero General Public License, version 3,
-* as published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU Affero General Public License for more details.
-*
-* You should have received a copy of the GNU Affero General Public License
-* along with this program. If not, see <http://www.gnu.org/licenses/>.
-*
-* As a special exception, the copyright holders give permission to link the
-* code of portions of this program with the OpenSSL library under certain
-* conditions as described in each individual source file and distribute
-* linked combinations including the program with the OpenSSL library. You
-* must comply with the GNU Affero General Public License in all respects for
-* all of the code used other than as permitted herein. If you modify file(s)
-* with this exception, you may extend this exception to your version of the
-* file(s), but you are not obligated to do so. If you do not wish to do so,
-* delete this exception statement from your version. If you delete this
-* exception statement from all source files in the program, then also delete
-* it in the license file.
-*/
-
-#pragma once
-
-#include <boost/filesystem/operations.hpp>
-#include <list>
-#include <memory>
-
-#include "mongo/db/service_context.h"
-#include "mongo/db/storage/mmap_v1/dur_journalformat.h"
-#include "mongo/stdx/mutex.h"
-#include "mongo/util/concurrency/mutex.h"
-
-namespace mongo {
-
-class DurableMappedFile;
-
-namespace dur {
-
-struct ParsedJournalEntry;
-
-/** call go() to execute a recovery from existing journal files.
- */
-class RecoveryJob {
- MONGO_DISALLOW_COPYING(RecoveryJob);
-
-public:
- RecoveryJob();
- ~RecoveryJob();
-
- void go(OperationContext* opCtx, std::vector<boost::filesystem::path>& files);
-
- /** @param data data between header and footer. compressed if recovering. */
- void processSection(OperationContext* opCtx,
- const JSectHeader* h,
- const void* data,
- unsigned len,
- const JSectFooter* f);
-
- // locks and calls _close()
- void close(OperationContext* opCtx);
-
- static RecoveryJob& get() {
- return _instance;
- }
-
-private:
- class Last {
- public:
- Last(OperationContext* opCtx);
-
- DurableMappedFile* newEntry(const ParsedJournalEntry&, RecoveryJob&);
-
- OperationContext* opCtx() {
- return _opCtx;
- }
-
- private:
- OperationContext* _opCtx;
- DurableMappedFile* mmf;
- std::string dbName;
- int fileNo;
- };
-
-
- void write(Last& last, const ParsedJournalEntry& entry); // actually writes to the file
- void applyEntry(Last& last, const ParsedJournalEntry& entry, bool apply, bool dump);
- void applyEntries(OperationContext* opCtx, const std::vector<ParsedJournalEntry>& entries);
- bool processFileBuffer(OperationContext* opCtx, const void*, unsigned len);
- bool processFile(OperationContext* opCtx, boost::filesystem::path journalfile);
- void _close(OperationContext* opCtx); // doesn't lock
-
- // Set of memory mapped files and a mutex to protect them
- stdx::mutex _mx;
- std::list<std::shared_ptr<DurableMappedFile>> _mmfs;
-
- // Are we in recovery or WRITETODATAFILES
- bool _recovering;
-
- unsigned long long _lastDataSyncedFromLastRun;
- unsigned long long _lastSeqSkipped;
- bool _appliedAnySections;
-
-
- static RecoveryJob& _instance;
-};
-
-
-void replayJournalFilesAtStartup();
-}
-}
diff --git a/src/mongo/db/storage/mmap_v1/dur_recovery_unit.cpp b/src/mongo/db/storage/mmap_v1/dur_recovery_unit.cpp
deleted file mode 100644
index ff5b114975f..00000000000
--- a/src/mongo/db/storage/mmap_v1/dur_recovery_unit.cpp
+++ /dev/null
@@ -1,316 +0,0 @@
-/**
- * Copyright (C) 2014 MongoDB Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kStorage
-
-#include "mongo/platform/basic.h"
-
-#include "mongo/db/storage/mmap_v1/dur_recovery_unit.h"
-
-#include <algorithm>
-#include <limits>
-#include <map>
-#include <set>
-#include <string>
-
-#include "mongo/db/storage/mmap_v1/dur.h"
-#include "mongo/db/storage/mmap_v1/durable_mapped_file.h"
-#include "mongo/util/assert_util.h"
-#include "mongo/util/log.h"
-
-namespace mongo {
-
-DurRecoveryUnit::DurRecoveryUnit()
- : _writeCount(0), _writeBytes(0), _inUnitOfWork(false), _rollbackWritesDisabled(false) {}
-
-void DurRecoveryUnit::beginUnitOfWork(OperationContext* opCtx) {
- invariant(!_inUnitOfWork);
- _inUnitOfWork = true;
-}
-
-void DurRecoveryUnit::commitUnitOfWork() {
- invariant(_inUnitOfWork);
-
- commitChanges();
-
- // global journal flush opportunity
- getDur().commitIfNeeded();
-
- resetChanges();
-}
-
-void DurRecoveryUnit::abortUnitOfWork() {
- invariant(_inUnitOfWork);
-
- rollbackChanges();
- resetChanges();
-}
-
-void DurRecoveryUnit::abandonSnapshot() {
- invariant(!_inUnitOfWork);
- // no-op since we have no transaction
-}
-
-void DurRecoveryUnit::commitChanges() {
- if (getDur().isDurable())
- markWritesForJournaling();
-
- try {
- for (Changes::const_iterator it = _changes.begin(), end = _changes.end(); it != end; ++it) {
- (*it)->commit(boost::none);
- }
- } catch (...) {
- std::terminate();
- }
-}
-
-void DurRecoveryUnit::markWritesForJournaling() {
- if (!_writeCount)
- return;
-
- typedef std::pair<void*, unsigned> Intent;
- std::vector<Intent> intents;
- const size_t numStoredWrites = _initialWrites.size() + _mergedWrites.size();
- intents.reserve(numStoredWrites);
-
- // Show very large units of work at LOG(1) level as they may hint at performance issues
- const int logLevel = (_writeCount > 100 * 1000 || _writeBytes > 50 * 1024 * 1024) ? 1 : 3;
-
- LOG(logLevel) << _writeCount << " writes (" << _writeBytes / 1024 << " kB) covered by "
- << numStoredWrites << " pre-images (" << _preimageBuffer.size() / 1024 << " kB) ";
-
- // orders the initial, unmerged writes, by address so we can coalesce overlapping and
- // adjacent writes
- std::sort(_initialWrites.begin(), _initialWrites.end());
-
- if (!_initialWrites.empty()) {
- intents.push_back(std::make_pair(_initialWrites.front().addr, _initialWrites.front().len));
- for (InitialWrites::iterator it = (_initialWrites.begin() + 1), end = _initialWrites.end();
- it != end;
- ++it) {
- Intent& lastIntent = intents.back();
- char* lastEnd = static_cast<char*>(lastIntent.first) + lastIntent.second;
- if (it->addr <= lastEnd) {
- // overlapping or adjacent, so extend.
- ptrdiff_t extendedLen = (it->end()) - static_cast<char*>(lastIntent.first);
- lastIntent.second = std::max(lastIntent.second, unsigned(extendedLen));
- } else {
- // not overlapping, so create a new intent
- intents.push_back(std::make_pair(it->addr, it->len));
- }
- }
- }
-
- MergedWrites::iterator it = _mergedWrites.begin();
- if (it != _mergedWrites.end()) {
- intents.push_back(std::make_pair(it->addr, it->len));
- while (++it != _mergedWrites.end()) {
- // Check the property that write intents are sorted and don't overlap.
- invariant(it->addr >= intents.back().first);
- Intent& lastIntent = intents.back();
- char* lastEnd = static_cast<char*>(lastIntent.first) + lastIntent.second;
- if (it->addr == lastEnd) {
- // adjacent, so extend.
- lastIntent.second += it->len;
- } else {
- // not overlapping, so create a new intent
- invariant(it->addr > lastEnd);
- intents.push_back(std::make_pair(it->addr, it->len));
- }
- }
- }
- LOG(logLevel) << _mergedWrites.size() << " pre-images "
- << "coalesced into " << intents.size() << " write intents";
-
- getDur().declareWriteIntents(intents);
-}
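
Both loops above are instances of interval coalescing: with writes ordered by start address, the current intent is extended while the next write overlaps or is adjacent, and a new intent is started otherwise. A self-contained sketch of that step (the Intent alias here is hypothetical):

    #include <algorithm>
    #include <cstddef>
    #include <utility>
    #include <vector>

    using Intent = std::pair<char*, size_t>;  // (start address, length)

    // Merge overlapping or adjacent writes into a minimal set of intents.
    std::vector<Intent> coalesce(std::vector<Intent> writes) {
        std::sort(writes.begin(), writes.end());  // order by start address
        std::vector<Intent> intents;
        for (const Intent& w : writes) {
            char* wEnd = w.first + w.second;
            if (!intents.empty() && w.first <= intents.back().first + intents.back().second) {
                // Overlapping or adjacent: extend the previous intent if needed.
                char* lastEnd = intents.back().first + intents.back().second;
                if (wEnd > lastEnd)
                    intents.back().second = static_cast<size_t>(wEnd - intents.back().first);
            } else {
                intents.push_back(w);
            }
        }
        return intents;
    }
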
-
-void DurRecoveryUnit::resetChanges() {
- _writeCount = 0;
- _writeBytes = 0;
- _initialWrites.clear();
- _mergedWrites.clear();
- _changes.clear();
- _preimageBuffer.clear();
- _rollbackWritesDisabled = false;
- _inUnitOfWork = false;
-}
-
-void DurRecoveryUnit::rollbackChanges() {
-    // First roll back disk writes, then Changes. This matches the behavior of other storage
-    // engines that either roll back a transaction or don't write a write batch.
-
- if (_rollbackWritesDisabled) {
- LOG(2) << " ***** NOT ROLLING BACK " << _writeCount << " disk writes";
- } else {
- LOG(2) << " ***** ROLLING BACK " << _writeCount << " disk writes";
-
- // First roll back the merged writes. These have no overlap or ordering requirement
- // other than needing to be rolled back before all _initialWrites.
- for (MergedWrites::iterator it = _mergedWrites.begin(); it != _mergedWrites.end(); ++it) {
- _preimageBuffer.copy(it->addr, it->len, it->offset);
- }
-
- // Then roll back the initial writes in LIFO order, as these might have overlaps.
- for (InitialWrites::reverse_iterator rit = _initialWrites.rbegin();
- rit != _initialWrites.rend();
- ++rit) {
- _preimageBuffer.copy(rit->addr, rit->len, rit->offset);
- }
- }
-
- LOG(2) << " ***** ROLLING BACK " << (_changes.size()) << " custom changes";
-
- try {
- for (int i = _changes.size() - 1; i >= 0; i--) {
- LOG(2) << "CUSTOM ROLLBACK " << redact(demangleName(typeid(*_changes[i])));
- _changes[i]->rollback();
- }
- } catch (...) {
- std::terminate();
- }
-}
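
The ordering here matters: merged writes never overlap, but initial writes may, so they are undone newest-first and the oldest pre-image wins. A minimal sketch of LIFO pre-image restore under those assumptions (the LoggedWrite type is hypothetical):

    #include <cstddef>
    #include <cstring>
    #include <string>
    #include <vector>

    struct LoggedWrite {
        char* addr;     // where the write landed
        size_t len;     // how many bytes
        size_t offset;  // where its pre-image lives in the preimage buffer
    };

    // Restore pre-images newest-first so that, for overlapping writes, the
    // oldest pre-image is the last one copied back and therefore wins.
    void rollbackWrites(const std::vector<LoggedWrite>& writes, const std::string& preimages) {
        for (auto it = writes.rbegin(); it != writes.rend(); ++it)
            std::memcpy(it->addr, preimages.data() + it->offset, it->len);
    }
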
-
-bool DurRecoveryUnit::waitUntilDurable() {
- invariant(!_inUnitOfWork);
- return getDur().waitUntilDurable();
-}
-
-void DurRecoveryUnit::mergingWritingPtr(char* addr, size_t len) {
- // The invariant is that all writes are non-overlapping and non-empty. So, a single
- // writingPtr call may result in a number of new segments added. At this point, we cannot
- // in general merge adjacent writes, as that would require inefficient operations on the
- // preimage buffer.
-
- MergedWrites::iterator coveringWrite = _mergedWrites.upper_bound(Write(addr, 0, 0));
-
- char* const end = addr + len;
- while (addr < end) {
- dassert(coveringWrite == _mergedWrites.end() || coveringWrite->end() > addr);
-
- // Determine whether addr[0] is already covered by a write or not.
- // If covered, adjust addr and len to exclude the covered run from addr[0] onwards.
-
- if (coveringWrite != _mergedWrites.end()) {
- char* const cwEnd = coveringWrite->end();
-
- if (coveringWrite->addr <= addr) {
-                // If the covering write begins at or before addr[0], then addr[0] is covered.
- // While the existing pre-image will not generally be the same as the data
- // being written now, during rollback only the oldest pre-image matters.
-
- if (end <= cwEnd) {
- break; // fully covered
- }
-
- addr = cwEnd;
- coveringWrite++;
- dassert(coveringWrite == _mergedWrites.end() || coveringWrite->addr >= cwEnd);
- }
- }
- dassert(coveringWrite == _mergedWrites.end() || coveringWrite->end() > addr);
-
- // If the next coveringWrite overlaps, adjust the end of the uncovered region.
- char* uncoveredEnd = end;
- if (coveringWrite != _mergedWrites.end() && coveringWrite->addr < end) {
- uncoveredEnd = coveringWrite->addr;
- }
-
- const size_t uncoveredLen = uncoveredEnd - addr;
- if (uncoveredLen) {
- // We are writing to a region that hasn't been declared previously.
- _mergedWrites.insert(Write(addr, uncoveredLen, _preimageBuffer.size()));
-
- // Windows requires us to adjust the address space *before* we write to anything.
- privateViews.makeWritable(addr, uncoveredLen);
-
- if (!_rollbackWritesDisabled) {
- _preimageBuffer.append(addr, uncoveredLen);
- }
- addr = uncoveredEnd;
- }
- }
-}
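
Stripped of the preimage and address-space bookkeeping, the walk above computes which sub-ranges of a new write are not yet covered. A self-contained sketch of that computation over a set ordered by end address (the Range type is hypothetical):

    #include <cstddef>
    #include <set>
    #include <vector>

    struct Range {
        char* addr;
        size_t len;
        char* end() const { return addr + len; }
    };

    struct ByEnd {
        bool operator()(const Range& a, const Range& b) const { return a.end() < b.end(); }
    };

    // Return the sub-ranges of [addr, addr+len) not already covered by `covered`,
    // whose ranges are non-overlapping and ordered by end address.
    std::vector<Range> uncoveredParts(const std::set<Range, ByEnd>& covered, char* addr, size_t len) {
        std::vector<Range> out;
        char* const end = addr + len;
        auto it = covered.upper_bound(Range{addr, 0});  // first range ending past addr
        while (addr < end) {
            if (it != covered.end() && it->addr <= addr) {
                if (end <= it->end())
                    break;  // fully covered from here on
                addr = it->end();  // skip the covered run
                ++it;
                continue;
            }
            // Uncovered up to the next covering range (or the end of the write).
            char* stop = (it != covered.end() && it->addr < end) ? it->addr : end;
            out.push_back(Range{addr, static_cast<size_t>(stop - addr)});
            addr = stop;
        }
        return out;
    }
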
-
-void* DurRecoveryUnit::writingPtr(void* addr, size_t len) {
- invariant(_inUnitOfWork);
-
- if (len == 0) {
- return addr; // Don't need to do anything for empty ranges.
- }
-
- invariant(len < size_t(std::numeric_limits<int>::max()));
-
- _writeCount++;
- _writeBytes += len;
- char* const data = static_cast<char*>(addr);
-
-    // The initial writes are stored in a faster, but less memory-efficient way. This will
-    // typically be enough for simple operations, where the extra cost of incremental
-    // coalescing and merging would be too much. For larger writes, more redundancy is
-    // expected, so the cost of checking for duplicates is offset by savings in copying
-    // and allocating preimage buffers. Total memory use of the preimage buffer may be up to
-    // kMaxUnmergedPreimageBytes larger than the amount of memory covered by the write intents.
-
- const size_t kMaxUnmergedPreimageBytes = kDebugBuild ? 16 * 1024 : 10 * 1024 * 1024;
-
- if (_preimageBuffer.size() + len > kMaxUnmergedPreimageBytes) {
- mergingWritingPtr(data, len);
-
- // After a merged write, no more initial writes can occur or there would be an
- // ordering violation during rollback. So, ensure that the if-condition will be true
-        // for any future write regardless of length. This is true now because
-        // mergingWritingPtr stores its first write in _preimageBuffer as well.
- invariant(_preimageBuffer.size() >= kMaxUnmergedPreimageBytes);
-
- return addr;
- }
-
- // Windows requires us to adjust the address space *before* we write to anything.
- privateViews.makeWritable(data, len);
-
- _initialWrites.push_back(Write(data, len, _preimageBuffer.size()));
-
- if (!_rollbackWritesDisabled) {
- _preimageBuffer.append(data, len);
- }
-
- return addr;
-}
-
-void DurRecoveryUnit::setRollbackWritesDisabled() {
- invariant(_inUnitOfWork);
- _rollbackWritesDisabled = true;
-}
-
-void DurRecoveryUnit::registerChange(Change* change) {
- invariant(_inUnitOfWork);
- _changes.push_back(change);
-}
-
-} // namespace mongo
diff --git a/src/mongo/db/storage/mmap_v1/dur_recovery_unit.h b/src/mongo/db/storage/mmap_v1/dur_recovery_unit.h
deleted file mode 100644
index b2c6dc0f20c..00000000000
--- a/src/mongo/db/storage/mmap_v1/dur_recovery_unit.h
+++ /dev/null
@@ -1,171 +0,0 @@
-/**
- * Copyright (C) 2014 MongoDB Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#include <set>
-#include <string>
-#include <utility>
-#include <vector>
-
-#include "mongo/base/owned_pointer_vector.h"
-#include "mongo/db/storage/recovery_unit.h"
-#include "mongo/platform/compiler.h"
-
-#pragma once
-
-namespace mongo {
-
-/**
- * RecoveryUnit for MMAPv1: tracks writes and their pre-images for rollback, and passes
- * durability calls through to getDur().
- */
-class DurRecoveryUnit : public RecoveryUnit {
-public:
- DurRecoveryUnit();
-
- void beginUnitOfWork(OperationContext* opCtx) final;
- void commitUnitOfWork() final;
- void abortUnitOfWork() final;
-
- virtual bool waitUntilDurable();
-
- virtual void abandonSnapshot();
-
- // The recovery unit takes ownership of change.
- virtual void registerChange(Change* change);
-
- virtual void* writingPtr(void* addr, size_t len);
-
- virtual void setRollbackWritesDisabled();
-
- virtual SnapshotId getSnapshotId() const {
- return SnapshotId();
- }
-
- virtual void setOrderedCommit(bool orderedCommit) {}
-
-private:
- /**
- * Marks writes for journaling, if enabled, and then commits all other Changes in order.
-     * Returns with empty _initialWrites, _mergedWrites, _changes and _preimageBuffer, but
-     * does not reset the _rollbackWritesDisabled flag. This leaves the RecoveryUnit ready
-     * for more changes that may be committed or rolled back.
- */
- void commitChanges();
-
- /**
-     * Creates a list of write intents to be journaled, and hands it off to the active
- * DurabilityInterface.
- */
- void markWritesForJournaling();
-
- /**
- * Restores state by rolling back all writes using the saved pre-images, and then
- * rolling back all other Changes in LIFO order. Resets internal state.
- */
- void rollbackChanges();
-
-
- /**
- * Version of writingPtr that checks existing writes for overlap and only stores those
- * changes not yet covered by an existing write intent and pre-image.
- */
- void mergingWritingPtr(char* data, size_t len);
-
- /**
-     * Resets to a clean state without any uncommitted changes or writes.
- */
- void resetChanges();
-
- // Changes are ordered from oldest to newest.
- typedef OwnedPointerVector<Change> Changes;
- Changes _changes;
-
-
- // Number of pending uncommitted writes. Incremented even if new write is fully covered by
- // existing writes.
- size_t _writeCount;
- // Total size of the pending uncommitted writes.
- size_t _writeBytes;
-
- /**
-     * These are memory writes inside the mmapv1 mmap-ed files. Each write is keyed by a
-     * pointer just past its end, rather than by its beginning, for the benefit of MergedWrites.
- */
- struct Write {
- Write(char* addr, int len, int offset) : addr(addr), len(len), offset(offset) {}
- Write(const Write& rhs) : addr(rhs.addr), len(rhs.len), offset(rhs.offset) {}
- Write() : addr(0), len(0), offset(0) {}
- bool operator<(const Write& rhs) const {
- return addr < rhs.addr;
- }
-
- struct compareEnd {
- bool operator()(const Write& lhs, const Write& rhs) const {
- return lhs.addr + lhs.len < rhs.addr + rhs.len;
- }
- };
-
- char* end() const {
- return addr + len;
- }
-
- char* addr;
- int len;
- int offset; // index into _preimageBuffer
- };
-
- /**
- * Writes are ordered by ending address, so MergedWrites::upper_bound() can find the first
- * overlapping write, if any. Overlapping and duplicate regions are forbidden, as rollback
-     * of merged writes undoes changes by address rather than in LIFO order. In addition, empty
- * regions are not allowed. Storing writes by age does not work well for large indexed
- * arrays, as coalescing is needed to bound the size of the preimage buffer.
- */
- typedef std::set<Write, Write::compareEnd> MergedWrites;
- MergedWrites _mergedWrites;
-
- // Generally it's more efficient to just store pre-images unconditionally and then
- // sort/eliminate duplicates at commit time. However, this can lead to excessive memory
-    // use in cases involving large index arrays, where the same memory is written many
- // times. To keep the speed for the general case and bound memory use, the first few MB of
- // pre-images are stored unconditionally, but once the threshold has been exceeded, the
- // remainder is stored in a more space-efficient datastructure.
- typedef std::vector<Write> InitialWrites;
- InitialWrites _initialWrites;
-
- std::string _preimageBuffer;
-
- bool _inUnitOfWork;
-
-
- // Default is false.
- // If true, no preimages are tracked. If rollback is subsequently attempted, the process
- // will abort.
- bool _rollbackWritesDisabled;
-};
-
-} // namespace mongo
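
The compareEnd ordering plus MergedWrites::upper_bound described in the comments above deserve a concrete illustration. A tiny runnable sketch (the W type is a toy, not the real Write): probing with a zero-length write at an address returns the first stored write whose end lies past it, i.e. the first candidate overlap:

    #include <cassert>
    #include <set>

    struct W {
        char* addr;
        int len;
        char* end() const { return addr + len; }
    };

    struct CompareEnd {
        bool operator()(const W& a, const W& b) const { return a.end() < b.end(); }
    };

    int main() {
        char buf[100];
        std::set<W, CompareEnd> writes{{buf + 10, 10}, {buf + 40, 5}};

        // Probe with a zero-length write at buf+15: upper_bound returns the first
        // stored write whose end is past buf+15, i.e. the first possible overlap.
        auto it = writes.upper_bound(W{buf + 15, 0});
        assert(it != writes.end() && it->addr == buf + 10);
        return 0;
    }
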
diff --git a/src/mongo/db/storage/mmap_v1/dur_stats.h b/src/mongo/db/storage/mmap_v1/dur_stats.h
deleted file mode 100644
index 0b3daf7f021..00000000000
--- a/src/mongo/db/storage/mmap_v1/dur_stats.h
+++ /dev/null
@@ -1,96 +0,0 @@
-// @file dur_stats.h
-
-/**
-* Copyright (C) 2012 10gen Inc.
-*
-* This program is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Affero General Public License, version 3,
-* as published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU Affero General Public License for more details.
-*
-* You should have received a copy of the GNU Affero General Public License
-* along with this program. If not, see <http://www.gnu.org/licenses/>.
-*
-* As a special exception, the copyright holders give permission to link the
-* code of portions of this program with the OpenSSL library under certain
-* conditions as described in each individual source file and distribute
-* linked combinations including the program with the OpenSSL library. You
-* must comply with the GNU Affero General Public License in all respects for
-* all of the code used other than as permitted herein. If you modify file(s)
-* with this exception, you may extend this exception to your version of the
-* file(s), but you are not obligated to do so. If you do not wish to do so,
-* delete this exception statement from your version. If you delete this
-* exception statement from all source files in the program, then also delete
-* it in the license file.
-*/
-
-#include "mongo/db/jsobj.h"
-
-namespace mongo {
-namespace dur {
-
-/**
- * journaling stats. the model here is that the commit thread is the only writer, and that reads
- * are uncommon (from a serverStatus command and such). Thus, there should not be multicore chatter
- * overhead.
- */
-struct Stats {
- struct S {
- std::string _CSVHeader() const;
- std::string _asCSV() const;
-
- void _asObj(BSONObjBuilder* builder) const;
-
- void reset();
-
- uint64_t getCurrentDurationMillis() const {
- return ((curTimeMicros64() - _startTimeMicros) / 1000);
- }
-
-
- // Not reported. Internal use only.
- uint64_t _startTimeMicros;
-
- // Reported statistics
- unsigned _durationMillis;
-
- unsigned _commits;
- unsigned _commitsInWriteLock;
-
- uint64_t _journaledBytes;
- uint64_t _uncompressedBytes;
- uint64_t _writeToDataFilesBytes;
-
- uint64_t _prepLogBufferMicros;
- uint64_t _writeToJournalMicros;
- uint64_t _writeToDataFilesMicros;
- uint64_t _remapPrivateViewMicros;
- uint64_t _commitsMicros;
- uint64_t _commitsInWriteLockMicros;
- };
-
-
- Stats();
- void reset();
-
- BSONObj asObj() const;
-
- const S* curr() const {
- return &_stats[_currIdx];
- }
- S* curr() {
- return &_stats[_currIdx];
- }
-
-private:
- S _stats[5];
- unsigned _currIdx;
-};
-
-extern Stats stats;
-}
-}
diff --git a/src/mongo/db/storage/mmap_v1/durable_mapped_file.cpp b/src/mongo/db/storage/mmap_v1/durable_mapped_file.cpp
deleted file mode 100644
index fd199817f11..00000000000
--- a/src/mongo/db/storage/mmap_v1/durable_mapped_file.cpp
+++ /dev/null
@@ -1,315 +0,0 @@
-// durable_mapped_file.cpp
-
-/**
-* Copyright (C) 2010 10gen Inc.
-*
-* This program is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Affero General Public License, version 3,
-* as published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU Affero General Public License for more details.
-*
-* You should have received a copy of the GNU Affero General Public License
-* along with this program. If not, see <http://www.gnu.org/licenses/>.
-*
-* As a special exception, the copyright holders give permission to link the
-* code of portions of this program with the OpenSSL library under certain
-* conditions as described in each individual source file and distribute
-* linked combinations including the program with the OpenSSL library. You
-* must comply with the GNU Affero General Public License in all respects for
-* all of the code used other than as permitted herein. If you modify file(s)
-* with this exception, you may extend this exception to your version of the
-* file(s), but you are not obligated to do so. If you do not wish to do so,
-* delete this exception statement from your version. If you delete this
-* exception statement from all source files in the program, then also delete
-* it in the license file.
-*/
-
-/**
- * This module adds some of our layers atop memory mapped files, specifically our handling of
- * private views & such. If you don't care about journaling/durability (temp sort files & such),
- * use the MemoryMappedFile class, not this.
- */
-
-#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kStorage
-
-#include "mongo/platform/basic.h"
-
-#include "mongo/db/storage/mmap_v1/durable_mapped_file.h"
-
-#include <utility>
-
-#include "mongo/db/concurrency/d_concurrency.h"
-#include "mongo/db/storage/mmap_v1/dur.h"
-#include "mongo/db/storage/mmap_v1/dur_journalformat.h"
-#include "mongo/db/storage/storage_options.h"
-#include "mongo/util/log.h"
-#include "mongo/util/mongoutils/str.h"
-#include "mongo/util/processinfo.h"
-
-using namespace mongoutils;
-
-namespace mongo {
-
-using std::dec;
-using std::endl;
-using std::hex;
-using std::map;
-using std::pair;
-using std::string;
-
-void DurableMappedFile::remapThePrivateView(OperationContext* opCtx) {
- verify(storageGlobalParams.dur);
-
- _willNeedRemap = false;
-
- // todo 1.9 : it turns out we require that we always remap to the same address.
- // so the remove / add isn't necessary and can be removed?
- void* old = _view_private;
- // privateViews.remove(_view_private);
- _view_private = remapPrivateView(opCtx, _view_private);
- // privateViews.add(_view_private, this);
- fassert(16112, _view_private == old);
-}
-
-/** register view. not thread-safe; caller must hold _mutex() */
-void PointerToDurableMappedFile::add_inlock(void* view, DurableMappedFile* f) {
- verify(view);
- verify(f);
- clearWritableBits_inlock(view, f->length());
- _views.insert(pair<void*, DurableMappedFile*>(view, f));
-}
-
-/** de-register view. threadsafe */
-void PointerToDurableMappedFile::remove(void* view, size_t len) {
- if (view) {
- stdx::lock_guard<stdx::mutex> lk(_m);
- clearWritableBits_inlock(view, len);
- _views.erase(view);
- }
-}
-
-#ifdef _WIN32
-void PointerToDurableMappedFile::clearWritableBits(void* privateView, size_t len) {
- stdx::lock_guard<stdx::mutex> lk(_m);
- clearWritableBits_inlock(privateView, len);
-}
-
-/** notification on unmapping so we can clear writable bits */
-void PointerToDurableMappedFile::clearWritableBits_inlock(void* privateView, size_t len) {
- for (unsigned i = reinterpret_cast<size_t>(privateView) / MemoryMappedCOWBitset::ChunkSize;
- i <= (reinterpret_cast<size_t>(privateView) + len) / MemoryMappedCOWBitset::ChunkSize;
- ++i) {
- writable.clear(i);
- dassert(!writable.get(i));
- }
-}
-
-extern stdx::mutex mapViewMutex;
-
-__declspec(noinline) void PointerToDurableMappedFile::makeChunkWritable(size_t chunkno) {
- stdx::lock_guard<stdx::mutex> lkPrivateViews(_m);
-
- if (writable.get(chunkno)) // double check lock
- return;
-
- // remap all maps in this chunk.
- // common case is a single map, but could have more than one with smallfiles or .ns files
- size_t chunkStart = chunkno * MemoryMappedCOWBitset::ChunkSize;
- size_t chunkNext = chunkStart + MemoryMappedCOWBitset::ChunkSize;
-
- stdx::lock_guard<stdx::mutex> lkMapView(mapViewMutex);
-
- map<void*, DurableMappedFile*>::iterator i = _views.upper_bound((void*)(chunkNext - 1));
- while (1) {
- const pair<void*, DurableMappedFile*> x = *(--i);
- DurableMappedFile* mmf = x.second;
- if (mmf == 0)
- break;
-
- size_t viewStart = reinterpret_cast<size_t>(x.first);
- size_t viewEnd = viewStart + mmf->length();
- if (viewEnd <= chunkStart)
- break;
-
- size_t protectStart = std::max(viewStart, chunkStart);
- dassert(protectStart < chunkNext);
-
- size_t protectEnd = std::min(viewEnd, chunkNext);
- size_t protectSize = protectEnd - protectStart;
- dassert(protectSize > 0 && protectSize <= MemoryMappedCOWBitset::ChunkSize);
-
- DWORD oldProtection;
- bool ok = VirtualProtect(
- reinterpret_cast<void*>(protectStart), protectSize, PAGE_WRITECOPY, &oldProtection);
- if (!ok) {
- DWORD dosError = GetLastError();
-
- if (dosError == ERROR_COMMITMENT_LIMIT) {
- // System has run out of memory between physical RAM & page file, tell the user
- BSONObjBuilder bb;
-
- ProcessInfo p;
- p.getExtraInfo(bb);
-
- severe() << "MongoDB has exhausted the system memory capacity.";
- severe() << "Current Memory Status: " << bb.obj();
- }
-
- severe() << "VirtualProtect for " << mmf->filename() << " chunk " << chunkno
- << " failed with " << errnoWithDescription(dosError) << " (chunk size is "
- << protectSize << ", address is " << hex << protectStart << dec << ")"
- << " in mongo::makeChunkWritable, terminating" << endl;
-
- fassertFailed(16362);
- }
- }
-
- writable.set(chunkno);
-}
-#else
-void PointerToDurableMappedFile::clearWritableBits(void* privateView, size_t len) {}
-
-void PointerToDurableMappedFile::clearWritableBits_inlock(void* privateView, size_t len) {}
-#endif
-
-PointerToDurableMappedFile::PointerToDurableMappedFile() {
-#if defined(SIZE_MAX)
- size_t max = SIZE_MAX;
-#else
- size_t max = ~((size_t)0);
-#endif
- verify(max > (size_t) this); // just checking that no one redef'd SIZE_MAX and that it is sane
-
- // this way we don't need any boundary checking in _find()
- _views.insert(pair<void*, DurableMappedFile*>((void*)0, (DurableMappedFile*)0));
- _views.insert(pair<void*, DurableMappedFile*>((void*)max, (DurableMappedFile*)0));
-}
-
-/** underscore version of find is for when you are already locked
- @param ofs out return our offset in the view
- @return the DurableMappedFile to which this pointer belongs
-*/
-DurableMappedFile* PointerToDurableMappedFile::find_inlock(void* p, /*out*/ size_t& ofs) {
- //
- // .................memory..........................
- // v1 p v2
- // [--------------------] [-------]
- //
- // e.g., _find(p) == v1
- //
- const pair<void*, DurableMappedFile*> x = *(--_views.upper_bound(p));
- DurableMappedFile* mmf = x.second;
- if (mmf) {
- size_t o = ((char*)p) - ((char*)x.first);
- if (o < mmf->length()) {
- ofs = o;
- return mmf;
- }
- }
- return 0;
-}
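
The two sentinel entries inserted by the constructor above are what allow find_inlock to decrement upper_bound unconditionally. A runnable sketch of the same trick with a hypothetical Region type:

    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <map>

    struct Region {
        size_t len;
    };

    int main() {
        std::map<const char*, const Region*> views;
        views[reinterpret_cast<const char*>(uintptr_t{0})] = nullptr;  // low sentinel
        views[reinterpret_cast<const char*>(UINTPTR_MAX)] = nullptr;   // high sentinel

        const char data[64] = {};
        const Region r{sizeof(data)};
        views[data] = &r;

        // upper_bound(p) is always decrementable and always lands on a valid
        // entry, so no boundary checks are needed in the lookup itself.
        const char* p = data + 10;
        auto it = --views.upper_bound(p);
        assert(it->second == &r && static_cast<size_t>(p - it->first) < it->second->len);
        return 0;
    }
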
-
-/** find associated MMF object for a given pointer.
- threadsafe
- @param ofs out returns offset into the view of the pointer, if found.
- @return the DurableMappedFile to which this pointer belongs. null if not found.
-*/
-DurableMappedFile* PointerToDurableMappedFile::find(void* p, /*out*/ size_t& ofs) {
- stdx::lock_guard<stdx::mutex> lk(_m);
- return find_inlock(p, ofs);
-}
-
-PointerToDurableMappedFile privateViews;
-
-// here so that it is precomputed...
-void DurableMappedFile::setPath(const std::string& f) {
- string suffix;
- string prefix;
- bool ok = str::rSplitOn(f, '.', prefix, suffix);
- uassert(13520,
- str::stream() << "DurableMappedFile only supports filenames in a certain format " << f,
- ok);
- if (suffix == "ns")
- _fileSuffixNo = dur::JEntry::DotNsSuffix;
- else
- _fileSuffixNo = (int)str::toUnsigned(suffix);
-
- _p = RelativePath::fromFullPath(storageGlobalParams.dbpath, prefix);
-}
-
-bool DurableMappedFile::open(OperationContext* opCtx, const std::string& fname) {
- LOG(3) << "mmf open " << fname;
- invariant(!_view_write);
-
- setPath(fname);
- _view_write = map(opCtx, fname.c_str());
- fassert(16333, _view_write);
- return finishOpening();
-}
-
-bool DurableMappedFile::create(OperationContext* opCtx,
- const std::string& fname,
- unsigned long long& len) {
- LOG(3) << "mmf create " << fname;
- invariant(!_view_write);
-
- setPath(fname);
- _view_write = map(opCtx, fname.c_str(), len);
- fassert(16332, _view_write);
- return finishOpening();
-}
-
-bool DurableMappedFile::finishOpening() {
- LOG(3) << "mmf finishOpening " << (void*)_view_write << ' ' << filename()
- << " len:" << length();
- if (_view_write) {
- if (storageGlobalParams.dur) {
- stdx::lock_guard<stdx::mutex> lk2(privateViews._mutex());
-
- _view_private = createPrivateMap();
- if (_view_private == 0) {
- severe() << "file " << filename() << " open/create failed in createPrivateMap";
- fassertFailed(13636);
- }
- // note that testIntent builds use this, even though it points to view_write then...
- privateViews.add_inlock(_view_private, this);
- } else {
- _view_private = _view_write;
- }
- return true;
- }
- return false;
-}
-
-void DurableMappedFile::close(OperationContext* opCtx) {
- try {
- LOG(3) << "mmf close " << filename();
-
- // If _view_private was not set, this means file open failed
- if (_view_private) {
- // Notify the durability system that we are closing a file so it can ensure we
- // will not have journaled operations with no corresponding file.
- getDur().closingFileNotification();
- }
-
- privateViews.remove(_view_private, length());
-
- MemoryMappedFile::close(opCtx);
- } catch (...) {
- error() << "exception in DurableMappedFile::close";
- }
-}
-
-DurableMappedFile::DurableMappedFile(OperationContext* opCtx, OptionSet options)
- : MemoryMappedFile(opCtx, options), _willNeedRemap(false) {
- _view_write = _view_private = 0;
-}
-
-DurableMappedFile::~DurableMappedFile() {
- invariant(isClosed());
-}
-}
diff --git a/src/mongo/db/storage/mmap_v1/durable_mapped_file.h b/src/mongo/db/storage/mmap_v1/durable_mapped_file.h
deleted file mode 100644
index 7050156fd25..00000000000
--- a/src/mongo/db/storage/mmap_v1/durable_mapped_file.h
+++ /dev/null
@@ -1,289 +0,0 @@
-// durable_mapped_file.h
-
-/*
-*
-* Copyright (C) 2008 10gen Inc.
-*
-* This program is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Affero General Public License, version 3,
-* as published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU Affero General Public License for more details.
-*
-* You should have received a copy of the GNU Affero General Public License
-* along with this program. If not, see <http://www.gnu.org/licenses/>.
-*
-* As a special exception, the copyright holders give permission to link the
-* code of portions of this program with the OpenSSL library under certain
-* conditions as described in each individual source file and distribute
-* linked combinations including the program with the OpenSSL library. You
-* must comply with the GNU Affero General Public License in all respects for
-* all of the code used other than as permitted herein. If you modify file(s)
-* with this exception, you may extend this exception to your version of the
-* file(s), but you are not obligated to do so. If you do not wish to do so,
-* delete this exception statement from your version. If you delete this
-* exception statement from all source files in the program, then also delete
-* it in the license file.
-*/
-
-#pragma once
-
-#include "mongo/base/static_assert.h"
-#include "mongo/db/operation_context.h"
-#include "mongo/db/storage/mmap_v1/mmap.h"
-#include "mongo/db/storage/mmap_v1/paths.h"
-#include "mongo/stdx/mutex.h"
-
-namespace mongo {
-
-/**
- * DurableMappedFile adds some layers atop memory mapped files - specifically our handling of
- * private views & such. if you don't care about journaling/durability (temp sort files & such) use
- * MemoryMappedFile class, not this.
- */
-class DurableMappedFile : private MemoryMappedFile {
-protected:
- virtual void* viewForFlushing() {
- return _view_write;
- }
-
-public:
- explicit DurableMappedFile(OperationContext* opCtx, OptionSet options = NONE);
- virtual ~DurableMappedFile();
-
- /**
- * Callers must be holding a `LockMongoFilesExclusive`.
- */
- virtual void close(OperationContext* opCtx);
-
- /** @return true if opened ok. */
- bool open(OperationContext* opCtx, const std::string& fname);
-
- /** @return file length */
- unsigned long long length() const {
- return MemoryMappedFile::length();
- }
-
- std::string filename() const {
- return MemoryMappedFile::filename();
- }
-
- void flush(bool sync) {
- MemoryMappedFile::flush(sync);
- }
-
-    /* Creates the file with the given length if it does not exist; otherwise uses the
-       existing file length, returned through the passed length reference.
-       @return true for ok
-    */
- bool create(OperationContext* opCtx, const std::string& fname, unsigned long long& len);
-
- /* Get the "standard" view (which is the private one).
- @return the private view.
- */
- void* getView() const {
- return _view_private;
- }
-
- /* Get the "write" view (which is required for writing).
- @return the write view.
- */
- void* view_write() const {
- return _view_write;
- }
-
-    /** for a filename a/b/c.3
-        relativePath() is "a/b/c"
-        fileSuffixNo() is 3
-        if the suffix is "ns", fileSuffixNo() is -1
-    */
- const RelativePath& relativePath() const {
- DEV verify(!_p._p.empty());
- return _p;
- }
-
- int fileSuffixNo() const {
- return _fileSuffixNo;
- }
- HANDLE getFd() {
- return MemoryMappedFile::getFd();
- }
-
- /** true if we have written.
- set in PREPLOGBUFFER, it is NOT set immediately on write intent declaration.
- reset to false in REMAPPRIVATEVIEW
- */
- bool willNeedRemap() {
- return _willNeedRemap;
- }
- void setWillNeedRemap() {
- _willNeedRemap = true;
- }
-
- void remapThePrivateView(OperationContext* opCtx);
-
- virtual bool isDurableMappedFile() {
- return true;
- }
-
-private:
- void* _view_write;
- void* _view_private;
- bool _willNeedRemap;
- RelativePath _p; // e.g. "somepath/dbname"
- int _fileSuffixNo; // e.g. 3. -1="ns"
-
- void setPath(const std::string& pathAndFileName);
- bool finishOpening();
-};
-
-
-#ifdef _WIN32
-// Simple array based bitset to track COW chunks in memory mapped files on Windows
-// A chunk is a 64MB granular region in virtual memory that we mark as COW every time we need
-// to write to a memory mapped file on Windows
-//
-class MemoryMappedCOWBitset {
- MONGO_DISALLOW_COPYING(MemoryMappedCOWBitset);
-
-public:
- // Size of the chunks we mark Copy-On-Write with VirtualProtect
- static const unsigned long long ChunkSize = 64 * 1024 * 1024;
-
- // Number of chunks we store in our bitset which are really 32-bit ints
- static const unsigned long long NChunks = 64 * 1024;
-
- // Total Virtual Memory space we can cover with the bitset
- static const unsigned long long MaxChunkMemory = ChunkSize * NChunks * sizeof(unsigned int) * 8;
-
- // Size in bytes of the bitset we allocate
- static const unsigned long long MaxChunkBytes = NChunks * sizeof(unsigned int);
-
- // 128 TB Virtual Memory space in Windows 8.1/2012 R2, 8TB before
- static const unsigned long long MaxWinMemory = 128ULL * 1024 * 1024 * 1024 * 1024;
-
- // Make sure that the chunk memory covers the Max Windows user process VM space
- MONGO_STATIC_ASSERT_MSG(MaxChunkMemory == MaxWinMemory,
- "Need a larger bitset to cover max process VM space");
-
-public:
- MemoryMappedCOWBitset() {
- MONGO_STATIC_ASSERT_MSG(MemoryMappedCOWBitset::MaxChunkBytes == sizeof(bits),
- "Validate our predicted bitset size is correct");
- }
-
- bool get(uintptr_t i) const {
- uintptr_t x = i / 32;
- verify(x < MemoryMappedCOWBitset::NChunks);
- return (bits[x].loadRelaxed() & (1 << (i % 32))) != 0;
- }
-
- // Note: assumes caller holds privateViews.mutex
- void set(uintptr_t i) {
- uintptr_t x = i / 32;
- verify(x < MemoryMappedCOWBitset::NChunks);
- bits[x].store(bits[x].loadRelaxed() | (1 << (i % 32)));
- }
-
- // Note: assumes caller holds privateViews.mutex
- void clear(uintptr_t i) {
- uintptr_t x = i / 32;
- verify(x < MemoryMappedCOWBitset::NChunks);
- bits[x].store(bits[x].loadRelaxed() & ~(1 << (i % 32)));
- }
-
-private:
- // atomic as we are doing double check locking
- AtomicUInt32 bits[MemoryMappedCOWBitset::NChunks];
-};
-#endif
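
The bitset math above packs one bit per 64 MB chunk into 32-bit words; 64 Ki words x 32 bits x 64 MB is exactly the 128 TB address space noted in the comments. A hedged, self-contained sketch of the same indexing, using plain atomics and none of the Windows-specific machinery:

    #include <atomic>
    #include <cstdint>

    constexpr uint64_t kChunkSize = 64ULL * 1024 * 1024;  // 64 MB per chunk
    std::atomic<uint32_t> gBits[64 * 1024];               // one bit per chunk, 32 per word

    bool chunkWritable(uintptr_t addr) {
        uintptr_t i = addr / kChunkSize;
        return (gBits[i / 32].load(std::memory_order_relaxed) & (1u << (i % 32))) != 0;
    }

    void markChunkWritable(uintptr_t addr) {
        uintptr_t i = addr / kChunkSize;
        gBits[i / 32].fetch_or(1u << (i % 32));
    }
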
-
-/** for durability support we want to be able to map pointers to specific DurableMappedFile objects.
-*/
-class PointerToDurableMappedFile {
- MONGO_DISALLOW_COPYING(PointerToDurableMappedFile);
-
-public:
- PointerToDurableMappedFile();
-
- /** register view.
- not-threadsafe, caller must hold _mutex()
- */
- void add_inlock(void* view, DurableMappedFile* f);
-
- /** de-register view.
- threadsafe
- */
- void remove(void* view, size_t length);
-
- /** find associated MMF object for a given pointer.
- threadsafe
- @param ofs out returns offset into the view of the pointer, if found.
- @return the DurableMappedFile to which this pointer belongs. null if not found.
- */
- DurableMappedFile* find(void* p, /*out*/ size_t& ofs);
-
- /** for doing many finds in a row with one lock operation */
- stdx::mutex& _mutex() {
- return _m;
- }
-
- /** not-threadsafe, caller must hold _mutex() */
- DurableMappedFile* find_inlock(void* p, /*out*/ size_t& ofs);
-
- /** not-threadsafe, caller must hold _mutex() */
- unsigned numberOfViews_inlock() const {
- return _views.size();
- }
-
- /** make the private map range writable (necessary for our windows implementation) */
- void makeWritable(void*, unsigned len);
-
- void clearWritableBits(void* privateView, size_t len);
-
-private:
- void clearWritableBits_inlock(void* privateView, size_t len);
-
-#ifdef _WIN32
- void makeChunkWritable(size_t chunkno);
-#endif
-
-private:
- // PointerToDurableMappedFile Mutex
- //
- // Protects:
-    //  Internal consistency of the data structure
- // Lock Ordering:
- // Must be taken before MapViewMutex if both are taken to prevent deadlocks
- stdx::mutex _m;
- std::map<void*, DurableMappedFile*> _views;
-
-#ifdef _WIN32
- // Tracks which memory mapped regions are marked as Copy on Write
- MemoryMappedCOWBitset writable;
-#endif
-};
-
-#ifdef _WIN32
-inline void PointerToDurableMappedFile::makeWritable(void* privateView, unsigned len) {
- size_t p = reinterpret_cast<size_t>(privateView);
- unsigned a = p / MemoryMappedCOWBitset::ChunkSize;
- unsigned b = (p + len) / MemoryMappedCOWBitset::ChunkSize;
-
- for (unsigned i = a; i <= b; i++) {
- if (!writable.get(i)) {
- makeChunkWritable(i);
- }
- }
-}
-#else
-inline void PointerToDurableMappedFile::makeWritable(void* _p, unsigned len) {}
-#endif
-
-// allows a pointer into any private view of a DurableMappedFile to be resolved to the
-// DurableMappedFile object
-extern PointerToDurableMappedFile privateViews;
-}
diff --git a/src/mongo/db/storage/mmap_v1/durop.cpp b/src/mongo/db/storage/mmap_v1/durop.cpp
deleted file mode 100644
index 627d53df05d..00000000000
--- a/src/mongo/db/storage/mmap_v1/durop.cpp
+++ /dev/null
@@ -1,179 +0,0 @@
-// @file durop.cpp
-
-/**
-* Copyright (C) 2010 10gen Inc.
-*
-* This program is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Affero General Public License, version 3,
-* as published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU Affero General Public License for more details.
-*
-* You should have received a copy of the GNU Affero General Public License
-* along with this program. If not, see <http://www.gnu.org/licenses/>.
-*
-* As a special exception, the copyright holders give permission to link the
-* code of portions of this program with the OpenSSL library under certain
-* conditions as described in each individual source file and distribute
-* linked combinations including the program with the OpenSSL library. You
-* must comply with the GNU Affero General Public License in all respects for
-* all of the code used other than as permitted herein. If you modify file(s)
-* with this exception, you may extend this exception to your version of the
-* file(s), but you are not obligated to do so. If you do not wish to do so,
-* delete this exception statement from your version. If you delete this
-* exception statement from all source files in the program, then also delete
-* it in the license file.
-*/
-
-#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kStorage
-
-#include "mongo/platform/basic.h"
-
-#include "mongo/db/storage/mmap_v1/durop.h"
-
-#include <boost/filesystem/operations.hpp>
-
-#include "mongo/db/concurrency/d_concurrency.h"
-#include "mongo/db/storage/mmap_v1/aligned_builder.h"
-#include "mongo/db/storage/mmap_v1/durable_mapped_file.h"
-#include "mongo/db/storage/mmap_v1/file_allocator.h"
-#include "mongo/db/storage/mmap_v1/mmap_v1_engine.h"
-#include "mongo/util/file.h"
-#include "mongo/util/log.h"
-#include "mongo/util/mongoutils/str.h"
-
-namespace mongo {
-
-using std::unique_ptr;
-using std::shared_ptr;
-using std::endl;
-using std::string;
-
-namespace dur {
-
-/** read a durop from journal file referenced by br.
-    @param opcode the opcode which has already been read from the bufreader
-*/
-shared_ptr<DurOp> DurOp::read(unsigned opcode, BufReader& br) {
- shared_ptr<DurOp> op;
- switch (opcode) {
- case JEntry::OpCode_FileCreated:
- op = shared_ptr<DurOp>(new FileCreatedOp(br));
- break;
- case JEntry::OpCode_DropDb:
- op = shared_ptr<DurOp>(new DropDbOp(br));
- break;
- default:
- massert(13546,
- (str::stream() << "journal recover: unrecognized opcode in journal " << opcode),
- false);
- }
- return op;
-}
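
DurOp::read is a small opcode-dispatch factory: the caller has already consumed the opcode, and the matching constructor consumes its own payload. A minimal sketch of that shape under hypothetical names:

    #include <cstddef>
    #include <cstdint>
    #include <memory>
    #include <stdexcept>
    #include <vector>

    struct Buf {
        std::vector<uint32_t> words;
        size_t pos = 0;
        void put(uint32_t w) { words.push_back(w); }
        uint32_t get() { return words.at(pos++); }  // throws if the log is truncated
    };

    struct JournalOp {
        virtual ~JournalOp() = default;
        virtual void replay() = 0;
    };

    struct ToyDropDbOp : JournalOp {
        uint32_t dbId;
        explicit ToyDropDbOp(Buf& b) : dbId(b.get()) {}  // opcode already consumed
        void replay() override { /* would drop database dbId */ }
    };

    std::shared_ptr<JournalOp> readOp(uint32_t opcode, Buf& b) {
        switch (opcode) {
            case 2:
                return std::make_shared<ToyDropDbOp>(b);
            default:
                throw std::runtime_error("journal recover: unrecognized opcode");
        }
    }
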
-
-void DurOp::serialize(AlignedBuilder& ab) {
- ab.appendNum(_opcode);
- _serialize(ab);
-}
-
-DropDbOp::DropDbOp(BufReader& log) : DurOp(JEntry::OpCode_DropDb) {
- unsigned long long reserved;
- log.read(reserved);
- log.read(reserved);
- log.readStr(_db);
- string reservedStr;
- log.readStr(reservedStr);
-}
-
-void DropDbOp::_serialize(AlignedBuilder& ab) {
- ab.appendNum((unsigned long long)0); // reserved for future use
- ab.appendNum((unsigned long long)0); // reserved for future use
- ab.appendStr(_db);
- ab.appendStr(""); // reserved
-}
-
-/** throws */
-void DropDbOp::replay() {
- log() << "recover replay drop db " << _db << endl;
- _deleteDataFiles(_db);
-}
-
-FileCreatedOp::FileCreatedOp(const std::string& f, unsigned long long l)
- : DurOp(JEntry::OpCode_FileCreated) {
- _p = RelativePath::fromFullPath(storageGlobalParams.dbpath, f);
- _len = l;
-}
-
-FileCreatedOp::FileCreatedOp(BufReader& log) : DurOp(JEntry::OpCode_FileCreated) {
- unsigned long long reserved;
- log.read(reserved);
- log.read(reserved);
- log.read(_len); // size of file, not length of name
- string s;
- log.readStr(s);
- _p._p = s;
-}
-
-void FileCreatedOp::_serialize(AlignedBuilder& ab) {
- ab.appendNum((unsigned long long)0); // reserved for future use
- ab.appendNum((unsigned long long)0); // reserved for future use
- ab.appendNum(_len);
- ab.appendStr(_p.toString());
-}
-
-string FileCreatedOp::toString() {
- return str::stream() << "FileCreatedOp " << _p.toString() << ' ' << _len / 1024.0 / 1024.0
- << "MB";
-}
-
-// if an operation deletes or creates a file (or moves etc.), it may need files closed.
-bool FileCreatedOp::needFilesClosed() {
- return boost::filesystem::exists(_p.asFullPath());
-}
-
-void FileCreatedOp::replay() {
-    // I believe the code assumes new files are filled with zeros; thus we have to recreate the
-    // file, or at least rewrite it, even if it is already the right length. Perhaps one day we
-    // should change that, though it is easier to avoid defects if we assume zero-filled files.
- string full = _p.asFullPath();
- if (boost::filesystem::exists(full)) {
- try {
- boost::filesystem::remove(full);
- } catch (std::exception& e) {
- LOG(1) << "recover info FileCreateOp::replay unlink " << e.what() << endl;
- }
- }
-
- log() << "recover create file " << full << ' ' << _len / 1024.0 / 1024.0 << "MB" << endl;
- if (boost::filesystem::exists(full)) {
- // first delete if exists.
- try {
- boost::filesystem::remove(full);
- } catch (...) {
- log() << "warning could not delete file " << full << endl;
- }
- }
- ensureParentDirCreated(full);
- File f;
- f.open(full.c_str());
- massert(13547, str::stream() << "recover couldn't create file " << full, f.is_open());
- unsigned long long left = _len;
- const unsigned blksz = 64 * 1024;
- unique_ptr<char[]> v(new char[blksz]);
- memset(v.get(), 0, blksz);
- fileofs ofs = 0;
- while (left) {
- unsigned long long w = left < blksz ? left : blksz;
- f.write(ofs, v.get(), (unsigned)w);
- left -= w;
- ofs += w;
- }
- f.fsync();
- flushMyDirectory(full);
- massert(13628, str::stream() << "recover failure writing file " << full, !f.bad());
-}
-}
-}
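
FileCreatedOp::replay recreates the file and writes explicit zeros in 64 KB blocks because later recovery steps assume fresh files are zero-filled. A portable sketch of the zero-fill loop, using std::ofstream in place of the internal File class; the fsync and directory-flush steps are omitted here:

    #include <fstream>
    #include <memory>
    #include <string>

    // Recreate `path` as a zero-filled file of `len` bytes, writing in 64 KB blocks.
    bool recreateZeroed(const std::string& path, unsigned long long len) {
        std::ofstream f(path, std::ios::binary | std::ios::trunc);
        if (!f)
            return false;
        const unsigned kBlk = 64 * 1024;
        std::unique_ptr<char[]> zeros(new char[kBlk]());  // value-initialized: all zero bytes
        while (len) {
            unsigned long long w = len < kBlk ? len : kBlk;
            f.write(zeros.get(), static_cast<std::streamsize>(w));
            len -= w;
        }
        f.flush();
        return static_cast<bool>(f);
    }
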
diff --git a/src/mongo/db/storage/mmap_v1/durop.h b/src/mongo/db/storage/mmap_v1/durop.h
deleted file mode 100644
index 17a78ff220d..00000000000
--- a/src/mongo/db/storage/mmap_v1/durop.h
+++ /dev/null
@@ -1,129 +0,0 @@
-// @file durop.h class DurOp and descendants
-
-/**
-* Copyright (C) 2010 10gen Inc.
-*
-* This program is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Affero General Public License, version 3,
-* as published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU Affero General Public License for more details.
-*
-* You should have received a copy of the GNU Affero General Public License
-* along with this program. If not, see <http://www.gnu.org/licenses/>.
-*
-* As a special exception, the copyright holders give permission to link the
-* code of portions of this program with the OpenSSL library under certain
-* conditions as described in each individual source file and distribute
-* linked combinations including the program with the OpenSSL library. You
-* must comply with the GNU Affero General Public License in all respects for
-* all of the code used other than as permitted herein. If you modify file(s)
-* with this exception, you may extend this exception to your version of the
-* file(s), but you are not obligated to do so. If you do not wish to do so,
-* delete this exception statement from your version. If you delete this
-* exception statement from all source files in the program, then also delete
-* it in the license file.
-*/
-
-#pragma once
-
-
-#include "mongo/db/storage/mmap_v1/dur_journalformat.h"
-#include "mongo/db/storage/mmap_v1/paths.h"
-#include "mongo/util/bufreader.h"
-
-namespace mongo {
-
-class AlignedBuilder;
-
-namespace dur {
-
-/** DurOp - Operations we journal that aren't just basic writes.
- *
- * Basic writes are logged as JEntry's, and indicated in ram temporarily as struct
- * dur::WriteIntent. We don't make WriteIntent inherit from DurOp to keep it as lean as possible as
- * there will be millions of them (we don't want a vtable for example there).
- *
- * For each op we want to journal, we define a subclass.
- */
-class DurOp { /* copyable */
-public:
- // @param opcode a sentinel value near max unsigned which uniquely identifies the operation.
- // @see dur::JEntry
- DurOp(unsigned opcode) : _opcode(opcode) {}
-
- virtual ~DurOp() {}
-
- /** serialize the op out to a builder which will then be written (presumably) to the journal */
- void serialize(AlignedBuilder& ab);
-
- /** read a durop from journal file referenced by br.
- @param opcode the opcode which has already been written from the bufreader
- */
- static std::shared_ptr<DurOp> read(unsigned opcode, BufReader& br);
-
- /** replay the operation (during recovery)
- throws
-
- For now, these are not replayed during the normal WRITETODATAFILES phase, since these
- operations are handled in other parts of the code. At some point this may change.
- */
- virtual void replay() = 0;
-
- virtual std::string toString() = 0;
-
- /** if the op requires all files to be closed before doing its work, returns true. */
- virtual bool needFilesClosed() {
- return false;
- }
-
-protected:
- /** DurOp will have already written the opcode for you */
- virtual void _serialize(AlignedBuilder& ab) = 0;
-
-private:
- const unsigned _opcode;
-};
-
-/** indicates creation of a new file */
-class FileCreatedOp : public DurOp {
-public:
- FileCreatedOp(BufReader& log);
- /** @param f filename to create, including its path */
- FileCreatedOp(const std::string& f, unsigned long long l);
- virtual void replay();
- virtual std::string toString();
- virtual bool needFilesClosed();
-
-protected:
- virtual void _serialize(AlignedBuilder& ab);
-
-private:
- RelativePath _p;
- unsigned long long _len; // size of file, not length of name
-};
-
-/** record drop of a database */
-class DropDbOp : public DurOp {
-public:
- DropDbOp(BufReader& log);
- DropDbOp(const std::string& db) : DurOp(JEntry::OpCode_DropDb), _db(db) {}
- virtual void replay();
- virtual std::string toString() {
- return std::string("DropDbOp ") + _db;
- }
- virtual bool needFilesClosed() {
- return true;
- }
-
-protected:
- virtual void _serialize(AlignedBuilder& ab);
-
-private:
- std::string _db;
-};
-}
-}
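
Recovery consumes these ops generically: DurOp::read turns an opcode from the journal back into the right subclass, needFilesClosed() gates whether mapped data files must be closed first, and replay() does the work. A hedged sketch of that dispatch loop follows; replayAll and closeAllDataFiles are assumed helper names, not functions in the tree.

#include <memory>
#include <vector>

// Hypothetical recovery loop over the DurOp interface declared above.
void replayAll(const std::vector<std::shared_ptr<DurOp>>& ops) {
    for (const auto& op : ops) {
        if (op->needFilesClosed()) {
            // e.g. DropDbOp: mapped datafiles must be unmapped/closed
            // before the op deletes the files underneath them.
            closeAllDataFiles();  // assumed helper
        }
        op->replay();  // throws on unrecoverable errors
    }
}
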
diff --git a/src/mongo/db/storage/mmap_v1/extent.cpp b/src/mongo/db/storage/mmap_v1/extent.cpp
deleted file mode 100644
index 92dd07933b6..00000000000
--- a/src/mongo/db/storage/mmap_v1/extent.cpp
+++ /dev/null
@@ -1,112 +0,0 @@
-// extent.cpp
-
-/**
-* Copyright (C) 2013 10gen Inc.
-*
-* This program is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Affero General Public License, version 3,
-* as published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU Affero General Public License for more details.
-*
-* You should have received a copy of the GNU Affero General Public License
-* along with this program. If not, see <http://www.gnu.org/licenses/>.
-*
-* As a special exception, the copyright holders give permission to link the
-* code of portions of this program with the OpenSSL library under certain
-* conditions as described in each individual source file and distribute
-* linked combinations including the program with the OpenSSL library. You
-* must comply with the GNU Affero General Public License in all respects for
-* all of the code used other than as permitted herein. If you modify file(s)
-* with this exception, you may extend this exception to your version of the
-* file(s), but you are not obligated to do so. If you do not wish to do so,
-* delete this exception statement from your version. If you delete this
-* exception statement from all source files in the program, then also delete
-* it in the license file.
-*/
-
-#include "mongo/db/storage/mmap_v1/extent.h"
-
-#include "mongo/base/static_assert.h"
-#include "mongo/db/storage/mmap_v1/extent_manager.h"
-#include "mongo/util/hex.h"
-#include "mongo/util/mongoutils/str.h"
-
-namespace mongo {
-
-using std::iostream;
-using std::string;
-using std::vector;
-
-MONGO_STATIC_ASSERT(sizeof(Extent) - 4 == 48 + 128);
-
-BSONObj Extent::dump() const {
- return BSON("loc" << myLoc.toString() << "xnext" << xnext.toString() << "xprev"
- << xprev.toString()
- << "nsdiag"
- << nsDiagnostic.toString()
- << "size"
- << length
- << "firstRecord"
- << firstRecord.toString()
- << "lastRecord"
- << lastRecord.toString());
-}
-
-void Extent::dump(iostream& s) const {
- s << " loc:" << myLoc.toString() << " xnext:" << xnext.toString()
- << " xprev:" << xprev.toString() << '\n';
- s << " nsdiag:" << nsDiagnostic.toString() << '\n';
- s << " size:" << length << " firstRecord:" << firstRecord.toString()
- << " lastRecord:" << lastRecord.toString() << '\n';
-}
-
-bool Extent::validates(const DiskLoc diskLoc, vector<string>* errors) const {
- bool extentOk = true;
- if (magic != extentSignature) {
- if (errors) {
- StringBuilder sb;
- sb << "bad extent signature " << integerToHex(magic) << " in extent "
- << diskLoc.toString();
- errors->push_back(sb.str());
- }
- extentOk = false;
- }
- if (myLoc != diskLoc) {
- if (errors) {
- StringBuilder sb;
- sb << "extent " << diskLoc.toString() << " self-pointer is " << myLoc.toString();
- errors->push_back(sb.str());
- }
- extentOk = false;
- }
- if (firstRecord.isNull() != lastRecord.isNull()) {
- if (errors) {
- StringBuilder sb;
- if (firstRecord.isNull()) {
- sb << "in extent " << diskLoc.toString()
- << ", firstRecord is null but lastRecord is " << lastRecord.toString();
- } else {
- sb << "in extent " << diskLoc.toString() << ", firstRecord is "
- << firstRecord.toString() << " but lastRecord is null";
- }
- errors->push_back(sb.str());
- }
- extentOk = false;
- }
- static const int minSize = 0x1000;
- if (length < minSize) {
- if (errors) {
- StringBuilder sb;
- sb << "length of extent " << diskLoc.toString() << " is " << length
- << ", which is less than minimum length of " << minSize;
- errors->push_back(sb.str());
- }
- extentOk = false;
- }
- return extentOk;
-}
-}
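
Extent::validates follows an accumulate-errors style: each check pushes a message onto an optional vector and clears a single ok flag, so callers see every problem in one pass rather than only the first. The same pattern in a generic, self-contained sketch; runChecks is an illustrative name.

#include <functional>
#include <string>
#include <vector>

// Accumulate-errors validation in the style of Extent::validates(): run every
// check, collect every failure message, report overall success at the end.
bool runChecks(const std::vector<std::function<std::string()>>& checks,
               std::vector<std::string>* errors) {
    bool ok = true;
    for (const auto& check : checks) {
        std::string msg = check();  // empty string == check passed
        if (!msg.empty()) {
            ok = false;
            if (errors)
                errors->push_back(msg);
        }
    }
    return ok;
}
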
diff --git a/src/mongo/db/storage/mmap_v1/extent.h b/src/mongo/db/storage/mmap_v1/extent.h
deleted file mode 100644
index 16af89fb42b..00000000000
--- a/src/mongo/db/storage/mmap_v1/extent.h
+++ /dev/null
@@ -1,89 +0,0 @@
-// extent.h
-
-/**
-* Copyright (C) 2013 10gen Inc.
-*
-* This program is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Affero General Public License, version 3,
-* as published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU Affero General Public License for more details.
-*
-* You should have received a copy of the GNU Affero General Public License
-* along with this program. If not, see <http://www.gnu.org/licenses/>.
-*
-* As a special exception, the copyright holders give permission to link the
-* code of portions of this program with the OpenSSL library under certain
-* conditions as described in each individual source file and distribute
-* linked combinations including the program with the OpenSSL library. You
-* must comply with the GNU Affero General Public License in all respects for
-* all of the code used other than as permitted herein. If you modify file(s)
-* with this exception, you may extend this exception to your version of the
-* file(s), but you are not obligated to do so. If you do not wish to do so,
-* delete this exception statement from your version. If you delete this
-* exception statement from all source files in the program, then also delete
-* it in the license file.
-*/
-
-#pragma once
-
-#include <iosfwd>
-#include <string>
-#include <vector>
-
-#include "mongo/db/storage/mmap_v1/catalog/namespace.h"
-#include "mongo/db/storage/mmap_v1/diskloc.h"
-
-namespace mongo {
-
-/* extents are datafile regions where all the records within the region
- belong to the same namespace.
-
-   When an extent is allocated, all of its empty space is placed into one big
-   DeletedRecord, which is then put on the free list.
-*/
-#pragma pack(1)
-struct Extent {
- enum { extentSignature = 0x41424344 };
- unsigned magic;
- DiskLoc myLoc;
-
- /* next/prev extent for this namespace */
- DiskLoc xnext;
- DiskLoc xprev;
-
- /* which namespace this extent is for. this is just for troubleshooting really
- and won't even be correct if the collection were renamed!
- */
- Namespace nsDiagnostic;
-
- int length; /* size of the extent, including these fields */
- DiskLoc firstRecord;
- DiskLoc lastRecord;
- char _extentData[4];
-
- // -----
-
- bool validates(const DiskLoc diskLoc, std::vector<std::string>* errors = NULL) const;
-
- BSONObj dump() const;
-
- void dump(std::iostream& s) const;
-
- bool isOk() const {
- return magic == extentSignature;
- }
- void assertOk() const {
- verify(isOk());
- }
-
- static int HeaderSize() {
- return sizeof(Extent) - 4;
- }
-};
-#pragma pack()
-}
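
The companion static assert in extent.cpp, sizeof(Extent) - 4 == 48 + 128, pins the on-disk header size: under #pragma pack(1), magic (4) plus myLoc/xnext/xprev (8 each) plus length (4) plus firstRecord/lastRecord (8 each) sum to 48 bytes, the Namespace buffer contributes 128, and the trailing 4-byte _extentData is excluded by HeaderSize(). Here is a self-contained sketch of the technique with simplified field types; ToyExtentHeader is illustrative only.

#include <cstdint>

#pragma pack(1)
struct ToyExtentHeader {
    enum { signature = 0x41424344 };
    uint32_t magic;          // 4 bytes
    uint64_t myLoc;          // 8 (stands in for DiskLoc: int file + int offset)
    uint64_t xnext, xprev;   // 16
    char nsDiagnostic[128];  // 128
    int32_t length;          // 4
    uint64_t firstRecord;    // 8
    uint64_t lastRecord;     // 8
    char data[4];            // first bytes of record data, not part of the header
};
#pragma pack()

// Same invariant as the MONGO_STATIC_ASSERT in extent.cpp.
static_assert(sizeof(ToyExtentHeader) - 4 == 48 + 128, "on-disk header layout changed");
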
diff --git a/src/mongo/db/storage/mmap_v1/extent_manager.cpp b/src/mongo/db/storage/mmap_v1/extent_manager.cpp
deleted file mode 100644
index 15222fac01a..00000000000
--- a/src/mongo/db/storage/mmap_v1/extent_manager.cpp
+++ /dev/null
@@ -1,97 +0,0 @@
-// extent_manager.cpp
-
-/**
-* Copyright (C) 2013 10gen Inc.
-*
-* This program is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Affero General Public License, version 3,
-* as published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU Affero General Public License for more details.
-*
-* You should have received a copy of the GNU Affero General Public License
-* along with this program. If not, see <http://www.gnu.org/licenses/>.
-*
-* As a special exception, the copyright holders give permission to link the
-* code of portions of this program with the OpenSSL library under certain
-* conditions as described in each individual source file and distribute
-* linked combinations including the program with the OpenSSL library. You
-* must comply with the GNU Affero General Public License in all respects for
-* all of the code used other than as permitted herein. If you modify file(s)
-* with this exception, you may extend this exception to your version of the
-* file(s), but you are not obligated to do so. If you do not wish to do so,
-* delete this exception statement from your version. If you delete this
-* exception statement from all source files in the program, then also delete
-* it in the license file.
-*/
-
-#include "mongo/db/storage/mmap_v1/extent_manager.h"
-
-#include "mongo/db/storage/mmap_v1/extent.h"
-
-namespace mongo {
-
-int ExtentManager::quantizeExtentSize(int size) const {
- if (size == maxSize()) {
- // no point doing quantizing for the entire file
- return size;
- }
-
- invariant(size <= maxSize());
-
- // make sizes align with VM page size
- int newSize = (size + 0xfff) & 0xfffff000;
-
- if (newSize > maxSize()) {
- return maxSize();
- }
-
- if (newSize < minSize()) {
- return minSize();
- }
-
- return newSize;
-}
-
-int ExtentManager::followupSize(int len, int lastExtentLen) const {
- invariant(len < maxSize());
- int x = initialSize(len);
- // changed from 1.20 to 1.35 in v2.1.x to get to larger extent size faster
- int y = (int)(lastExtentLen < 4000000 ? lastExtentLen * 4.0 : lastExtentLen * 1.35);
- int sz = y > x ? y : x;
-
- if (sz < lastExtentLen) {
- // this means there was an int overflow
- // so we should turn it into maxSize
- return maxSize();
- } else if (sz > maxSize()) {
- return maxSize();
- }
-
- sz = quantizeExtentSize(sz);
- verify(sz >= len);
-
- return sz;
-}
-
-int ExtentManager::initialSize(int len) const {
- invariant(len <= maxSize());
-
- long long sz = len * 16;
- if (len < 1000)
- sz = len * 64;
-
- if (sz >= maxSize())
- return maxSize();
-
- if (sz <= minSize())
- return minSize();
-
- int z = ExtentManager::quantizeExtentSize(sz);
- verify(z >= len);
- return z;
-}
-}
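
quantizeExtentSize rounds a request up to the next 4KB page with (size + 0xfff) & 0xfffff000 (clear the low 12 bits after adding 4095), then clamps to [minSize(), maxSize()]. A standalone sketch with a worked value; the min/max limits here are illustrative defaults, not the real manager's.

#include <cassert>

// Standalone version of ExtentManager::quantizeExtentSize.
int quantize(int size, int minSize = 0x1000, int maxSize = 0x7ff00000) {
    if (size == maxSize)
        return size;  // a whole-file extent is not rounded
    assert(size <= maxSize);
    int newSize = (size + 0xfff) & 0xfffff000;  // round up to a 4KB page
    if (newSize > maxSize)
        return maxSize;
    if (newSize < minSize)
        return minSize;
    return newSize;
}

// Worked example: quantize(5000) == 8192, since 5000 + 4095 = 9095 (0x2387)
// and clearing its low 12 bits yields 0x2000.
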
diff --git a/src/mongo/db/storage/mmap_v1/extent_manager.h b/src/mongo/db/storage/mmap_v1/extent_manager.h
deleted file mode 100644
index 6b0e18c44f3..00000000000
--- a/src/mongo/db/storage/mmap_v1/extent_manager.h
+++ /dev/null
@@ -1,197 +0,0 @@
-// extent_manager.h
-
-/**
-* Copyright (C) 2013 10gen Inc.
-*
-* This program is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Affero General Public License, version 3,
-* as published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU Affero General Public License for more details.
-*
-* You should have received a copy of the GNU Affero General Public License
-* along with this program. If not, see <http://www.gnu.org/licenses/>.
-*
-* As a special exception, the copyright holders give permission to link the
-* code of portions of this program with the OpenSSL library under certain
-* conditions as described in each individual source file and distribute
-* linked combinations including the program with the OpenSSL library. You
-* must comply with the GNU Affero General Public License in all respects for
-* all of the code used other than as permitted herein. If you modify file(s)
-* with this exception, you may extend this exception to your version of the
-* file(s), but you are not obligated to do so. If you do not wish to do so,
-* delete this exception statement from your version. If you delete this
-* exception statement from all source files in the program, then also delete
-* it in the license file.
-*/
-
-#pragma once
-
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "mongo/base/status.h"
-#include "mongo/base/string_data.h"
-#include "mongo/db/storage/mmap_v1/diskloc.h"
-
-namespace mongo {
-
-class DataFile;
-class DataFileVersion;
-class MmapV1RecordHeader;
-class RecordFetcher;
-class OperationContext;
-
-struct Extent;
-
-/**
- * ExtentManager basics
- * - one per database
- * - responsible for managing <db>.# files
- * - NOT responsible for .ns file
- * - gives out extents
- * - responsible for figuring out how to get a new extent
- * - can use any method it wants to do so
- * - this structure is NOT stored on disk
- * - files will not be removed from the EM
- * - extent size and loc are immutable
- * - this class is thread safe, once constructed and init()-ialized
- */
-class ExtentManager {
- MONGO_DISALLOW_COPYING(ExtentManager);
-
-public:
- ExtentManager() {}
-
- class Factory {
- public:
- virtual ~Factory() = default;
- virtual std::unique_ptr<ExtentManager> create(StringData dbname,
- StringData path,
- bool directoryPerDB) = 0;
- };
-
- virtual ~ExtentManager() {}
-
- virtual void close(OperationContext* opCtx) = 0;
-
- /**
- * opens all current files
- */
- virtual Status init(OperationContext* opCtx) = 0;
-
- virtual int numFiles() const = 0;
- virtual long long fileSize() const = 0;
-
- // must call Extent::reuse on the returned extent
- virtual DiskLoc allocateExtent(OperationContext* opCtx,
- bool capped,
- int size,
- bool enforceQuota) = 0;
-
- /**
- * firstExt has to be == lastExt or a chain
- */
- virtual void freeExtents(OperationContext* opCtx, DiskLoc firstExt, DiskLoc lastExt) = 0;
-
- /**
- * frees a single extent
- * ignores all fields in the Extent except: magic, myLoc, length
- */
- virtual void freeExtent(OperationContext* opCtx, DiskLoc extent) = 0;
-
- /**
- * Retrieve statistics on the free list managed by this ExtentManager.
- * @param numExtents - non-null pointer to an int that will receive the number of extents
- * @param totalFreeSizeBytes - non-null pointer to an int64_t receiving the total free
- * space in the free list.
- */
- virtual void freeListStats(OperationContext* opCtx,
- int* numExtents,
- int64_t* totalFreeSizeBytes) const = 0;
-
- /**
- * @param loc - has to be for a specific MmapV1RecordHeader
- * Note(erh): this sadly cannot be removed.
- * A MmapV1RecordHeader DiskLoc has an offset from a file, while a RecordStore really wants an
- * offset from an extent. This intrinsically links an original record store to the original
- * extent manager.
- */
- virtual MmapV1RecordHeader* recordForV1(const DiskLoc& loc) const = 0;
-
- /**
- * The extent manager tracks accesses to DiskLocs. This returns non-NULL if the DiskLoc has
- * been recently accessed, and therefore has likely been paged into physical memory.
- * Returns nullptr if the DiskLoc is Null.
- *
- */
- virtual std::unique_ptr<RecordFetcher> recordNeedsFetch(const DiskLoc& loc) const = 0;
-
- /**
- * @param loc - has to be for a specific MmapV1RecordHeader (not an Extent)
- * Note(erh) see comment on recordFor
- */
- virtual Extent* extentForV1(const DiskLoc& loc) const = 0;
-
- /**
- * @param loc - has to be for a specific MmapV1RecordHeader (not an Extent)
- * Note(erh) see comment on recordFor
- */
- virtual DiskLoc extentLocForV1(const DiskLoc& loc) const = 0;
-
- /**
- * @param loc - has to be for a specific Extent
- */
- virtual Extent* getExtent(const DiskLoc& loc, bool doSanityCheck = true) const = 0;
-
- /**
- * @return maximum size of an Extent
- */
- virtual int maxSize() const = 0;
-
- /**
- * @return minimum size of an Extent
- */
- virtual int minSize() const {
- return 0x1000;
- }
-
- /**
- * @param recordLen length of record we need
- * @param lastExt size of last extent which is a factor in next extent size
- */
- virtual int followupSize(int recordLen, int lastExtentLen) const;
-
- /** get a suggested size for the first extent in a namespace
- * @param recordLen length of record we need to insert
- */
- virtual int initialSize(int recordLen) const;
-
- /**
- * quantizes extent size to >= min + page boundary
- */
- virtual int quantizeExtentSize(int size) const;
-
- // see cacheHint methods
- enum HintType { Sequential, Random };
- class CacheHint {
- public:
- virtual ~CacheHint() {}
- };
- /**
- * Tell the system that for this extent, it will have this kind of disk access.
- * Caller takes ownership of the CacheHint.
- */
- virtual CacheHint* cacheHint(const DiskLoc& extentLoc, const HintType& hint) = 0;
-
- virtual DataFileVersion getFileFormat(OperationContext* opCtx) const = 0;
- virtual void setFileFormat(OperationContext* opCtx, DataFileVersion newVersion) = 0;
-
- virtual const DataFile* getOpenFile(int n) const = 0;
-};
-
-} // namespace mongo
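
A caller of this interface first sizes the extent (initialSize for the first one, followupSize afterwards), then allocates it and links it into the namespace's chain. The snippet below is purely a hypothetical usage sketch against the declarations above; growCollection, its parameters, and the chain bookkeeping are assumed.

// Hypothetical caller of the ExtentManager interface declared above.
void growCollection(OperationContext* opCtx,
                    ExtentManager* em,
                    int recordLen,
                    int lastExtentLen) {
    // First extent for a namespace vs. follow-up sizing policy.
    const int size = (lastExtentLen == 0) ? em->initialSize(recordLen)
                                          : em->followupSize(recordLen, lastExtentLen);
    DiskLoc loc = em->allocateExtent(opCtx, /*capped=*/false, size, /*enforceQuota=*/true);
    // Per the contract above, the caller must still call Extent::reuse on the
    // returned extent and link loc into the namespace's xnext/xprev chain.
    (void)loc;
}
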
diff --git a/src/mongo/db/storage/mmap_v1/file_allocator.cpp b/src/mongo/db/storage/mmap_v1/file_allocator.cpp
deleted file mode 100644
index daf9a13c659..00000000000
--- a/src/mongo/db/storage/mmap_v1/file_allocator.cpp
+++ /dev/null
@@ -1,492 +0,0 @@
-// @file file_allocator.cpp
-
-/* Copyright 2009 10gen Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects
- * for all of the code used other than as permitted herein. If you modify
- * file(s) with this exception, you may extend this exception to your
- * version of the file(s), but you are not obligated to do so. If you do not
- * wish to do so, delete this exception statement from your version. If you
- * delete this exception statement from all source files in the program,
- * then also delete it in the license file.
- */
-
-#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kStorage
-
-#include "mongo/platform/basic.h"
-
-#include "mongo/db/storage/mmap_v1/file_allocator.h"
-
-#include <boost/filesystem/operations.hpp>
-#include <errno.h>
-#include <fcntl.h>
-
-#if defined(__FreeBSD__)
-#include <sys/mount.h>
-#include <sys/param.h>
-#endif
-
-#if defined(__linux__)
-#include <sys/vfs.h>
-#endif
-
-#if defined(_WIN32)
-#include <io.h>
-#endif
-
-#include "mongo/db/storage/mmap_v1/paths.h"
-#include "mongo/platform/posix_fadvise.h"
-#include "mongo/stdx/functional.h"
-#include "mongo/stdx/thread.h"
-#include "mongo/util/concurrency/idle_thread_block.h"
-#include "mongo/util/concurrency/thread_name.h"
-#include "mongo/util/fail_point.h"
-#include "mongo/util/fail_point_service.h"
-#include "mongo/util/log.h"
-#include "mongo/util/mongoutils/str.h"
-#include "mongo/util/processinfo.h"
-#include "mongo/util/text.h"
-#include "mongo/util/time_support.h"
-#include "mongo/util/timer.h"
-
-using namespace mongoutils;
-
-#ifndef O_NOATIME
-#define O_NOATIME (0)
-#endif
-
-namespace mongo {
-
-using std::endl;
-using std::list;
-using std::string;
-using std::stringstream;
-
-// unique number for temporary file names
-unsigned long long FileAllocator::_uniqueNumber = 0;
-static SimpleMutex _uniqueNumberMutex;
-
-MONGO_FAIL_POINT_DEFINE(allocateDiskFull);
-
-/**
- * Aliases for Win32 CRT functions
- */
-#if defined(_WIN32)
-static inline long lseek(int fd, long offset, int origin) {
- return _lseek(fd, offset, origin);
-}
-static inline int write(int fd, const void* data, int count) {
- return _write(fd, data, count);
-}
-static inline int close(int fd) {
- return _close(fd);
-}
-
-typedef BOOL(CALLBACK* GetVolumeInformationByHandleWPtr)(
- HANDLE, LPWSTR, DWORD, LPDWORD, LPDWORD, LPDWORD, LPWSTR, DWORD);
-GetVolumeInformationByHandleWPtr GetVolumeInformationByHandleWFunc;
-
-MONGO_INITIALIZER(InitGetVolumeInformationByHandleW)(InitializerContext* context) {
- HMODULE kernelLib = LoadLibraryA("kernel32.dll");
- if (kernelLib) {
- GetVolumeInformationByHandleWFunc = reinterpret_cast<GetVolumeInformationByHandleWPtr>(
- GetProcAddress(kernelLib, "GetVolumeInformationByHandleW"));
- }
- return Status::OK();
-}
-#endif
-
-boost::filesystem::path ensureParentDirCreated(const boost::filesystem::path& p) {
- const boost::filesystem::path parent = p.branch_path();
-
- if (!boost::filesystem::exists(parent)) {
- ensureParentDirCreated(parent);
- log() << "creating directory " << parent.string() << endl;
- boost::filesystem::create_directory(parent);
- flushMyDirectory(parent); // flushes grandparent to ensure parent exists after crash
- }
-
- verify(boost::filesystem::is_directory(parent));
- return parent;
-}
-
-FileAllocator::FileAllocator() : _failed() {}
-
-
-void FileAllocator::start() {
- stdx::thread t([this] { run(this); });
- t.detach();
-}
-
-void FileAllocator::requestAllocation(const string& name, long& size) {
- stdx::lock_guard<stdx::mutex> lk(_pendingMutex);
- if (_failed)
- return;
- long oldSize = prevSize(name);
- if (oldSize != -1) {
- size = oldSize;
- return;
- }
- _pending.push_back(name);
- _pendingSize[name] = size;
- _pendingUpdated.notify_all();
-}
-
-void FileAllocator::allocateAsap(const string& name, unsigned long long& size) {
- stdx::unique_lock<stdx::mutex> lk(_pendingMutex);
-
- // In case the allocator is in failed state, check once before starting so that subsequent
- // requests for the same database would fail fast after the first one has failed.
- checkFailure();
-
- long oldSize = prevSize(name);
- if (oldSize != -1) {
- size = oldSize;
- if (!inProgress(name))
- return;
- }
- checkFailure();
- _pendingSize[name] = size;
- if (_pending.size() == 0)
- _pending.push_back(name);
- else if (_pending.front() != name) {
- _pending.remove(name);
- list<string>::iterator i = _pending.begin();
- ++i;
- _pending.insert(i, name);
- }
- _pendingUpdated.notify_all();
- while (inProgress(name)) {
- checkFailure();
- _pendingUpdated.wait(lk);
- }
-}
-
-void FileAllocator::waitUntilFinished() const {
- if (_failed)
- return;
- stdx::unique_lock<stdx::mutex> lk(_pendingMutex);
- while (_pending.size() != 0)
- _pendingUpdated.wait(lk);
-}
-
-// TODO: pull this out to per-OS files once they exist
-static bool useSparseFiles(int fd) {
-#if defined(__linux__) || defined(__FreeBSD__)
- struct statfs fs_stats;
- int ret = fstatfs(fd, &fs_stats);
- uassert(16062, "fstatfs failed: " + errnoWithDescription(), ret == 0);
-#endif
-
-#if defined(__linux__)
-// these are from <linux/magic.h> but that isn't available on all systems
-#define NFS_SUPER_MAGIC 0x6969
-#define TMPFS_MAGIC 0x01021994
-#define ZFS_SUPER_MAGIC 0x2fc12fc1
- return (fs_stats.f_type == NFS_SUPER_MAGIC) || (fs_stats.f_type == TMPFS_MAGIC) ||
- (fs_stats.f_type == ZFS_SUPER_MAGIC);
-
-#elif defined(__FreeBSD__)
-
- return (str::equals(fs_stats.f_fstypename, "zfs") ||
- str::equals(fs_stats.f_fstypename, "nfs") ||
- str::equals(fs_stats.f_fstypename, "oldnfs"));
-
-#elif defined(__sun)
- // assume using ZFS which is copy-on-write so no benefit to zero-filling
- // TODO: check which fs we are using like we do elsewhere
- return true;
-#else
- return false;
-#endif
-}
-
-#if defined(_WIN32)
-static bool isFileOnNTFSVolume(int fd) {
- if (!GetVolumeInformationByHandleWFunc) {
- warning() << "Could not retrieve pointer to GetVolumeInformationByHandleW function";
- return false;
- }
-
- HANDLE fileHandle = (HANDLE)_get_osfhandle(fd);
- if (fileHandle == INVALID_HANDLE_VALUE) {
- warning() << "_get_osfhandle() failed with " << _strerror(NULL);
- return false;
- }
-
- WCHAR fileSystemName[MAX_PATH + 1];
- if (!GetVolumeInformationByHandleWFunc(
- fileHandle, NULL, 0, NULL, 0, NULL, fileSystemName, sizeof(fileSystemName) / sizeof(WCHAR))) {
- DWORD gle = GetLastError();
- warning() << "GetVolumeInformationByHandleW failed with " << errnoWithDescription(gle);
- return false;
- }
-
- return lstrcmpW(fileSystemName, L"NTFS") == 0;
-}
-#endif
-
-void FileAllocator::ensureLength(int fd, long size) {
- // Test running out of disk scenarios
- if (MONGO_FAIL_POINT(allocateDiskFull)) {
- uasserted(10444, "File allocation failed due to failpoint.");
- }
-
-#if !defined(_WIN32)
- if (useSparseFiles(fd)) {
- LOG(1) << "using ftruncate to create a sparse file" << endl;
- int ret = ftruncate(fd, size);
- uassert(16063, "ftruncate failed: " + errnoWithDescription(), ret == 0);
- return;
- }
-#endif
-
-#if defined(__linux__)
- int ret = posix_fallocate(fd, 0, size);
- if (ret == 0) {
- LOG(1) << "used fallocate to create empty file";
- return;
- }
-
- log() << "FileAllocator: posix_fallocate failed: " << errnoWithDescription(ret)
- << " falling back" << endl;
-#endif
-
- off_t filelen = lseek(fd, 0, SEEK_END);
- if (filelen < size) {
- if (filelen != 0) {
- stringstream ss;
- ss << "failure creating new datafile; lseek failed for fd " << fd
- << " with errno: " << errnoWithDescription();
- uassert(10440, ss.str(), filelen == 0);
- }
- // Check for end of disk.
-
- uassert(10441,
- str::stream() << "Unable to allocate new file of size " << size << ' '
- << errnoWithDescription(),
- size - 1 == lseek(fd, size - 1, SEEK_SET));
- uassert(10442,
- str::stream() << "Unable to allocate new file of size " << size << ' '
- << errnoWithDescription(),
- 1 == write(fd, "", 1));
-
- // File expansion is completed here. Do not do the zeroing out on OS-es where there
- // is no risk of triggering allocation-related bugs such as
- // http://support.microsoft.com/kb/2731284.
- //
- if (!ProcessInfo::isDataFileZeroingNeeded()) {
- return;
- }
-
-#if defined(_WIN32)
- if (!isFileOnNTFSVolume(fd)) {
- log() << "No need to zero out datafile on non-NTFS volume" << endl;
- return;
- }
-#endif
-
- lseek(fd, 0, SEEK_SET);
-
- log() << "filling with zeroes...";
- const long z = 256 * 1024;
- const std::unique_ptr<char[]> buf_holder(new char[z]);
- char* buf = buf_holder.get();
- memset(buf, 0, z);
- long left = size;
- while (left > 0) {
- long towrite = left;
- if (towrite > z)
- towrite = z;
-
- int written = write(fd, buf, towrite);
- uassert(10443, errnoWithPrefix("FileAllocator: file write failed"), written > 0);
- left -= written;
- }
- }
-}
-
-void FileAllocator::checkFailure() {
- if (_failed) {
- // we want to log the problem (diskfull.js expects it) but we do not want to dump a stack
- // trace
- msgasserted(12520, "new file allocation failure");
- }
-}
-
-long FileAllocator::prevSize(const string& name) const {
- if (_pendingSize.count(name) > 0)
- return _pendingSize[name];
- if (boost::filesystem::exists(name))
- return boost::filesystem::file_size(name);
- return -1;
-}
-
-// caller must hold _pendingMutex lock.
-bool FileAllocator::inProgress(const string& name) const {
- for (list<string>::const_iterator i = _pending.begin(); i != _pending.end(); ++i)
- if (*i == name)
- return true;
- return false;
-}
-
-string FileAllocator::makeTempFileName(boost::filesystem::path root) {
- while (1) {
- boost::filesystem::path p = root / "_tmp";
- stringstream ss;
- unsigned long long thisUniqueNumber;
- {
- // increment temporary file name counter
- // TODO: SERVER-6055 -- Unify temporary file name selection
- stdx::lock_guard<SimpleMutex> lk(_uniqueNumberMutex);
- thisUniqueNumber = _uniqueNumber;
- ++_uniqueNumber;
- }
- ss << thisUniqueNumber;
- p /= ss.str();
- string fn = p.string();
- if (!boost::filesystem::exists(p))
- return fn;
- }
- return "";
-}
-
-void FileAllocator::run(FileAllocator* fa) {
- setThreadName("FileAllocator");
- {
- // initialize unique temporary file name counter
- // TODO: SERVER-6055 -- Unify temporary file name selection
- stdx::lock_guard<SimpleMutex> lk(_uniqueNumberMutex);
- _uniqueNumber = curTimeMicros64();
- }
- while (1) {
- {
- stdx::unique_lock<stdx::mutex> lk(fa->_pendingMutex);
- if (fa->_pending.size() == 0) {
- MONGO_IDLE_THREAD_BLOCK;
- fa->_pendingUpdated.wait(lk);
- }
- }
- while (1) {
- string name;
- long size = 0;
- {
- stdx::lock_guard<stdx::mutex> lk(fa->_pendingMutex);
- if (fa->_pending.size() == 0)
- break;
- name = fa->_pending.front();
- size = fa->_pendingSize[name];
- }
-
- string tmp;
- long fd = 0;
- try {
- log() << "allocating new datafile " << name;
-
- boost::filesystem::path parent = ensureParentDirCreated(name);
- tmp = fa->makeTempFileName(parent);
- ensureParentDirCreated(tmp);
-
-#if defined(_WIN32)
- fd = _wopen(toNativeString(tmp.c_str()).c_str(),
- _O_RDWR | _O_CREAT | O_NOATIME,
- _S_IREAD | _S_IWRITE);
-#else
- fd = open(tmp.c_str(), O_CREAT | O_RDWR | O_NOATIME, S_IRUSR | S_IWUSR);
-#endif
- if (fd < 0) {
- log() << "FileAllocator: couldn't create " << name << " (" << tmp << ") "
- << errnoWithDescription() << endl;
- uasserted(10439, "");
- }
-
-#if defined(POSIX_FADV_DONTNEED)
- if (posix_fadvise(fd, 0, size, POSIX_FADV_DONTNEED)) {
- log() << "warning: posix_fadvise fails " << name << " (" << tmp << ") "
- << errnoWithDescription() << endl;
- }
-#endif
-
- Timer t;
-
- /* make sure the file is the full desired length */
- ensureLength(fd, size);
-
- close(fd);
- fd = 0;
-
- boost::system::error_code ec;
- boost::filesystem::rename(tmp.c_str(), name.c_str(), ec);
- if (ec) {
- const string& errMessage = str::stream() << "error: couldn't rename " << tmp
- << " to " << name << ' '
- << ec.message();
- msgasserted(13653, errMessage);
- }
-
- flushMyDirectory(name);
-
- log() << "done allocating datafile " << name << ", "
- << "size: " << size / 1024 / 1024 << "MB, "
- << " took " << ((double)t.millis()) / 1000.0 << " secs" << endl;
-
- // no longer in a failed state. allow new writers.
- fa->_failed = false;
- } catch (const std::exception& e) {
- log() << "error: failed to allocate new file: " << name << " size: " << size << ' '
- << e.what() << ". will try again in 10 seconds" << endl;
- if (fd > 0)
- close(fd);
- try {
- if (!tmp.empty())
- boost::filesystem::remove(tmp);
- boost::filesystem::remove(name);
- } catch (const std::exception& e) {
- log() << "error removing files: " << e.what() << endl;
- }
-
- {
- stdx::lock_guard<stdx::mutex> lk(fa->_pendingMutex);
- fa->_failed = true;
-
- // TODO: Should we remove the file from pending?
- fa->_pendingUpdated.notify_all();
- }
-
-
- sleepsecs(10);
- continue;
- }
-
- {
- stdx::lock_guard<stdx::mutex> lk(fa->_pendingMutex);
- fa->_pendingSize.erase(name);
- fa->_pending.pop_front();
- fa->_pendingUpdated.notify_all();
- }
- }
- }
-}
-
-FileAllocator* FileAllocator::get() {
- static FileAllocator instance;
- return &instance;
-}
-
-} // namespace mongo
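
The allocator's crash-safety recipe deserves emphasis: it sizes a temporary file, syncs it, and only then rename()s it to the real datafile name, so a partially allocated file can never be observed under its final name. A POSIX-only sketch of that recipe; paths are caller-supplied, and error handling is reduced to asserts.

#include <cassert>
#include <cstdio>      // std::rename
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>

// Crash-safe preallocation: fill a temp file, fsync it, then atomically
// rename() it into place (rename is atomic on POSIX filesystems).
void preallocate(const char* tmpPath, const char* path, long size) {
    int fd = ::open(tmpPath, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR);
    assert(fd >= 0);
#if defined(__linux__)
    if (::posix_fallocate(fd, 0, size) != 0)  // fast path: real allocation
#endif
    {
        assert(::ftruncate(fd, size) == 0);   // fallback: sparse file
    }
    assert(::fsync(fd) == 0);
    assert(::close(fd) == 0);
    assert(std::rename(tmpPath, path) == 0);  // atomic publish
    // A production version would also fsync the parent directory,
    // as flushMyDirectory() does in the code above.
}
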
diff --git a/src/mongo/db/storage/mmap_v1/file_allocator.h b/src/mongo/db/storage/mmap_v1/file_allocator.h
deleted file mode 100644
index 589cf908dc0..00000000000
--- a/src/mongo/db/storage/mmap_v1/file_allocator.h
+++ /dev/null
@@ -1,105 +0,0 @@
-// @file file_allocator.h
-
-/* Copyright 2009 10gen Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects
- * for all of the code used other than as permitted herein. If you modify
- * file(s) with this exception, you may extend this exception to your
- * version of the file(s), but you are not obligated to do so. If you do not
- * wish to do so, delete this exception statement from your version. If you
- * delete this exception statement from all source files in the program,
- * then also delete it in the license file.
- */
-
-#include <boost/filesystem/path.hpp>
-#include <list>
-#include <map>
-
-#include "mongo/stdx/condition_variable.h"
-#include "mongo/stdx/mutex.h"
-#include "mongo/util/concurrency/mutex.h"
-
-namespace mongo {
-
-/*
- * Handles allocation of contiguous files on disk. Allocation may be
- * requested asynchronously or synchronously.
- * singleton
- */
-class FileAllocator {
- MONGO_DISALLOW_COPYING(FileAllocator);
- /*
- * The public functions may not be called concurrently. The allocation
- * functions may be called multiple times per file, but only the first
- * size specified per file will be used.
- */
-public:
- void start();
-
- /**
- * May be called if file exists. If file exists, or its allocation has
- * been requested, size is updated to match existing file size.
- */
- void requestAllocation(const std::string& name, long& size);
-
-
- /**
- * Returns when file has been allocated. If file exists, size is
- * updated to match existing file size.
- */
- void allocateAsap(const std::string& name, unsigned long long& size);
-
- void waitUntilFinished() const;
-
- static void ensureLength(int fd, long size);
-
- /** @return the singleton */
- static FileAllocator* get();
-
-private:
- FileAllocator();
-
- void checkFailure();
-
- // caller must hold _pendingMutex lock. Returns size if allocated or
- // allocation requested, -1 otherwise.
- long prevSize(const std::string& name) const;
-
- // caller must hold _pendingMutex lock.
- bool inProgress(const std::string& name) const;
-
- /** called from the worker thread */
- static void run(FileAllocator* fa);
-
- // generate a unique name for temporary files
- std::string makeTempFileName(boost::filesystem::path root);
-
- mutable stdx::mutex _pendingMutex;
- mutable stdx::condition_variable _pendingUpdated;
-
- std::list<std::string> _pending;
- mutable std::map<std::string, long> _pendingSize;
-
- // unique number for temporary files
- static unsigned long long _uniqueNumber;
-
- bool _failed;
-};
-
-} // namespace mongo
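
allocateAsap's blocking contract, namely push the request, notify the worker, then wait on a condition variable until the name leaves the pending list, is a standard mutex-plus-condvar handoff. A minimal generic sketch of the waiting side; PendingQueue is an illustrative type, not code from the tree.

#include <algorithm>
#include <condition_variable>
#include <list>
#include <mutex>
#include <string>

// Minimal shape of the allocateAsap() handoff: requesters block until the
// worker thread has removed their entry from the pending list.
struct PendingQueue {
    std::mutex m;
    std::condition_variable updated;
    std::list<std::string> pending;

    void waitFor(const std::string& name) {  // requester side
        std::unique_lock<std::mutex> lk(m);
        updated.wait(lk, [&] {
            return std::find(pending.begin(), pending.end(), name) == pending.end();
        });
    }
    void finish(const std::string& name) {   // worker side
        {
            std::lock_guard<std::mutex> lk(m);
            pending.remove(name);
        }
        updated.notify_all();
    }
};
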
diff --git a/src/mongo/db/storage/mmap_v1/heap_record_store_btree.cpp b/src/mongo/db/storage/mmap_v1/heap_record_store_btree.cpp
deleted file mode 100644
index 093808ea9c8..00000000000
--- a/src/mongo/db/storage/mmap_v1/heap_record_store_btree.cpp
+++ /dev/null
@@ -1,164 +0,0 @@
-// heap_record_store_btree.cpp
-
-/**
- * Copyright (C) 2014 MongoDB Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kStorage
-
-#include "mongo/platform/basic.h"
-
-#include "mongo/db/storage/mmap_v1/heap_record_store_btree.h"
-
-#include "mongo/base/checked_cast.h"
-#include "mongo/db/operation_context.h"
-#include "mongo/util/log.h"
-#include "mongo/util/mongoutils/str.h"
-
-namespace mongo {
-
-RecordData HeapRecordStoreBtree::dataFor(OperationContext* opCtx, const RecordId& loc) const {
- Records::const_iterator it = _records.find(loc);
- invariant(it != _records.end());
- const MmapV1RecordHeader& rec = it->second;
-
- return RecordData(rec.data.get(), rec.dataSize);
-}
-
-bool HeapRecordStoreBtree::findRecord(OperationContext* opCtx,
- const RecordId& loc,
- RecordData* out) const {
- Records::const_iterator it = _records.find(loc);
- if (it == _records.end())
- return false;
- const MmapV1RecordHeader& rec = it->second;
- *out = RecordData(rec.data.get(), rec.dataSize);
- return true;
-}
-
-void HeapRecordStoreBtree::deleteRecord(OperationContext* opCtx, const RecordId& loc) {
- invariant(_records.erase(loc) == 1);
-}
-
-StatusWith<RecordId> HeapRecordStoreBtree::insertRecord(
- OperationContext* opCtx, const char* data, int len, Timestamp, bool enforceQuota) {
- MmapV1RecordHeader rec(len);
- memcpy(rec.data.get(), data, len);
-
- const RecordId loc = allocateLoc();
- _records[loc] = rec;
-
- HeapRecordStoreBtreeRecoveryUnit::notifyInsert(opCtx, this, loc);
-
- return StatusWith<RecordId>(loc);
-}
-
-Status HeapRecordStoreBtree::insertRecordsWithDocWriter(OperationContext* opCtx,
- const DocWriter* const* docs,
- const Timestamp*,
- size_t nDocs,
- RecordId* idsOut) {
- // This class is only for unit tests of the mmapv1 btree code and this is how it is called.
- // If that ever changes, this class will need to be fixed.
- invariant(nDocs == 1);
- invariant(idsOut);
-
- MmapV1RecordHeader rec(docs[0]->documentSize());
- docs[0]->writeDocument(rec.data.get());
-
- const RecordId loc = allocateLoc();
- _records[loc] = rec;
- *idsOut = loc;
-
- HeapRecordStoreBtreeRecoveryUnit::notifyInsert(opCtx, this, loc);
-
- return Status::OK();
-}
-
-RecordId HeapRecordStoreBtree::allocateLoc() {
- const int64_t id = _nextId++;
- // This is a hack, but both the high and low order bits of RecordId offset must be 0, and the
- // file must fit in 23 bits. This gives us a total of 30 + 23 == 53 bits.
- invariant(id < (1LL << 53));
- RecordId dl(int(id >> 30), int((id << 1) & ~(1 << 31)));
- invariant((dl.repr() & 0x1) == 0);
- return dl;
-}
-
-Status HeapRecordStoreBtree::touch(OperationContext* opCtx, BSONObjBuilder* output) const {
- // not currently called from the tests, but called from btree_logic.h
- return Status::OK();
-}
-
-// ---------------------------
-
-void HeapRecordStoreBtreeRecoveryUnit::commitUnitOfWork() {
- _insertions.clear();
- _mods.clear();
-}
-
-void HeapRecordStoreBtreeRecoveryUnit::abortUnitOfWork() {
- // reverse in case we write same area twice
- for (size_t i = _mods.size(); i > 0; i--) {
- ModEntry& e = _mods[i - 1];
- memcpy(e.data, e.old.get(), e.len);
- }
-
- invariant(_insertions.size() == 0); // todo
-}
-
-void* HeapRecordStoreBtreeRecoveryUnit::writingPtr(void* data, size_t len) {
- ModEntry e = {data, len, boost::shared_array<char>(new char[len])};
- memcpy(e.old.get(), data, len);
- _mods.push_back(e);
- return data;
-}
-
-void HeapRecordStoreBtreeRecoveryUnit::notifyInsert(HeapRecordStoreBtree* rs, const RecordId& loc) {
- InsertEntry e = {rs, loc};
- _insertions.push_back(e);
-}
-
-void HeapRecordStoreBtreeRecoveryUnit::notifyInsert(OperationContext* ctx,
- HeapRecordStoreBtree* rs,
- const RecordId& loc) {
- if (!ctx)
- return;
-
- // This dynamic_cast is a workaround; ideally the design would avoid it.
- HeapRecordStoreBtreeRecoveryUnit* ru =
- dynamic_cast<HeapRecordStoreBtreeRecoveryUnit*>(ctx->recoveryUnit());
-
- if (!ru)
- return;
-
- ru->notifyInsert(rs, loc);
-}
-
-
-} // namespace mongo
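
allocateLoc's packing is worth spelling out: a 53-bit counter becomes a v1-style (file, offset) pair, with the high 23 bits as the file number and the low 30 bits shifted left once so the offset's lowest bit is always zero. A sketch making the packing and its inverse explicit; pack and unpack are illustrative names.

#include <cassert>
#include <cstdint>

// Same packing as HeapRecordStoreBtree::allocateLoc(): 53-bit id ->
// (file: high 23 bits, offset: low 30 bits shifted left once).
struct Loc {
    int file;
    int ofs;
};

Loc pack(int64_t id) {
    assert(id < (1LL << 53));
    return {int(id >> 30), int((id << 1) & ~(1LL << 31))};
}

int64_t unpack(Loc l) {
    return (int64_t(l.file) << 30) | (uint32_t(l.ofs) >> 1);
}

// pack(1) == {0, 2}; unpack(pack(12345)) == 12345.
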
diff --git a/src/mongo/db/storage/mmap_v1/heap_record_store_btree.h b/src/mongo/db/storage/mmap_v1/heap_record_store_btree.h
deleted file mode 100644
index 4095a910115..00000000000
--- a/src/mongo/db/storage/mmap_v1/heap_record_store_btree.h
+++ /dev/null
@@ -1,237 +0,0 @@
-// heap_record_store_btree.h
-
-/**
-* Copyright (C) 2014 MongoDB Inc.
-*
-* This program is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Affero General Public License, version 3,
-* as published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU Affero General Public License for more details.
-*
-* You should have received a copy of the GNU Affero General Public License
-* along with this program. If not, see <http://www.gnu.org/licenses/>.
-*
-* As a special exception, the copyright holders give permission to link the
-* code of portions of this program with the OpenSSL library under certain
-* conditions as described in each individual source file and distribute
-* linked combinations including the program with the OpenSSL library. You
-* must comply with the GNU Affero General Public License in all respects for
-* all of the code used other than as permitted herein. If you modify file(s)
-* with this exception, you may extend this exception to your version of the
-* file(s), but you are not obligated to do so. If you do not wish to do so,
-* delete this exception statement from your version. If you delete this
-* exception statement from all source files in the program, then also delete
-* it in the license file.
-*/
-
-#pragma once
-
-#include <boost/shared_array.hpp>
-#include <map>
-
-#include "mongo/db/storage/record_store.h"
-#include "mongo/db/storage/recovery_unit.h"
-
-namespace mongo {
-
-/**
- * A RecordStore that stores all data on the heap. This implementation contains only the
- * functionality necessary to test btree.
- */
-class HeapRecordStoreBtree : public RecordStore {
- struct MmapV1RecordHeader;
-
-public:
- const std::string& getIdent() const override {
- MONGO_UNREACHABLE;
- }
-
- // RecordId(0,0) isn't valid for records.
- explicit HeapRecordStoreBtree(StringData ns) : RecordStore(ns), _nextId(1) {}
-
- virtual RecordData dataFor(OperationContext* opCtx, const RecordId& loc) const;
-
- virtual bool findRecord(OperationContext* opCtx, const RecordId& loc, RecordData* out) const;
-
- virtual void deleteRecord(OperationContext* opCtx, const RecordId& dl);
-
- virtual StatusWith<RecordId> insertRecord(
- OperationContext* opCtx, const char* data, int len, Timestamp, bool enforceQuota);
-
- virtual Status insertRecordsWithDocWriter(OperationContext* opCtx,
- const DocWriter* const* docs,
- const Timestamp*,
- size_t nDocs,
- RecordId* idsOut);
-
- virtual long long numRecords(OperationContext* opCtx) const {
- return _records.size();
- }
-
- virtual Status touch(OperationContext* opCtx, BSONObjBuilder* output) const;
-
- // public methods below here are not necessary to test btree, and will crash when called.
-
- // ------------------------------
-
- virtual Status updateRecord(OperationContext* opCtx,
- const RecordId& oldLocation,
- const char* data,
- int len,
- bool enforceQuota,
- UpdateNotifier* notifier) {
- MONGO_UNREACHABLE;
- }
-
- virtual bool updateWithDamagesSupported() const {
- return true;
- }
-
- virtual StatusWith<RecordData> updateWithDamages(OperationContext* opCtx,
- const RecordId& loc,
- const RecordData& oldRec,
- const char* damageSource,
- const mutablebson::DamageVector& damages) {
- MONGO_UNREACHABLE;
- }
-
- std::unique_ptr<SeekableRecordCursor> getCursor(OperationContext* opCtx,
- bool forward) const final {
- MONGO_UNREACHABLE;
- }
-
-
- virtual Status truncate(OperationContext* opCtx) {
- MONGO_UNREACHABLE;
- }
-
- virtual void cappedTruncateAfter(OperationContext* opCtx, RecordId end, bool inclusive) {
- MONGO_UNREACHABLE;
- }
-
- virtual bool compactSupported() const {
- MONGO_UNREACHABLE;
- }
-
- virtual Status validate(OperationContext* opCtx,
- ValidateCmdLevel level,
- ValidateAdaptor* adaptor,
- ValidateResults* results,
- BSONObjBuilder* output) {
- MONGO_UNREACHABLE;
- }
-
- virtual void appendCustomStats(OperationContext* opCtx,
- BSONObjBuilder* result,
- double scale) const {
- MONGO_UNREACHABLE;
- }
-
- virtual void increaseStorageSize(OperationContext* opCtx, int size, bool enforceQuota) {
- MONGO_UNREACHABLE;
- }
-
- virtual int64_t storageSize(OperationContext* opCtx,
- BSONObjBuilder* extraInfo = NULL,
- int infoLevel = 0) const {
- MONGO_UNREACHABLE;
- }
-
- virtual long long dataSize(OperationContext* opCtx) const {
- MONGO_UNREACHABLE;
- }
-
- virtual MmapV1RecordHeader* recordFor(const RecordId& loc) const {
- MONGO_UNREACHABLE;
- }
-
- virtual bool isCapped() const {
- MONGO_UNREACHABLE;
- }
-
- virtual const char* name() const {
- MONGO_UNREACHABLE;
- }
-
- void waitForAllEarlierOplogWritesToBeVisible(OperationContext* opCtx) const override {
- MONGO_UNREACHABLE;
- }
-
- virtual void updateStatsAfterRepair(OperationContext* opCtx,
- long long numRecords,
- long long dataSize) {
- MONGO_UNREACHABLE;
- }
- // more things that we actually care about below
-
-private:
- struct MmapV1RecordHeader {
- MmapV1RecordHeader() : dataSize(-1), data() {}
- explicit MmapV1RecordHeader(int size) : dataSize(size), data(new char[size]) {}
-
- int dataSize;
- boost::shared_array<char> data;
- };
-
- RecordId allocateLoc();
-
- typedef std::map<RecordId, HeapRecordStoreBtree::MmapV1RecordHeader> Records;
- Records _records;
- int64_t _nextId;
-};
-
-/**
- * A RecoveryUnit for HeapRecordStoreBtree, this is for testing btree only.
- */
-class HeapRecordStoreBtreeRecoveryUnit : public RecoveryUnit {
-public:
- void beginUnitOfWork(OperationContext* opCtx) final{};
- void commitUnitOfWork() final;
- void abortUnitOfWork() final;
-
- virtual bool waitUntilDurable() {
- return true;
- }
-
- virtual void abandonSnapshot() {}
-
- virtual void registerChange(Change* change) {
- change->commit(boost::none);
- delete change;
- }
-
- virtual void* writingPtr(void* data, size_t len);
-
- virtual void setRollbackWritesDisabled() {}
-
- virtual SnapshotId getSnapshotId() const {
- return SnapshotId();
- }
-
- virtual void setOrderedCommit(bool orderedCommit) {}
-
- // -----------------------
-
- void notifyInsert(HeapRecordStoreBtree* rs, const RecordId& loc);
- static void notifyInsert(OperationContext* ctx, HeapRecordStoreBtree* rs, const RecordId& loc);
-
-private:
- struct InsertEntry {
- HeapRecordStoreBtree* rs;
- RecordId loc;
- };
- std::vector<InsertEntry> _insertions;
-
- struct ModEntry {
- void* data;
- size_t len;
- boost::shared_array<char> old;
- };
- std::vector<ModEntry> _mods;
-};
-
-} // namespace mongo
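
writingPtr amounts to a small undo log: old bytes are copied aside before a region is modified, and abortUnitOfWork restores them newest-first so overlapping writes unwind correctly. The same pattern in a standalone sketch; UndoLog is an illustrative name.

#include <cstring>
#include <memory>
#include <vector>

// Minimal undo log in the style of HeapRecordStoreBtreeRecoveryUnit.
class UndoLog {
public:
    // Call before modifying [data, data+len): snapshots the old bytes.
    void* writing(void* data, size_t len) {
        std::unique_ptr<char[]> old(new char[len]);
        std::memcpy(old.get(), data, len);
        _mods.push_back({data, len, std::move(old)});
        return data;
    }
    void commit() { _mods.clear(); }  // keep the new bytes
    void abort() {
        // Restore newest-first, so overlapping writes unwind correctly.
        for (auto it = _mods.rbegin(); it != _mods.rend(); ++it)
            std::memcpy(it->data, it->old.get(), it->len);
        _mods.clear();
    }

private:
    struct Mod {
        void* data;
        size_t len;
        std::unique_ptr<char[]> old;
    };
    std::vector<Mod> _mods;
};
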
diff --git a/src/mongo/db/storage/mmap_v1/journal_latency_test_cmd.cpp b/src/mongo/db/storage/mmap_v1/journal_latency_test_cmd.cpp
deleted file mode 100644
index 8807dfbb064..00000000000
--- a/src/mongo/db/storage/mmap_v1/journal_latency_test_cmd.cpp
+++ /dev/null
@@ -1,155 +0,0 @@
-/**
- * Copyright (C) 2012 10gen Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#include "mongo/platform/basic.h"
-
-#include <boost/filesystem/convenience.hpp>
-#include <boost/filesystem/operations.hpp>
-#include <string>
-#include <vector>
-
-#include "mongo/base/init.h"
-#include "mongo/base/status.h"
-#include "mongo/db/auth/action_set.h"
-#include "mongo/db/auth/action_type.h"
-#include "mongo/db/auth/privilege.h"
-#include "mongo/db/commands.h"
-#include "mongo/db/commands/test_commands_enabled.h"
-#include "mongo/db/curop.h"
-#include "mongo/db/index/index_access_method.h"
-#include "mongo/db/index/index_descriptor.h"
-#include "mongo/db/jsobj.h"
-#include "mongo/db/query/internal_plans.h"
-#include "mongo/db/storage/mmap_v1/aligned_builder.h"
-#include "mongo/db/storage/mmap_v1/logfile.h"
-#include "mongo/db/storage/mmap_v1/paths.h"
-#include "mongo/db/storage/storage_options.h"
-#include "mongo/scripting/engine.h"
-#include "mongo/util/background.h"
-#include "mongo/util/timer.h"
-
-namespace mongo {
-
-using std::max;
-using std::min;
-using std::string;
-using std::stringstream;
-
-namespace dur {
-boost::filesystem::path getJournalDir();
-}
-
-// Testing-only, enabled via command line
-class JournalLatencyTestCmd : public BasicCommand {
-public:
- JournalLatencyTestCmd() : BasicCommand("journalLatencyTest") {}
-
- AllowedOnSecondary secondaryAllowed(ServiceContext*) const override {
- return AllowedOnSecondary::kAlways;
- }
- virtual bool supportsWriteConcern(const BSONObj& cmd) const override {
- return false;
- }
- virtual bool adminOnly() const {
- return true;
- }
- std::string help() const override {
- return "test how long to write and fsync to a test file in the journal/ directory";
- }
- // No auth needed because it only works when enabled via command line.
- virtual void addRequiredPrivileges(const std::string& dbname,
- const BSONObj& cmdObj,
- std::vector<Privilege>* out) const {}
- bool run(OperationContext* opCtx,
- const string& dbname,
- const BSONObj& cmdObj,
- BSONObjBuilder& result) {
- boost::filesystem::path p = dur::getJournalDir();
- p /= "journalLatencyTest";
-
- // remove file if already present
- try {
- boost::filesystem::remove(p);
- } catch (...) {
- }
-
- BSONObjBuilder bb[2];
- for (int pass = 0; pass < 2; pass++) {
- LogFile f(p.string());
- AlignedBuilder b(1024 * 1024);
- {
- Timer t;
- for (int i = 0; i < 100; i++) {
- f.synchronousAppend(b.buf(), 8192);
- }
- bb[pass].append("8KB", t.millis() / 100.0);
- }
- {
- const int N = 50;
- Timer t2;
- long long x = 0;
- for (int i = 0; i < N; i++) {
- Timer t;
- f.synchronousAppend(b.buf(), 8192);
- x += t.micros();
- sleepmillis(4);
- }
- long long y = t2.micros() - 4 * N * 1000;
- // timer granularity is not reliable on all platforms, so report whichever
- // of x and y is higher
- bb[pass].append("8KBWithPauses", max(x, y) / (N * 1000.0));
- }
- {
- Timer t;
- for (int i = 0; i < 20; i++) {
- f.synchronousAppend(b.buf(), 1024 * 1024);
- }
- bb[pass].append("1MB", t.millis() / 20.0);
- }
- // second time around, we are prealloced.
- }
- result.append("timeMillis", bb[0].obj());
- result.append("timeMillisWithPrealloc", bb[1].obj());
-
- try {
- remove(p);
- } catch (...) {
- }
-
- try {
- result.append(
- "onSamePartition",
- onSamePartition(dur::getJournalDir().string(), storageGlobalParams.dbpath));
- } catch (...) {
- }
-
- return true;
- }
-};
-MONGO_REGISTER_TEST_COMMAND(JournalLatencyTestCmd);
-}
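
The command's measurement trick generalizes: time each synchronous append individually, also time the whole run minus the deliberate 4ms sleeps, and report the larger of the two estimates to defend against coarse platform timers. A sketch of that estimator with std::chrono; writeAndSync is an assumed callback, not tree code.

#include <algorithm>
#include <chrono>
#include <functional>
#include <thread>

// Average per-call latency of `writeAndSync` with 4ms pauses between calls,
// in milliseconds. Mirrors the "8KBWithPauses" measurement above.
double pausedLatencyMillis(const std::function<void()>& writeAndSync, int n = 50) {
    using namespace std::chrono;
    long long summed = 0;  // sum of individually timed calls (microseconds)
    auto wallStart = steady_clock::now();
    for (int i = 0; i < n; i++) {
        auto t0 = steady_clock::now();
        writeAndSync();
        summed += duration_cast<microseconds>(steady_clock::now() - t0).count();
        std::this_thread::sleep_for(milliseconds(4));
    }
    long long wall = duration_cast<microseconds>(steady_clock::now() - wallStart).count()
        - 4LL * n * 1000;  // subtract the deliberate sleeps
    // Distrust coarse timers: take whichever estimate is larger.
    return std::max(summed, wall) / (n * 1000.0);
}
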
diff --git a/src/mongo/db/storage/mmap_v1/logfile.cpp b/src/mongo/db/storage/mmap_v1/logfile.cpp
deleted file mode 100644
index 98cfabc1f75..00000000000
--- a/src/mongo/db/storage/mmap_v1/logfile.cpp
+++ /dev/null
@@ -1,272 +0,0 @@
-// @file logfile.cpp simple file log writing / journaling
-
-/**
-* Copyright (C) 2008 10gen Inc.
-*
-* This program is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Affero General Public License, version 3,
-* as published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU Affero General Public License for more details.
-*
-* You should have received a copy of the GNU Affero General Public License
-* along with this program. If not, see <http://www.gnu.org/licenses/>.
-*
-* As a special exception, the copyright holders give permission to link the
-* code of portions of this program with the OpenSSL library under certain
-* conditions as described in each individual source file and distribute
-* linked combinations including the program with the OpenSSL library. You
-* must comply with the GNU Affero General Public License in all respects
-* for all of the code used other than as permitted herein. If you modify
-* file(s) with this exception, you may extend this exception to your
-* version of the file(s), but you are not obligated to do so. If you do not
-* wish to do so, delete this exception statement from your version. If you
-* delete this exception statement from all source files in the program,
-* then also delete it in the license file.
-*/
-
-#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kControl
-
-#include "mongo/platform/basic.h"
-
-#include "mongo/db/storage/mmap_v1/logfile.h"
-
-#include "mongo/db/storage/mmap_v1/mmap.h"
-#include "mongo/db/storage/mmap_v1/paths.h"
-#include "mongo/platform/posix_fadvise.h"
-#include "mongo/util/allocator.h"
-#include "mongo/util/log.h"
-#include "mongo/util/mongoutils/str.h"
-#include "mongo/util/startup_test.h"
-#include "mongo/util/text.h"
-
-
-using namespace mongoutils;
-
-using std::endl;
-using std::string;
-
-#if defined(_WIN32)
-
-namespace mongo {
-
-LogFile::LogFile(const std::string& name, bool readwrite) : _name(name) {
- _fd = CreateFile(toNativeString(name.c_str()).c_str(),
- (readwrite ? GENERIC_READ : 0) | GENERIC_WRITE,
- FILE_SHARE_READ,
- NULL,
- OPEN_ALWAYS,
- FILE_FLAG_NO_BUFFERING,
- NULL);
- if (_fd == INVALID_HANDLE_VALUE) {
- DWORD e = GetLastError();
- uasserted(13518,
- str::stream() << "couldn't open file " << name << " for writing "
- << errnoWithDescription(e));
- }
- SetFilePointer(_fd, 0, 0, FILE_BEGIN);
-}
-
-LogFile::~LogFile() {
- if (_fd != INVALID_HANDLE_VALUE)
- CloseHandle(_fd);
-}
-
-void LogFile::truncate() {
- verify(_fd != INVALID_HANDLE_VALUE);
-
- if (!SetEndOfFile(_fd)) {
- msgasserted(15871, "Couldn't truncate file: " + errnoWithDescription());
- }
-}
-
-void LogFile::writeAt(unsigned long long offset, const void* _buf, size_t _len) {
- // TODO 64 bit offsets
- OVERLAPPED o;
- memset(&o, 0, sizeof(o));
- (unsigned long long&)o.Offset = offset;
- BOOL ok = WriteFile(_fd, _buf, _len, 0, &o);
- verify(ok);
-}
-
-void LogFile::readAt(unsigned long long offset, void* _buf, size_t _len) {
- // TODO 64 bit offsets
- OVERLAPPED o;
- memset(&o, 0, sizeof(o));
- (unsigned long long&)o.Offset = offset;
- DWORD nr;
- BOOL ok = ReadFile(_fd, _buf, _len, &nr, &o);
- if (!ok) {
- string e = errnoWithDescription();
- // DWORD e = GetLastError();
- log() << "LogFile readAt(" << offset << ") len:" << _len << "errno:" << e << endl;
- verify(false);
- }
-}
-
-void LogFile::synchronousAppend(const void* _buf, size_t _len) {
- const size_t BlockSize = 8 * 1024 * 1024;
- verify(_fd);
- verify(_len % minDirectIOSizeBytes == 0);
- const char* buf = (const char*)_buf;
- size_t left = _len;
- while (left) {
- size_t toWrite = std::min(left, BlockSize);
- DWORD written;
- if (!WriteFile(_fd, buf, toWrite, &written, NULL)) {
- DWORD e = GetLastError();
- if (e == 87)
- msgasserted(13519, "error 87 appending to file - invalid parameter");
- else
- uasserted(13517,
- str::stream() << "error appending to file " << _name << ' ' << _len << ' '
- << toWrite
- << ' '
- << errnoWithDescription(e));
- } else {
- dassert(written == toWrite);
- }
- left -= written;
- buf += written;
- }
-}
-}
-
-#else
-
-/// posix
-
-#include <fcntl.h>
-#include <sys/ioctl.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-
-#ifdef __linux__
-#include <linux/fs.h>
-#endif
-
-namespace mongo {
-
-LogFile::LogFile(const std::string& name, bool readwrite) : _name(name) {
- int options = O_CREAT | (readwrite ? O_RDWR : O_WRONLY)
-#if defined(O_DIRECT)
- | O_DIRECT
-#endif
-#if defined(O_NOATIME)
- | O_NOATIME
-#endif
- ;
-
- _fd = open(name.c_str(), options, S_IRUSR | S_IWUSR);
- _blkSize = minDirectIOSizeBytes;
-
-#if defined(O_DIRECT)
- _direct = true;
- if (_fd < 0) {
- _direct = false;
- options &= ~O_DIRECT;
- _fd = open(name.c_str(), options, S_IRUSR | S_IWUSR);
- }
-#ifdef __linux__
- ssize_t tmpBlkSize = ioctl(_fd, BLKBSZGET);
- // TODO: We need some sanity checking on tmpBlkSize even if ioctl() did not fail.
- if (tmpBlkSize > 0) {
- _blkSize = (size_t)tmpBlkSize;
- }
-#endif
-#else
- _direct = false;
-#endif
-
- if (_fd < 0) {
- uasserted(13516,
- str::stream() << "couldn't open file " << name << " for writing "
- << errnoWithDescription());
- }
-
- flushMyDirectory(name);
-}
-
-LogFile::~LogFile() {
- if (_fd >= 0)
- close(_fd);
- _fd = -1;
-}
-
-void LogFile::truncate() {
- verify(_fd >= 0);
-
- MONGO_STATIC_ASSERT(sizeof(off_t) == 8); // we don't want overflow here
- const off_t pos = lseek(_fd, 0, SEEK_CUR); // doesn't actually seek
- if (ftruncate(_fd, pos) != 0) {
- msgasserted(15873, "Couldn't truncate file: " + errnoWithDescription());
- }
-
- fsync(_fd);
-}
-
-void LogFile::writeAt(unsigned long long offset, const void* buf, size_t len) {
- verify(((size_t)buf) % minDirectIOSizeBytes == 0); // aligned
- ssize_t written = pwrite(_fd, buf, len, offset);
- if (written != (ssize_t)len) {
- log() << "writeAt fails " << errnoWithDescription() << endl;
- }
-#if defined(__linux__)
- fdatasync(_fd);
-#else
- fsync(_fd);
-#endif
-}
-
-void LogFile::readAt(unsigned long long offset, void* _buf, size_t _len) {
- verify(((size_t)_buf) % minDirectIOSizeBytes == 0); // aligned
- ssize_t rd = pread(_fd, _buf, _len, offset);
- verify(rd != -1);
-}
-
-void LogFile::synchronousAppend(const void* b, size_t len) {
- const char* buf = static_cast<const char*>(b);
- ssize_t charsToWrite = static_cast<ssize_t>(len);
-
- fassert(16144, charsToWrite >= 0);
- fassert(16142, _fd >= 0);
- fassert(16143, reinterpret_cast<size_t>(buf) % _blkSize == 0); // aligned
-
-#ifdef POSIX_FADV_DONTNEED
- const off_t pos = lseek(_fd, 0, SEEK_CUR); // doesn't actually seek, just get current position
-#endif
-
- while (charsToWrite > 0) {
- const ssize_t written = write(_fd, buf, static_cast<size_t>(charsToWrite));
- if (-1 == written) {
- log() << "LogFile::synchronousAppend failed with " << charsToWrite
- << " bytes unwritten out of " << len << " bytes; b=" << b << ' '
- << errnoWithDescription() << std::endl;
- fassertFailed(13515);
- }
- buf += written;
- charsToWrite -= written;
- }
-
- if (
-#if defined(__linux__)
- fdatasync(_fd) < 0
-#else
- fsync(_fd)
-#endif
- ) {
- log() << "error appending to file on fsync " << ' ' << errnoWithDescription();
- fassertFailed(13514);
- }
-
-#ifdef POSIX_FADV_DONTNEED
-    if (!_direct && pos >= 0)  // pos is -1 if the earlier lseek failed
- posix_fadvise(_fd, pos, len, POSIX_FADV_DONTNEED);
-#endif
-}
-}
-
-#endif
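Two details of the POSIX implementation deleted above are worth noting: O_DIRECT is opportunistic (the constructor retries without it when the filesystem refuses), and direct writes must be aligned to the device block size. Below is a minimal standalone sketch of that open-with-fallback plus aligned-write pattern, assuming a 4096-byte block size and an invented file name.

    #include <cstdio>
    #include <cstdlib>
    #include <cstring>
    #include <fcntl.h>
    #include <unistd.h>

    int main() {
        const char* path = "direct_io_probe";  // illustrative file name
        int options = O_CREAT | O_WRONLY;
    #if defined(O_DIRECT)
        options |= O_DIRECT;
    #endif
        int fd = ::open(path, options, 0600);
    #if defined(O_DIRECT)
        if (fd < 0) {
            // Same fallback as the deleted constructor: some filesystems
            // reject O_DIRECT, so retry with buffered I/O.
            options &= ~O_DIRECT;
            fd = ::open(path, options, 0600);
        }
    #endif
        if (fd < 0) {
            std::perror("open");
            return 1;
        }

        // Direct I/O requires the buffer address, length, and file offset to
        // be multiples of the device block size; 4096 is the common minimum.
        const size_t blockSize = 4096;
        void* buf = nullptr;
        if (posix_memalign(&buf, blockSize, blockSize) != 0) {
            ::close(fd);
            return 1;
        }
        std::memset(buf, 0, blockSize);

        if (::write(fd, buf, blockSize) != (ssize_t)blockSize || ::fdatasync(fd) != 0)
            std::perror("aligned write");

        std::free(buf);
        ::close(fd);
        return 0;
    }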
diff --git a/src/mongo/db/storage/mmap_v1/logfile.h b/src/mongo/db/storage/mmap_v1/logfile.h
deleted file mode 100644
index dbb83cf2a2e..00000000000
--- a/src/mongo/db/storage/mmap_v1/logfile.h
+++ /dev/null
@@ -1,83 +0,0 @@
-// @file logfile.h simple file log writing / journaling
-
-/**
-* Copyright (C) 2010 10gen Inc.
-*
-* This program is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Affero General Public License, version 3,
-* as published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU Affero General Public License for more details.
-*
-* You should have received a copy of the GNU Affero General Public License
-* along with this program. If not, see <http://www.gnu.org/licenses/>.
-*
-* As a special exception, the copyright holders give permission to link the
-* code of portions of this program with the OpenSSL library under certain
-* conditions as described in each individual source file and distribute
-* linked combinations including the program with the OpenSSL library. You
-* must comply with the GNU Affero General Public License in all respects
-* for all of the code used other than as permitted herein. If you modify
-* file(s) with this exception, you may extend this exception to your
-* version of the file(s), but you are not obligated to do so. If you do not
-* wish to do so, delete this exception statement from your version. If you
-* delete this exception statement from all source files in the program,
-* then also delete it in the license file.
-*/
-
-#pragma once
-
-#include <string>
-
-
-namespace mongo {
-
-class LogFile {
-public:
-    /** create the file and open it. the file must not already exist.
- throws UserAssertion on i/o error
- */
- LogFile(const std::string& name, bool readwrite = false);
-
- /** closes */
- ~LogFile();
-
- /** append to file. does not return until sync'd. uses direct i/o when possible.
- throws UserAssertion on an i/o error
- note direct i/o may have alignment requirements
- */
- void synchronousAppend(const void* buf, size_t len);
-
-    /** write at the specified offset. must be aligned. does not return until physically
-     *  written. thread safe. */
-    void writeAt(unsigned long long offset, const void* _buf, size_t _len);
-
- void readAt(unsigned long long offset, void* _buf, size_t _len);
-
- const std::string _name;
-
- void truncate(); // Removes extra data after current position
-
-private:
-    // Originally disks had a sector size of 512 bytes; after Advanced Format disks were deployed
-    // in 2011, the default minimum sector size became 4096.
-    // The direct I/O size is based on the physical disk sector, not the VM page size.
- const size_t minDirectIOSizeBytes = 4096;
-
-private:
-#if defined(_WIN32)
- typedef HANDLE fd_type;
-#else
- typedef int fd_type;
-#endif
- fd_type _fd;
- bool _direct; // are we using direct I/O
-
-    // Block size: with direct I/O we need to test alignment against the device block size,
-    // which can differ from 4 KB.
- size_t _blkSize;
-};
-}
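For context, this is roughly how the deleted interface was driven by the journalLatencyTest command earlier in this diff; the path is abbreviated, and AlignedBuilder (also removed in this commit) supplied the block-aligned buffer that synchronousAppend needs under direct I/O.

    // Reconstructed call site, not verbatim from the tree:
    LogFile f("/data/db/journal/journalLatencyTest");
    AlignedBuilder b(1024 * 1024);
    f.synchronousAppend(b.buf(), 8192);  // does not return until the bytes are synced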
diff --git a/src/mongo/db/storage/mmap_v1/mmap.cpp b/src/mongo/db/storage/mmap_v1/mmap.cpp
deleted file mode 100644
index f8d12295ce3..00000000000
--- a/src/mongo/db/storage/mmap_v1/mmap.cpp
+++ /dev/null
@@ -1,252 +0,0 @@
-// mmap.cpp
-
-/* Copyright 2009 10gen Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects
- * for all of the code used other than as permitted herein. If you modify
- * file(s) with this exception, you may extend this exception to your
- * version of the file(s), but you are not obligated to do so. If you do not
- * wish to do so, delete this exception statement from your version. If you
- * delete this exception statement from all source files in the program,
- * then also delete it in the license file.
- */
-
-#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kControl
-
-#include "mongo/platform/basic.h"
-
-#include "mongo/db/storage/mmap_v1/mmap.h"
-
-#include <boost/filesystem/operations.hpp>
-
-#include "mongo/base/owned_pointer_vector.h"
-#include "mongo/db/client.h"
-#include "mongo/db/concurrency/locker.h"
-#include "mongo/db/operation_context.h"
-#include "mongo/db/storage/storage_options.h"
-#include "mongo/util/log.h"
-#include "mongo/util/map_util.h"
-#include "mongo/util/mongoutils/str.h"
-#include "mongo/util/processinfo.h"
-#include "mongo/util/progress_meter.h"
-#include "mongo/util/startup_test.h"
-
-namespace mongo {
-
-using std::endl;
-using std::map;
-using std::set;
-using std::string;
-using std::stringstream;
-using std::vector;
-
-void minOSPageSizeBytesTest(size_t minOSPageSizeBytes) {
- fassert(16325, minOSPageSizeBytes > 0);
- fassert(16326, minOSPageSizeBytes < 1000000);
- // check to see if the page size is a power of 2
- fassert(16327, (minOSPageSizeBytes & (minOSPageSizeBytes - 1)) == 0);
-}
-
-namespace {
-set<MongoFile*> mmfiles;
-map<string, MongoFile*> pathToFile;
-mongo::AtomicUInt64 mmfNextId(0);
-} // namespace
-
-MemoryMappedFile::MemoryMappedFile(OperationContext* opCtx, OptionSet options)
- : MongoFile(options), _uniqueId(mmfNextId.fetchAndAdd(1)) {
- created(opCtx);
-}
-
-MemoryMappedFile::~MemoryMappedFile() {
- invariant(isClosed());
-
- auto opCtx = cc().getOperationContext();
- invariant(opCtx);
-
- LockMongoFilesShared lock(opCtx);
- for (std::set<MongoFile*>::const_iterator it = mmfiles.begin(); it != mmfiles.end(); it++) {
- invariant(*it != this);
- }
-}
-
-/*static*/ AtomicUInt64 MemoryMappedFile::totalMappedLength;
-
-void* MemoryMappedFile::create(OperationContext* opCtx,
- const std::string& filename,
- unsigned long long len,
- bool zero) {
- uassert(13468,
- string("can't create file already exists ") + filename,
- !boost::filesystem::exists(filename));
- void* p = map(opCtx, filename.c_str(), len);
- fassert(16331, p);
- if (zero) {
- size_t sz = (size_t)len;
- verify(len == sz);
- memset(p, 0, sz);
- }
- return p;
-}
-
-/*static*/ void MemoryMappedFile::updateLength(const char* filename, unsigned long long& length) {
- if (!boost::filesystem::exists(filename))
- return;
- // make sure we map full length if preexisting file.
- boost::uintmax_t l = boost::filesystem::file_size(filename);
- length = l;
-}
-
-void* MemoryMappedFile::map(OperationContext* opCtx, const char* filename) {
- unsigned long long l;
- try {
- l = boost::filesystem::file_size(filename);
- } catch (boost::filesystem::filesystem_error& e) {
- uasserted(15922,
- mongoutils::str::stream() << "couldn't get file length when opening mapping "
- << filename
- << ' '
- << e.what());
- }
-
- void* ret = map(opCtx, filename, l);
- fassert(16334, ret);
- return ret;
-}
-
-/* --- MongoFile -------------------------------------------------
- this is the administrative stuff
-*/
-
-MongoFile::MongoFile(OptionSet options)
- : _options(storageGlobalParams.readOnly ? (options | READONLY) : options) {}
-
-
-Lock::ResourceMutex LockMongoFilesShared::mmmutex("MMapMutex");
-unsigned LockMongoFilesShared::era = 99; // note this rolls over
-
-set<MongoFile*>& MongoFile::getAllFiles() {
- return mmfiles;
-}
-
-/* subclass must call in destructor (or at close).
- removes this from pathToFile and other maps
- safe to call more than once, albeit might be wasted work
-   ideally called near the close(), if the close happens well before object destruction
-*/
-void MongoFile::destroyed(OperationContext* opCtx) {
- LockMongoFilesShared::assertExclusivelyLocked(opCtx);
- mmfiles.erase(this);
- pathToFile.erase(filename());
-}
-
-/*static*/
-void MongoFile::closeAllFiles(OperationContext* opCtx, stringstream& message) {
- static int closingAllFiles = 0;
- if (closingAllFiles) {
- message << "warning closingAllFiles=" << closingAllFiles << endl;
- return;
- }
- ++closingAllFiles;
-
- LockMongoFilesExclusive lk(opCtx);
-
- ProgressMeter pm(mmfiles.size(), 2, 1, "files", "File Closing Progress");
- set<MongoFile*> temp = mmfiles;
- for (set<MongoFile*>::iterator i = temp.begin(); i != temp.end(); i++) {
- (*i)->close(opCtx); // close() now removes from mmfiles
- pm.hit();
- }
- message << "closeAllFiles() finished";
- --closingAllFiles;
-}
-
-/*static*/ int MongoFile::flushAll(OperationContext* opCtx, bool sync) {
- return _flushAll(opCtx, sync);
-}
-
-/*static*/ int MongoFile::_flushAll(OperationContext* opCtx, bool sync) {
- if (!sync) {
- int num = 0;
- LockMongoFilesShared lk(opCtx);
- for (set<MongoFile*>::iterator i = mmfiles.begin(); i != mmfiles.end(); i++) {
- num++;
- MongoFile* mmf = *i;
- if (!mmf)
- continue;
-
- invariant(!mmf->isOptionSet(READONLY));
- mmf->flush(sync);
- }
- return num;
- }
-
-    // a synchronous flush was requested
-
- // get a thread-safe Flushable object for each file first in a single lock
- // so that we can iterate and flush without doing any locking here
- OwnedPointerVector<Flushable> thingsToFlushWrapper;
- vector<Flushable*>& thingsToFlush = thingsToFlushWrapper.mutableVector();
- {
- LockMongoFilesShared lk(opCtx);
- for (set<MongoFile*>::iterator i = mmfiles.begin(); i != mmfiles.end(); i++) {
- MongoFile* mmf = *i;
- if (!mmf)
- continue;
- thingsToFlush.push_back(mmf->prepareFlush());
- }
- }
-
- for (size_t i = 0; i < thingsToFlush.size(); i++) {
- thingsToFlush[i]->flush(opCtx);
- }
-
- return thingsToFlush.size();
-}
-
-void MongoFile::created(OperationContext* opCtx) {
- // If we're a READONLY mapping, we don't want to ever flush.
- if (!isOptionSet(READONLY)) {
- LockMongoFilesExclusive lk(opCtx);
- mmfiles.insert(this);
- }
-}
-
-void MongoFile::setFilename(OperationContext* opCtx, const std::string& fn) {
- LockMongoFilesExclusive lk(opCtx);
- verify(_filename.empty());
- _filename = boost::filesystem::absolute(fn).generic_string();
- MongoFile*& ptf = pathToFile[_filename];
- massert(13617, "MongoFile : multiple opens of same filename", ptf == 0);
- ptf = this;
-}
-
-MongoFile* MongoFileFinder::findByPath(const std::string& path) const {
- return mapFindWithDefault(pathToFile,
- boost::filesystem::absolute(path).generic_string(),
- static_cast<MongoFile*>(NULL));
-}
-
-void dataSyncFailedHandler() {
- log() << "error syncing data to disk, probably a disk error";
- log() << " shutting down immediately to avoid corruption";
- fassertFailed(17346);
-}
-
-} // namespace mongo
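The _flushAll implementation deleted above uses a two-phase pattern for synchronous flushes: snapshot a Flushable handle per file while holding the shared lock, then run the slow fsync/msync calls without it, with each handle tolerating its file having been closed in the meantime. Below is a generic, compilable sketch of that pattern; all names are invented for illustration.

    #include <cstdio>
    #include <memory>
    #include <mutex>
    #include <set>
    #include <vector>

    struct FlushHandle {
        virtual ~FlushHandle() = default;
        virtual void flush() = 0;  // must tolerate the file having been closed
    };

    struct FlushableFile {
        virtual ~FlushableFile() = default;
        virtual std::unique_ptr<FlushHandle> prepareFlush() = 0;
    };

    class FileRegistry {
    public:
        void add(FlushableFile* f) {
            std::lock_guard<std::mutex> lk(_mutex);
            _files.insert(f);
        }

        size_t flushAll() {
            std::vector<std::unique_ptr<FlushHandle>> handles;
            {
                // Phase 1: hold the lock only long enough to snapshot handles.
                std::lock_guard<std::mutex> lk(_mutex);
                for (FlushableFile* f : _files)
                    handles.push_back(f->prepareFlush());
            }
            // Phase 2: the actual syncs run lock-free, so a slow disk does not
            // stall every thread that needs to open or close a file.
            for (auto& h : handles)
                h->flush();
            return handles.size();
        }

    private:
        std::mutex _mutex;
        std::set<FlushableFile*> _files;
    };

    int main() {
        struct NoopFile : FlushableFile {
            std::unique_ptr<FlushHandle> prepareFlush() override {
                struct H : FlushHandle {
                    void flush() override {}
                };
                return std::make_unique<H>();
            }
        };

        FileRegistry registry;
        NoopFile f;
        registry.add(&f);
        std::printf("flushed %zu file(s)\n", registry.flushAll());
        return 0;
    }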
diff --git a/src/mongo/db/storage/mmap_v1/mmap.h b/src/mongo/db/storage/mmap_v1/mmap.h
deleted file mode 100644
index 37a2e9e6fcd..00000000000
--- a/src/mongo/db/storage/mmap_v1/mmap.h
+++ /dev/null
@@ -1,325 +0,0 @@
-/* Copyright 2009 10gen Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects
- * for all of the code used other than as permitted herein. If you modify
- * file(s) with this exception, you may extend this exception to your
- * version of the file(s), but you are not obligated to do so. If you do not
- * wish to do so, delete this exception statement from your version. If you
- * delete this exception statement from all source files in the program,
- * then also delete it in the license file.
- */
-
-#pragma once
-
-#include <set>
-#include <sstream>
-#include <vector>
-
-#include "mongo/base/disallow_copying.h"
-#include "mongo/db/client.h"
-#include "mongo/db/concurrency/d_concurrency.h"
-#include "mongo/db/operation_context.h"
-
-namespace mongo {
-
-#if !defined(_WIN32)
-typedef int HANDLE;
-#endif
-
-extern std::size_t getMinOSPageSizeBytes();
-void minOSPageSizeBytesTest(size_t minOSPageSizeBytes);  // startup sanity check
-
-// call this if syncing data fails
-void dataSyncFailedHandler();
-
-class MAdvise {
- MONGO_DISALLOW_COPYING(MAdvise);
-
-public:
- enum Advice { Sequential = 1, Random = 2 };
- MAdvise(void* p, unsigned len, Advice a);
- ~MAdvise(); // destructor resets the range to MADV_NORMAL
-private:
- void* _p;
- unsigned _len;
-};
-
-// lock order: lock dbMutex before this if you lock both
-class LockMongoFilesShared {
- friend class LockMongoFilesExclusive;
- static Lock::ResourceMutex mmmutex;
- static unsigned era;
-
- Lock::SharedLock lk;
-
-public:
- explicit LockMongoFilesShared(OperationContext* opCtx) : lk(opCtx->lockState(), mmmutex) {
- // JS worker threads may not have cc() setup, as they work on behalf of other clients
- dassert(opCtx == cc().getOperationContext() || !cc().getOperationContext());
- }
-
- static void assertExclusivelyLocked(OperationContext* opCtx) {
- invariant(mmmutex.isExclusivelyLocked(opCtx->lockState()));
- }
-
- static void assertAtLeastReadLocked(OperationContext* opCtx) {
- invariant(mmmutex.isAtLeastReadLocked(opCtx->lockState()));
- }
-
- /** era changes anytime memory maps come and go. thus you can use this as a cheap way to check
- if nothing has changed since the last time you locked. Of course you must be shared locked
-        at the time of this call, otherwise a change could be in progress.
-
- This is used for yielding; see PageFaultException::touch().
- */
- static unsigned getEra() {
- return era;
- }
-};
-
-class LockMongoFilesExclusive {
- Lock::ExclusiveLock lk;
-
-public:
- explicit LockMongoFilesExclusive(OperationContext* opCtx)
- : lk(opCtx->lockState(), LockMongoFilesShared::mmmutex) {
- // JS worker threads may not have cc() setup, as they work on behalf of other clients
- dassert(opCtx == cc().getOperationContext() || !cc().getOperationContext());
- LockMongoFilesShared::era++;
- }
-};
-
-/* the administrative-ish stuff here */
-class MongoFile {
- MONGO_DISALLOW_COPYING(MongoFile);
-
-public:
- /** Flushable has to fail nicely if the underlying object gets killed */
- class Flushable {
- public:
- virtual ~Flushable() {}
- virtual void flush(OperationContext* opCtx) = 0;
- };
-
- enum Options {
- NONE = 0,
- SEQUENTIAL = 1 << 0, // hint - e.g. FILE_FLAG_SEQUENTIAL_SCAN on windows.
- READONLY = 1 << 1 // if true, writing to the mapped file will crash the process.
- };
-
- // Integral type used as a BitSet of Options.
- using OptionSet = std::underlying_type<Options>::type;
-
- MongoFile(OptionSet options);
- virtual ~MongoFile() = default;
-
- /** @param fun is called for each MongoFile.
- called from within a mutex that MongoFile uses. so be careful not to deadlock.
- */
- template <class F>
- static void forEach(OperationContext* opCtx, F fun);
-
- /**
- * note: you need to be in mmmutex when using this. forEach (above) handles that for you
- * automatically.
- */
- static std::set<MongoFile*>& getAllFiles();
-
- static int flushAll(OperationContext* opCtx, bool sync); // returns n flushed
- static void closeAllFiles(OperationContext* opCtx, std::stringstream& message);
-
- virtual bool isDurableMappedFile() {
- return false;
- }
-
- std::string filename() const {
- return _filename;
- }
- void setFilename(OperationContext* opCtx, const std::string& fn);
-
- virtual uint64_t getUniqueId() const = 0;
-
-private:
- std::string _filename;
- static int _flushAll(OperationContext* opCtx, bool sync); // returns n flushed
- const OptionSet _options;
-
-protected:
- /**
- * Implementations may assume this is called from within `LockMongoFilesExclusive`.
- */
- virtual void close(OperationContext* opCtx) = 0;
- virtual void flush(bool sync) = 0;
- /**
- * returns a thread safe object that you can call flush on
- * Flushable has to fail nicely if the underlying object gets killed
- */
- virtual Flushable* prepareFlush() = 0;
-
- /**
- * Returns true iff the file is closed.
- */
- virtual bool isClosed() = 0;
-
- void created(OperationContext* opCtx); /* subclass must call after create */
-
- /**
- * Implementations may assume this is called from within `LockMongoFilesExclusive`.
- *
- * subclass must call in destructor (or at close).
- * removes this from pathToFile and other maps
- * safe to call more than once, albeit might be wasted work
-     * ideally called near the close(), if the close happens well before object destruction
- */
- void destroyed(OperationContext* opCtx);
-
- virtual unsigned long long length() const = 0;
-
- bool isOptionSet(Options option) const {
- return _options & option;
- }
-};
-
-/** look up an MMF by filename. scoped mutex locking convention.
-    example:
-      MongoFileFinder finder(opCtx);
-      MongoFile *a = finder.findByPath("file_name_a");
-      MongoFile *b = finder.findByPath("file_name_b");
-*/
-class MongoFileFinder {
- MONGO_DISALLOW_COPYING(MongoFileFinder);
-
-public:
- MongoFileFinder(OperationContext* opCtx) : _lk(opCtx) {}
-
- /** @return The MongoFile object associated with the specified file name. If no file is open
- with the specified name, returns null.
- */
- MongoFile* findByPath(const std::string& path) const;
-
-private:
- LockMongoFilesShared _lk;
-};
-
-class MemoryMappedFile : public MongoFile {
-protected:
- virtual void* viewForFlushing() {
- if (views.size() == 0)
- return 0;
- verify(views.size() == 1);
- return views[0];
- }
-
-public:
- MemoryMappedFile(OperationContext* opCtx, OptionSet options = NONE);
-
- virtual ~MemoryMappedFile();
-
- /**
- * Callers must be holding a `LockMongoFilesExclusive`.
- */
- virtual void close(OperationContext* opCtx);
-
- /**
- * uasserts if file doesn't exist. fasserts on mmap error.
- */
- void* map(OperationContext* opCtx, const char* filename);
-
- /**
- * uasserts if file exists. fasserts on mmap error.
- * @param zero fill file with zeros when true
- */
- void* create(OperationContext* opCtx,
- const std::string& filename,
- unsigned long long len,
- bool zero);
-
- void flush(bool sync);
-
- virtual bool isClosed();
-
- virtual Flushable* prepareFlush();
-
- long shortLength() const {
- return (long)len;
- }
- unsigned long long length() const {
- return len;
- }
- HANDLE getFd() const {
- return fd;
- }
-
- /**
- * Creates a new view with the specified properties. Automatically cleaned up upon
- * close/destruction of the MemoryMappedFile object. Returns nullptr on mmap error.
- */
- void* createPrivateMap();
-
- virtual uint64_t getUniqueId() const {
- return _uniqueId;
- }
-
- static int totalMappedLengthInMB() {
- return static_cast<int>(totalMappedLength.load() / 1024 / 1024);
- }
-
-private:
- static void updateLength(const char* filename, unsigned long long& length);
-
- HANDLE fd = 0;
- HANDLE maphandle = 0;
- std::vector<void*> views;
- unsigned long long len = 0u;
- static AtomicUInt64 totalMappedLength;
- const uint64_t _uniqueId;
-#ifdef _WIN32
-    // Flush mutex
-    //
-    // Protects:
-    //   Prevents flush() and close() from running concurrently; close() cannot
-    //   complete while flush() is running.
-    // Lock ordering:
-    //   LockMongoFilesShared must be taken before _flushMutex if both are taken
- stdx::mutex _flushMutex;
-#endif
-
-protected:
- /**
- * Creates with length if DNE, otherwise validates input length. Returns nullptr on mmap
- * error.
- */
- void* map(OperationContext* opCtx, const char* filename, unsigned long long& length);
-
- /**
- * Close the current private view and open a new replacement. Returns nullptr on mmap error.
- */
- void* remapPrivateView(OperationContext* opCtx, void* oldPrivateAddr);
-};
-
-/** p is called from within a mutex that MongoFile uses. so be careful not to deadlock. */
-template <class F>
-inline void MongoFile::forEach(OperationContext* opCtx, F p) {
- LockMongoFilesShared lklk(opCtx);
- const std::set<MongoFile*>& mmfiles = MongoFile::getAllFiles();
- for (std::set<MongoFile*>::const_iterator i = mmfiles.begin(); i != mmfiles.end(); i++)
- p(*i);
-}
-
-} // namespace mongo
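Note that forEach above takes LockMongoFilesShared on the caller's behalf, so the callback runs under the mmap mutex. A hypothetical call site follows; the lambda is invented for illustration.

    // Sketch only. Avoid acquiring locks here that rank above the mmap
    // mutex, or the documented lock order is inverted.
    MongoFile::forEach(opCtx, [](MongoFile* file) {
        log() << file->filename();
    });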
diff --git a/src/mongo/db/storage/mmap_v1/mmap_posix.cpp b/src/mongo/db/storage/mmap_v1/mmap_posix.cpp
deleted file mode 100644
index b4f96412d9a..00000000000
--- a/src/mongo/db/storage/mmap_v1/mmap_posix.cpp
+++ /dev/null
@@ -1,333 +0,0 @@
-/* Copyright 2009 10gen Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects
- * for all of the code used other than as permitted herein. If you modify
- * file(s) with this exception, you may extend this exception to your
- * version of the file(s), but you are not obligated to do so. If you do not
- * wish to do so, delete this exception statement from your version. If you
- * delete this exception statement from all source files in the program,
- * then also delete it in the license file.
- */
-
-#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kControl
-
-#include "mongo/platform/basic.h"
-
-#include <errno.h>
-#include <fcntl.h>
-#include <sys/mman.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-
-#include "mongo/db/concurrency/d_concurrency.h"
-#include "mongo/db/storage/mmap_v1/file_allocator.h"
-#include "mongo/db/storage/mmap_v1/mmap.h"
-#include "mongo/platform/atomic_word.h"
-#include "mongo/util/log.h"
-#include "mongo/util/mongoutils/str.h"
-#include "mongo/util/processinfo.h"
-#include "mongo/util/startup_test.h"
-
-using std::endl;
-using std::numeric_limits;
-using std::vector;
-
-using namespace mongoutils;
-
-namespace mongo {
-
-namespace {
-void printMemInfo() {
- LogstreamBuilder out = log();
- out << "mem info: ";
-
- ProcessInfo pi;
- if (!pi.supported()) {
- out << " not supported";
- return;
- }
-
- out << "vsize: " << pi.getVirtualMemorySize() << " resident: " << pi.getResidentSize()
- << " mapped: " << MemoryMappedFile::totalMappedLengthInMB();
-}
-} // namespace
-} // namespace mongo
-
-std::size_t mongo::getMinOSPageSizeBytes() {
- static const std::size_t cachedSize = [] {
- std::size_t minOSPageSizeBytes = sysconf(_SC_PAGESIZE);
- minOSPageSizeBytesTest(minOSPageSizeBytes);
- return minOSPageSizeBytes;
- }();
- return cachedSize;
-}
-
-namespace mongo {
-
-void MemoryMappedFile::close(OperationContext* opCtx) {
- for (vector<void*>::iterator i = views.begin(); i != views.end(); i++) {
- munmap(*i, len);
- }
- views.clear();
- totalMappedLength.fetchAndSubtract(len);
- len = 0;
-
- if (fd) {
- ::close(fd);
- fd = 0;
- }
- destroyed(opCtx); // cleans up from the master list of mmaps
-}
-
-#ifndef O_NOATIME
-#define O_NOATIME (0)
-#endif
-
-#ifndef MAP_NORESERVE
-#define MAP_NORESERVE (0)
-#endif
-
-namespace {
-void* _pageAlign(void* p) {
- return (void*)((int64_t)p & ~(getMinOSPageSizeBytes() - 1));
-}
-
-class PageAlignTest : public StartupTest {
-public:
- void run() {
- {
- int64_t x = getMinOSPageSizeBytes() + 123;
- void* y = _pageAlign(reinterpret_cast<void*>(x));
- invariant(getMinOSPageSizeBytes() == reinterpret_cast<size_t>(y));
- }
- {
- int64_t a = static_cast<uint64_t>(numeric_limits<int>::max());
- a = a / getMinOSPageSizeBytes();
- a = a * getMinOSPageSizeBytes();
- // a should now be page aligned
-
- // b is not page aligned
- int64_t b = a + 123;
-
- void* y = _pageAlign(reinterpret_cast<void*>(b));
- invariant(a == reinterpret_cast<int64_t>(y));
- }
- }
-} pageAlignTest;
-}
-
-#if defined(__sun)
-MAdvise::MAdvise(void*, unsigned, Advice) {}
-MAdvise::~MAdvise() {}
-#else
-MAdvise::MAdvise(void* p, unsigned len, Advice a) {
- _p = _pageAlign(p);
-
- _len = len + static_cast<unsigned>(reinterpret_cast<size_t>(p) - reinterpret_cast<size_t>(_p));
-
- int advice = 0;
- switch (a) {
- case Sequential:
- advice = MADV_SEQUENTIAL;
- break;
- case Random:
- advice = MADV_RANDOM;
- break;
- }
-
- if (madvise(_p, _len, advice)) {
- error() << "madvise failed: " << errnoWithDescription();
- }
-}
-MAdvise::~MAdvise() {
- madvise(_p, _len, MADV_NORMAL);
-}
-#endif
-
-void* MemoryMappedFile::map(OperationContext* opCtx,
- const char* filename,
- unsigned long long& length) {
- // length may be updated by callee.
- setFilename(opCtx, filename);
- FileAllocator::get()->allocateAsap(filename, length);
-
- const bool readOnly = isOptionSet(READONLY);
-
- massert(
-        10446, str::stream() << "mmap: can't map zero-length file: " << filename, length > 0);
-
- const int posixOpenOpts = O_NOATIME | (readOnly ? O_RDONLY : O_RDWR);
- fd = ::open(filename, posixOpenOpts);
- if (fd <= 0) {
- severe() << "couldn't open " << filename << ' ' << errnoWithDescription() << endl;
- fd = 0; // our sentinel for not opened
- return 0;
- }
-
- unsigned long long filelen = lseek(fd, 0, SEEK_END);
- if (filelen != length) {
- severe() << "map file alloc failed, wanted: " << length << " filelen: " << filelen << ' '
- << sizeof(size_t);
- fassertFailed(16330);
- }
- lseek(fd, 0, SEEK_SET);
-
- const int mmapProtectionOpts = readOnly ? PROT_READ : (PROT_READ | PROT_WRITE);
- void* view = mmap(NULL, length, mmapProtectionOpts, MAP_SHARED, fd, 0);
- if (view == MAP_FAILED) {
- severe() << " mmap() failed for " << filename << " len:" << length << " "
- << errnoWithDescription() << endl;
- if (errno == ENOMEM) {
- if (sizeof(void*) == 4)
- severe() << "mmap failed with out of memory. You are using a 32-bit build and "
- "probably need to upgrade to 64"
- << endl;
- else
- severe() << "mmap failed with out of memory. (64 bit build)" << endl;
- }
- return 0;
- }
-
-
-#if !defined(__sun)
- if (isOptionSet(SEQUENTIAL)) {
- if (madvise(view, length, MADV_SEQUENTIAL)) {
- warning() << "map: madvise failed for " << filename << ' ' << errnoWithDescription()
- << endl;
- }
- }
-#endif
-
- // MemoryMappedFile successfully created, now update state.
- len = length;
- MemoryMappedFile::totalMappedLength.fetchAndAdd(len);
-
- views.push_back(view);
-
- return view;
-}
-
-void* MemoryMappedFile::createPrivateMap() {
- void* x = mmap(/*start*/ 0, len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_NORESERVE, fd, 0);
- if (x == MAP_FAILED) {
- if (errno == ENOMEM) {
- if (sizeof(void*) == 4) {
- severe() << "mmap private failed with out of memory. You are using a 32-bit build "
- "and probably need to upgrade to 64"
- << endl;
- } else {
- severe() << "mmap private failed with out of memory. (64 bit build)" << endl;
- }
- } else {
- severe() << "mmap private failed " << errnoWithDescription() << endl;
- }
- return 0;
- }
-
- views.push_back(x);
- return x;
-}
-
-void* MemoryMappedFile::remapPrivateView(OperationContext* opCtx, void* oldPrivateAddr) {
-#if defined(__sun) // SERVER-8795
- LockMongoFilesExclusive lockMongoFiles(opCtx);
-#endif
-
- // don't unmap, just mmap over the old region
- void* x = mmap(oldPrivateAddr,
- len,
- PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_NORESERVE | MAP_FIXED,
- fd,
- 0);
- if (x == MAP_FAILED) {
- int err = errno;
- severe() << "13601 Couldn't remap private view: " << errnoWithDescription(err) << endl;
- printMemInfo();
- abort();
- }
- verify(x == oldPrivateAddr);
- return x;
-}
-
-void MemoryMappedFile::flush(bool sync) {
- if (views.empty() || fd == 0 || !sync)
- return;
-
- bool useFsync = !ProcessInfo::preferMsyncOverFSync();
-
- if (useFsync ? fsync(fd) != 0 : msync(viewForFlushing(), len, MS_SYNC) != 0) {
-        // fsync/msync failed, this is very bad
- log() << (useFsync ? "fsync failed: " : "msync failed: ") << errnoWithDescription()
- << " file: " << filename() << endl;
- dataSyncFailedHandler();
- }
-}
-
-bool MemoryMappedFile::isClosed() {
- return !len && !fd && !views.size();
-}
-
-class PosixFlushable : public MemoryMappedFile::Flushable {
-public:
- PosixFlushable(MemoryMappedFile* theFile, void* view, HANDLE fd, long len)
- : _theFile(theFile), _view(view), _fd(fd), _len(len), _id(_theFile->getUniqueId()) {}
-
- void flush(OperationContext* opCtx) {
- if (_view == NULL || _fd == 0)
- return;
-
- if (ProcessInfo::preferMsyncOverFSync() ? msync(_view, _len, MS_SYNC) == 0
- : fsync(_fd) == 0) {
- return;
- }
-
- if (errno == EBADF) {
- // ok, we were unlocked, so this file was closed
- return;
- }
-
-        // some error; let's see if we're supposed to exist
- LockMongoFilesShared mmfilesLock(opCtx);
- std::set<MongoFile*> mmfs = MongoFile::getAllFiles();
- std::set<MongoFile*>::const_iterator it = mmfs.find(_theFile);
- if ((it == mmfs.end()) || ((*it)->getUniqueId() != _id)) {
- log() << "msync failed with: " << errnoWithDescription()
- << " but file doesn't exist anymore, so ignoring";
- // this was deleted while we were unlocked
- return;
- }
-
- // we got an error, and we still exist, so this is bad, we fail
- log() << "msync " << errnoWithDescription() << endl;
- dataSyncFailedHandler();
- }
-
- MemoryMappedFile* _theFile;
- void* _view;
- HANDLE _fd;
- long _len;
- const uint64_t _id;
-};
-
-MemoryMappedFile::Flushable* MemoryMappedFile::prepareFlush() {
- return new PosixFlushable(this, viewForFlushing(), fd, len);
-}
-
-} // namespace mongo
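The MAdvise wrapper deleted above widens the caller's range to a page boundary because madvise only accepts page-aligned addresses, and the masking trick in _pageAlign relies on the page size being a power of two, which the startup tests asserted. A standalone version of both checks (helper name and output are invented):

    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <cstdio>
    #include <unistd.h>

    // Round a pointer down to the nearest page boundary by masking the low
    // bits. This only works because the page size is a power of two.
    static void* pageAlign(void* p, size_t pageSize) {
        return (void*)((uintptr_t)p & ~(uintptr_t)(pageSize - 1));
    }

    int main() {
        size_t pageSize = (size_t)sysconf(_SC_PAGESIZE);
        assert(pageSize > 0 && (pageSize & (pageSize - 1)) == 0);

        // Same shape as the deleted PageAlignTest: an address of
        // pageSize + 123 must align down to exactly one page.
        void* y = pageAlign((void*)(uintptr_t)(pageSize + 123), pageSize);
        assert((uintptr_t)y == (uintptr_t)pageSize);

        std::printf("page size: %zu bytes\n", pageSize);
        return 0;
    }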
diff --git a/src/mongo/db/storage/mmap_v1/mmap_v1_database_catalog_entry.cpp b/src/mongo/db/storage/mmap_v1/mmap_v1_database_catalog_entry.cpp
deleted file mode 100644
index 369681a8298..00000000000
--- a/src/mongo/db/storage/mmap_v1/mmap_v1_database_catalog_entry.cpp
+++ /dev/null
@@ -1,915 +0,0 @@
-/**
- * Copyright (C) 2014 MongoDB Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kStorage
-
-#include "mongo/platform/basic.h"
-
-#include "mongo/db/storage/mmap_v1/mmap_v1_database_catalog_entry.h"
-
-#include <utility>
-
-#include "mongo/db/catalog/database.h"
-#include "mongo/db/catalog/database_holder.h"
-#include "mongo/db/catalog/index_catalog_entry.h"
-#include "mongo/db/index/2d_access_method.h"
-#include "mongo/db/index/btree_access_method.h"
-#include "mongo/db/index/fts_access_method.h"
-#include "mongo/db/index/hash_access_method.h"
-#include "mongo/db/index/haystack_access_method.h"
-#include "mongo/db/index/index_access_method.h"
-#include "mongo/db/index/s2_access_method.h"
-#include "mongo/db/operation_context.h"
-#include "mongo/db/record_id.h"
-#include "mongo/db/server_parameters.h"
-#include "mongo/db/storage/mmap_v1/btree/btree_interface.h"
-#include "mongo/db/storage/mmap_v1/catalog/namespace_details.h"
-#include "mongo/db/storage/mmap_v1/catalog/namespace_details_collection_entry.h"
-#include "mongo/db/storage/mmap_v1/catalog/namespace_details_rsv1_metadata.h"
-#include "mongo/db/storage/mmap_v1/data_file.h"
-#include "mongo/db/storage/mmap_v1/record_store_v1_capped.h"
-#include "mongo/db/storage/mmap_v1/record_store_v1_simple.h"
-#include "mongo/db/storage/record_data.h"
-#include "mongo/util/log.h"
-#include "mongo/util/scopeguard.h"
-
-namespace mongo {
-
-using std::unique_ptr;
-
-namespace {
-
-/**
- * Declaration for the "newCollectionsUsePowerOf2Sizes" server parameter, which has been
- * deprecated since 3.0.
- * Note that:
- * - setting it to true performs a no-op.
- * - setting it to false will fail.
- */
-// Unused, needed for server parameter.
-AtomicBool newCollectionsUsePowerOf2SizesFlag(true);
-
-class NewCollectionsUsePowerOf2SizesParameter
- : public ExportedServerParameter<bool, ServerParameterType::kStartupAndRuntime> {
-public:
- NewCollectionsUsePowerOf2SizesParameter()
- : ExportedServerParameter<bool, ServerParameterType::kStartupAndRuntime>(
- ServerParameterSet::getGlobal(),
- "newCollectionsUsePowerOf2Sizes",
- &newCollectionsUsePowerOf2SizesFlag) {}
-
- virtual Status validate(const bool& potentialNewValue) {
- if (!potentialNewValue) {
- return Status(ErrorCodes::BadValue,
- "newCollectionsUsePowerOf2Sizes cannot be set to false. "
- "Use noPadding instead during createCollection.");
- }
-
- return Status::OK();
- }
-
-private:
-} exportedNewCollectionsUsePowerOf2SizesParameter;
-
-
-int _massageExtentSize(const ExtentManager* em, long long size) {
- if (size < em->minSize())
- return em->minSize();
- if (size > em->maxSize())
- return em->maxSize();
-
- return static_cast<int>(size);
-}
-
-} // namespace
-
-
-/**
- * Registers the insertion of a new entry in the _collections cache with the RecoveryUnit,
- * allowing for rollback.
- */
-class MMAPV1DatabaseCatalogEntry::EntryInsertion : public RecoveryUnit::Change {
-public:
- EntryInsertion(StringData ns, MMAPV1DatabaseCatalogEntry* entry)
- : _ns(ns.toString()), _entry(entry) {}
-
- void rollback() {
- _entry->_removeFromCache(NULL, _ns);
- }
-
- void commit(boost::optional<Timestamp>) {}
-
-private:
- const std::string _ns;
- MMAPV1DatabaseCatalogEntry* const _entry;
-};
-
-/**
- * Registers the removal of an entry from the _collections cache with the RecoveryUnit,
- * delaying actual deletion of the information until the change is commited. This allows
- * for easy rollback.
- */
-class MMAPV1DatabaseCatalogEntry::EntryRemoval : public RecoveryUnit::Change {
-public:
-    // Rolls back the removal of the collection from the cache. Takes ownership of the
-    // cachedEntry and deletes it if the removal is final.
- EntryRemoval(StringData ns, MMAPV1DatabaseCatalogEntry* catalogEntry, Entry* cachedEntry)
- : _ns(ns.toString()), _catalogEntry(catalogEntry), _cachedEntry(cachedEntry) {}
-
- void rollback() {
- _catalogEntry->_collections[_ns] = _cachedEntry;
- }
-
- void commit(boost::optional<Timestamp>) {
- delete _cachedEntry;
- }
-
-private:
- const std::string _ns;
- MMAPV1DatabaseCatalogEntry* const _catalogEntry;
- Entry* const _cachedEntry;
-};
-
-MMAPV1DatabaseCatalogEntry::MMAPV1DatabaseCatalogEntry(OperationContext* opCtx,
- StringData name,
- StringData path,
- bool directoryPerDB,
- bool transient,
- std::unique_ptr<ExtentManager> extentManager)
- : DatabaseCatalogEntry(name),
- _path(path.toString()),
- _namespaceIndex(opCtx, _path, name.toString()),
- _extentManager(std::move(extentManager)) {
- ScopeGuard onErrorClose = MakeGuard([&] {
- _namespaceIndex.close(opCtx);
- _extentManager->close(opCtx);
- });
- massert(34469,
- str::stream() << name << " is not a valid database name",
- NamespaceString::validDBName(name));
- invariant(opCtx->lockState()->isDbLockedForMode(name, MODE_X));
-
- try {
- // First init the .ns file. If this fails, we may leak the .ns file, but this is OK
- // because subsequent openDB will go through this code path again.
- _namespaceIndex.init(opCtx);
-
- // Initialize the extent manager. This will create the first data file (.0) if needed
- // and if this fails we would leak the .ns file above. Leaking the .ns or .0 file is
- // acceptable, because subsequent openDB calls will exercise the code path again.
- Status s = _extentManager->init(opCtx);
- if (!s.isOK()) {
- msgasserted(16966, str::stream() << "_extentManager->init failed: " << s.toString());
- }
-
- // This is the actual loading of the on-disk structures into cache.
- _init(opCtx);
- } catch (const DBException& dbe) {
- warning() << "database " << path << " " << name
- << " could not be opened due to DBException " << dbe.code() << ": " << dbe.what();
- throw;
- } catch (const std::exception& e) {
- warning() << "database " << path << " " << name << " could not be opened " << e.what();
- throw;
- }
-
- onErrorClose.Dismiss();
-}
-
-MMAPV1DatabaseCatalogEntry::~MMAPV1DatabaseCatalogEntry() {
- for (CollectionMap::const_iterator i = _collections.begin(); i != _collections.end(); ++i) {
- delete i->second;
- }
- _collections.clear();
-}
-
-intmax_t dbSize(const std::string& database); // from repair_database.cpp
-
-int64_t MMAPV1DatabaseCatalogEntry::sizeOnDisk(OperationContext* opCtx) const {
- return static_cast<int64_t>(dbSize(name()));
-}
-
-void MMAPV1DatabaseCatalogEntry::_removeFromCache(RecoveryUnit* ru, StringData ns) {
- CollectionMap::iterator i = _collections.find(ns.toString());
- if (i == _collections.end()) {
- return;
- }
-
- // If there is an operation context, register a rollback to restore the cache entry
- if (ru) {
- ru->registerChange(new EntryRemoval(ns, this, i->second));
- } else {
- delete i->second;
- }
- _collections.erase(i);
-}
-
-Status MMAPV1DatabaseCatalogEntry::dropCollection(OperationContext* opCtx, StringData ns) {
- invariant(opCtx->lockState()->isCollectionLockedForMode(ns, MODE_X));
-
- NamespaceDetails* details = _namespaceIndex.details(ns);
-
- if (!details) {
- return Status(ErrorCodes::NamespaceNotFound, str::stream() << "ns not found: " << ns);
- }
-
- invariant(details->nIndexes == 0); // TODO: delete instead?
- invariant(details->indexBuildsInProgress == 0); // TODO: delete instead?
-
- _removeNamespaceFromNamespaceCollection(opCtx, ns);
- _removeFromCache(opCtx->recoveryUnit(), ns);
-
- // free extents
- if (!details->firstExtent.isNull()) {
- _extentManager->freeExtents(opCtx, details->firstExtent, details->lastExtent);
- *opCtx->recoveryUnit()->writing(&details->firstExtent) = DiskLoc().setInvalid();
- *opCtx->recoveryUnit()->writing(&details->lastExtent) = DiskLoc().setInvalid();
- }
-
- // remove from the catalog hashtable
- _namespaceIndex.kill_ns(opCtx, ns);
-
- return Status::OK();
-}
-
-
-Status MMAPV1DatabaseCatalogEntry::renameCollection(OperationContext* opCtx,
- StringData fromNS,
- StringData toNS,
- bool stayTemp) {
- Status s = _renameSingleNamespace(opCtx, fromNS, toNS, stayTemp);
- if (!s.isOK())
- return s;
-
- NamespaceDetails* details = _namespaceIndex.details(toNS);
- invariant(details);
-
- RecordStoreV1Base* systemIndexRecordStore = _getIndexRecordStore();
- auto cursor = systemIndexRecordStore->getCursor(opCtx);
- while (auto record = cursor->next()) {
- BSONObj oldIndexSpec = record->data.releaseToBson();
- if (fromNS != oldIndexSpec["ns"].valuestrsafe())
- continue;
-
- BSONObj newIndexSpec;
- {
- BSONObjBuilder b;
- BSONObjIterator i(oldIndexSpec);
- while (i.more()) {
- BSONElement e = i.next();
- if (strcmp(e.fieldName(), "ns") != 0)
- b.append(e);
- else
- b << "ns" << toNS;
- }
- newIndexSpec = b.obj();
- }
- // TODO SERVER-30638: using timestamp 0 for these inserts.
- StatusWith<RecordId> newIndexSpecLoc = systemIndexRecordStore->insertRecord(
- opCtx, newIndexSpec.objdata(), newIndexSpec.objsize(), Timestamp(), false);
- if (!newIndexSpecLoc.isOK())
- return newIndexSpecLoc.getStatus();
-
- const std::string& indexName = oldIndexSpec.getStringField("name");
-
- {
- // Fix the IndexDetails pointer.
- int indexI = getCollectionCatalogEntry(toNS)->_findIndexNumber(opCtx, indexName);
-
- IndexDetails& indexDetails = details->idx(indexI);
- *opCtx->recoveryUnit()->writing(&indexDetails.info) =
- DiskLoc::fromRecordId(newIndexSpecLoc.getValue());
- }
-
- {
- // Move the underlying namespace.
- std::string oldIndexNs = IndexDescriptor::makeIndexNamespace(fromNS, indexName);
- std::string newIndexNs = IndexDescriptor::makeIndexNamespace(toNS, indexName);
-
- Status s = _renameSingleNamespace(opCtx, oldIndexNs, newIndexNs, false);
- if (!s.isOK())
- return s;
- }
- // Invalidate index record for the old collection.
- invalidateSystemCollectionRecord(
- opCtx, NamespaceString(name(), "system.indexes"), record->id);
-
- systemIndexRecordStore->deleteRecord(opCtx, record->id);
- }
-
- return Status::OK();
-}
-
-Status MMAPV1DatabaseCatalogEntry::_renameSingleNamespace(OperationContext* opCtx,
- StringData fromNS,
- StringData toNS,
- bool stayTemp) {
- // some sanity checking
- NamespaceDetails* fromDetails = _namespaceIndex.details(fromNS);
- if (!fromDetails)
- return Status(ErrorCodes::NamespaceNotFound, "from namespace doesn't exist");
-
- if (_namespaceIndex.details(toNS))
- return Status(ErrorCodes::NamespaceExists, "to namespace already exists");
-
- // at this point, we haven't done anything destructive yet
-
- // ----
- // actually start moving
- // ----
-
- // this could throw, but if it does we're ok
- _namespaceIndex.add_ns(opCtx, toNS, fromDetails);
- NamespaceDetails* toDetails = _namespaceIndex.details(toNS);
-
- try {
- toDetails->copyingFrom(opCtx, toNS, _namespaceIndex, fromDetails); // fixes extraOffset
- } catch (DBException&) {
- // could end up here if .ns is full - if so try to clean up / roll back a little
- _namespaceIndex.kill_ns(opCtx, toNS);
- throw;
- }
-
-    // at this point, the .ns metadata has been moved
-
- _namespaceIndex.kill_ns(opCtx, fromNS);
- fromDetails = NULL;
-
- // fix system.namespaces
- BSONObj newSpec;
- RecordId oldSpecLocation = getCollectionCatalogEntry(fromNS)->getNamespacesRecordId();
- invariant(!oldSpecLocation.isNull());
- {
- BSONObj oldSpec =
- _getNamespaceRecordStore()->dataFor(opCtx, oldSpecLocation).releaseToBson();
- invariant(!oldSpec.isEmpty());
-
- BSONObjBuilder b;
- BSONObjIterator i(oldSpec.getObjectField("options"));
- while (i.more()) {
- BSONElement e = i.next();
- if (strcmp(e.fieldName(), "create") != 0) {
- if (stayTemp || (strcmp(e.fieldName(), "temp") != 0))
- b.append(e);
- } else {
- b << "create" << toNS;
- }
- }
- newSpec = b.obj();
- }
-
- RecordId rid =
- _addNamespaceToNamespaceCollection(opCtx, toNS, newSpec.isEmpty() ? 0 : &newSpec);
-
- // Invalidate old namespace record
- invalidateSystemCollectionRecord(
- opCtx, NamespaceString(name(), "system.namespaces"), oldSpecLocation);
-
- _getNamespaceRecordStore()->deleteRecord(opCtx, oldSpecLocation);
-
- Entry*& entry = _collections[toNS.toString()];
- invariant(entry == NULL);
- opCtx->recoveryUnit()->registerChange(new EntryInsertion(toNS, this));
- entry = new Entry();
- _removeFromCache(opCtx->recoveryUnit(), fromNS);
- _insertInCache(opCtx, toNS, rid, entry);
-
- return Status::OK();
-}
-
-void MMAPV1DatabaseCatalogEntry::invalidateSystemCollectionRecord(
- OperationContext* opCtx, NamespaceString systemCollectionNamespace, RecordId record) {
- // Having to go back up through the DatabaseHolder is a bit of a layering
- // violation, but at this point we're not going to add more MMAPv1 specific interfaces.
- StringData dbName = systemCollectionNamespace.db();
- invariant(opCtx->lockState()->isDbLockedForMode(dbName, MODE_X));
- Database* db = DatabaseHolder::getDatabaseHolder().get(opCtx, dbName);
- Collection* systemCollection = db->getCollection(opCtx, systemCollectionNamespace);
- systemCollection->getCursorManager()->invalidateDocument(opCtx, record, INVALIDATION_DELETION);
-}
-
-void MMAPV1DatabaseCatalogEntry::appendExtraStats(OperationContext* opCtx,
- BSONObjBuilder* output,
- double scale) const {
- if (isEmpty()) {
- output->appendNumber("fileSize", 0);
- } else {
- output->appendNumber("fileSize", _extentManager->fileSize() / scale);
- output->appendNumber("nsSizeMB",
- static_cast<int>(_namespaceIndex.fileLength() / (1024 * 1024)));
-
- int freeListSize = 0;
- int64_t freeListSpace = 0;
- _extentManager->freeListStats(opCtx, &freeListSize, &freeListSpace);
-
- BSONObjBuilder extentFreeList(output->subobjStart("extentFreeList"));
- extentFreeList.append("num", freeListSize);
- extentFreeList.appendNumber("totalSize", static_cast<long long>(freeListSpace / scale));
- extentFreeList.done();
-
- {
- const DataFileVersion version = _extentManager->getFileFormat(opCtx);
-
- BSONObjBuilder dataFileVersion(output->subobjStart("dataFileVersion"));
- dataFileVersion.append("major", version.majorRaw());
- dataFileVersion.append("minor", version.minorRaw());
- dataFileVersion.done();
- }
- }
-}
-
-bool MMAPV1DatabaseCatalogEntry::isOlderThan24(OperationContext* opCtx) const {
- if (_extentManager->numFiles() == 0)
- return false;
-
- const DataFileVersion version = _extentManager->getFileFormat(opCtx);
- fassert(40109, version.isCompatibleWithCurrentCode());
-
- return !version.is24IndexClean();
-}
-
-void MMAPV1DatabaseCatalogEntry::markIndexSafe24AndUp(OperationContext* opCtx) {
- if (_extentManager->numFiles() == 0)
- return;
-
- DataFileVersion version = _extentManager->getFileFormat(opCtx);
- fassert(40110, version.isCompatibleWithCurrentCode());
-
- if (version.is24IndexClean())
- return; // nothing to do
-
- version.setIs24IndexClean();
- _extentManager->setFileFormat(opCtx, version);
-}
-
-void MMAPV1DatabaseCatalogEntry::markCollationFeatureAsInUse(OperationContext* opCtx) {
- if (_extentManager->numFiles() == 0)
- return;
-
- DataFileVersion version = _extentManager->getFileFormat(opCtx);
- fassert(40150, version.isCompatibleWithCurrentCode());
-
- if (version.getMayHaveCollationMetadata())
- return;
-
- version.setMayHaveCollationMetadata();
- _extentManager->setFileFormat(opCtx, version);
-}
-
-Status MMAPV1DatabaseCatalogEntry::currentFilesCompatible(OperationContext* opCtx) const {
- if (_extentManager->numFiles() == 0)
- return Status::OK();
-
- return _extentManager->getOpenFile(0)->getHeader()->version.isCompatibleWithCurrentCode();
-}
-
-void MMAPV1DatabaseCatalogEntry::getCollectionNamespaces(std::list<std::string>* tofill) const {
- _namespaceIndex.getCollectionNamespaces(tofill);
-}
-
-void MMAPV1DatabaseCatalogEntry::_ensureSystemCollection(OperationContext* opCtx, StringData ns) {
- NamespaceDetails* details = _namespaceIndex.details(ns);
- if (details) {
- return;
- }
-
- if (storageGlobalParams.readOnly) {
- severe() << "Missing system collection '" << ns << "' for database '" << name() << "'";
- fassertFailed(34372);
- }
-
- _namespaceIndex.add_ns(opCtx, ns, DiskLoc(), false);
-}
-
-void MMAPV1DatabaseCatalogEntry::_init(OperationContext* opCtx) {
-    // We wrap the WriteUnitOfWork in an optional, as we can't create one in read-only mode.
- boost::optional<WriteUnitOfWork> wunit;
- if (!storageGlobalParams.readOnly) {
- wunit.emplace(opCtx);
- }
-
- // Upgrade freelist
- const NamespaceString oldFreeList(name(), "$freelist");
- NamespaceDetails* freeListDetails = _namespaceIndex.details(oldFreeList.ns());
- if (freeListDetails) {
- if (storageGlobalParams.readOnly) {
- severe() << "Legacy storage format detected, but server was started with the "
- "--queryableBackupMode command line parameter.";
- fassertFailedNoTrace(34373);
- }
-
- if (!freeListDetails->firstExtent.isNull()) {
- _extentManager->freeExtents(
- opCtx, freeListDetails->firstExtent, freeListDetails->lastExtent);
- }
-
- _namespaceIndex.kill_ns(opCtx, oldFreeList.ns());
- }
-
- DataFileVersion version = _extentManager->getFileFormat(opCtx);
- if (version.isCompatibleWithCurrentCode().isOK() && !version.mayHave30Freelist()) {
- if (storageGlobalParams.readOnly) {
- severe() << "Legacy storage format detected, but server was started with the "
- "--queryableBackupMode command line parameter.";
- fassertFailedNoTrace(34374);
- }
-
- // Any DB that can be opened and written to gets this flag set.
- version.setMayHave30Freelist();
- _extentManager->setFileFormat(opCtx, version);
- }
-
- const NamespaceString nsi(name(), "system.indexes");
- const NamespaceString nsn(name(), "system.namespaces");
-
- bool isSystemNamespacesGoingToBeNew = _namespaceIndex.details(nsn.toString()) == NULL;
- bool isSystemIndexesGoingToBeNew = _namespaceIndex.details(nsi.toString()) == NULL;
-
- _ensureSystemCollection(opCtx, nsn.toString());
- _ensureSystemCollection(opCtx, nsi.toString());
-
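-    // Register the insertions with the recovery unit so that a rollback removes the cache
-    // entries again.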
- if (isSystemNamespacesGoingToBeNew) {
- invariant(!storageGlobalParams.readOnly);
- opCtx->recoveryUnit()->registerChange(new EntryInsertion(nsn.toString(), this));
- }
- if (isSystemIndexesGoingToBeNew) {
- invariant(!storageGlobalParams.readOnly);
- opCtx->recoveryUnit()->registerChange(new EntryInsertion(nsi.toString(), this));
- }
-
- Entry*& indexEntry = _collections[nsi.toString()];
- Entry*& nsEntry = _collections[nsn.toString()];
-
- NamespaceDetails* const indexDetails = _namespaceIndex.details(nsi.toString());
- NamespaceDetails* const nsDetails = _namespaceIndex.details(nsn.toString());
-
-    // The initialization order has to be:
-    //   1) the system.namespaces record store
-    //   2) the system.indexes record store
-    //   3) the catalog entries
-
- if (!nsEntry) {
- nsEntry = new Entry();
-
- NamespaceDetailsRSV1MetaData* md =
- new NamespaceDetailsRSV1MetaData(nsn.toString(), nsDetails);
- nsEntry->recordStore.reset(
- new SimpleRecordStoreV1(opCtx, nsn.toString(), md, _extentManager.get(), false));
- }
-
- if (!indexEntry) {
- indexEntry = new Entry();
-
- NamespaceDetailsRSV1MetaData* md =
- new NamespaceDetailsRSV1MetaData(nsi.toString(), indexDetails);
-
- indexEntry->recordStore.reset(
- new SimpleRecordStoreV1(opCtx, nsi.toString(), md, _extentManager.get(), true));
- }
-
- RecordId indexNamespaceId;
- if (isSystemIndexesGoingToBeNew) {
- indexNamespaceId = _addNamespaceToNamespaceCollection(opCtx, nsi.toString(), NULL);
- }
-
- if (!nsEntry->catalogEntry) {
- nsEntry->catalogEntry.reset(
- new NamespaceDetailsCollectionCatalogEntry(nsn.toString(),
- nsDetails,
- nsEntry->recordStore.get(),
- RecordId(),
- indexEntry->recordStore.get(),
- this));
- }
-
- if (!indexEntry->catalogEntry) {
- indexEntry->catalogEntry.reset(
- new NamespaceDetailsCollectionCatalogEntry(nsi.toString(),
- indexDetails,
- nsEntry->recordStore.get(),
- indexNamespaceId,
- indexEntry->recordStore.get(),
- this));
- }
-
- if (!storageGlobalParams.readOnly) {
- wunit->commit();
- }
-
-    // Now put everything in the cache of namespaces. None of the operations below
-    // are transactional.
- RecordStoreV1Base* rs = _getNamespaceRecordStore();
- invariant(rs);
-
- auto cursor = rs->getCursor(opCtx);
- while (auto record = cursor->next()) {
- auto ns = record->data.releaseToBson()["name"].String();
- Entry*& entry = _collections[ns];
-
-        // The two cases where entry is not null are system.indexes and system.namespaces,
- // which we manually instantiated above. It is OK to skip these two collections,
- // because they don't have indexes on them anyway.
- if (entry) {
- if (entry->catalogEntry->getNamespacesRecordId().isNull()) {
- entry->catalogEntry->setNamespacesRecordId(opCtx, record->id);
- } else {
- invariant(entry->catalogEntry->getNamespacesRecordId() == record->id);
- }
- continue;
- }
-
- entry = new Entry();
- _insertInCache(opCtx, ns, record->id, entry);
- }
-}
-
-Status MMAPV1DatabaseCatalogEntry::createCollection(OperationContext* opCtx,
- StringData ns,
- const CollectionOptions& options,
- bool allocateDefaultSpace) {
- if (_namespaceIndex.details(ns)) {
- return Status(ErrorCodes::NamespaceExists,
- str::stream() << "namespace already exists: " << ns);
- }
-
- BSONObj optionsAsBSON = options.toBSON();
- RecordId rid = _addNamespaceToNamespaceCollection(opCtx, ns, &optionsAsBSON);
-
- _namespaceIndex.add_ns(opCtx, ns, DiskLoc(), options.capped);
- NamespaceDetails* details = _namespaceIndex.details(ns);
-
- // Set the flags.
- NamespaceDetailsRSV1MetaData(ns, details).replaceUserFlags(opCtx, options.flags);
-
- if (options.capped && options.cappedMaxDocs > 0) {
- opCtx->recoveryUnit()->writingInt(details->maxDocsInCapped) = options.cappedMaxDocs;
- }
-
- Entry*& entry = _collections[ns.toString()];
- invariant(!entry);
- opCtx->recoveryUnit()->registerChange(new EntryInsertion(ns, this));
- entry = new Entry();
- _insertInCache(opCtx, ns, rid, entry);
-
- if (allocateDefaultSpace) {
- RecordStoreV1Base* rs = _getRecordStore(ns);
- if (options.initialNumExtents > 0) {
- int size = _massageExtentSize(_extentManager.get(), options.cappedSize);
- for (int i = 0; i < options.initialNumExtents; i++) {
- rs->increaseStorageSize(opCtx, size, false);
- }
- } else if (!options.initialExtentSizes.empty()) {
- for (size_t i = 0; i < options.initialExtentSizes.size(); i++) {
- int size = options.initialExtentSizes[i];
- size = _massageExtentSize(_extentManager.get(), size);
- rs->increaseStorageSize(opCtx, size, false);
- }
- } else if (options.capped) {
-            // Capped collection: allocate extents until the requested cappedSize is reached.
- do {
- // Must do this at least once, otherwise we leave the collection with no
- // extents, which is invalid.
- int sz = _massageExtentSize(_extentManager.get(),
- options.cappedSize - rs->storageSize(opCtx));
- sz &= 0xffffff00;
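-                // The mask above rounds the size down to a multiple of 256 bytes.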
- rs->increaseStorageSize(opCtx, sz, false);
- } while (rs->storageSize(opCtx) < options.cappedSize);
- } else {
- rs->increaseStorageSize(opCtx, _extentManager->initialSize(128), false);
- }
- }
-
- if (!options.collation.isEmpty()) {
- markCollationFeatureAsInUse(opCtx);
- }
-
- return Status::OK();
-}
-
-void MMAPV1DatabaseCatalogEntry::createNamespaceForIndex(OperationContext* opCtx, StringData name) {
- // This is a simplified form of createCollection.
- invariant(!_namespaceIndex.details(name));
-
- RecordId rid = _addNamespaceToNamespaceCollection(opCtx, name, NULL);
- _namespaceIndex.add_ns(opCtx, name, DiskLoc(), false);
-
- Entry*& entry = _collections[name.toString()];
- invariant(!entry);
- opCtx->recoveryUnit()->registerChange(new EntryInsertion(name, this));
- entry = new Entry();
- _insertInCache(opCtx, name, rid, entry);
-}
-
-NamespaceDetailsCollectionCatalogEntry* MMAPV1DatabaseCatalogEntry::getCollectionCatalogEntry(
- StringData ns) const {
- CollectionMap::const_iterator i = _collections.find(ns.toString());
- if (i == _collections.end()) {
- return NULL;
- }
-
- invariant(i->second->catalogEntry.get());
- return i->second->catalogEntry.get();
-}
-
-void MMAPV1DatabaseCatalogEntry::_insertInCache(OperationContext* opCtx,
- StringData ns,
- RecordId rid,
- Entry* entry) {
- NamespaceDetails* details = _namespaceIndex.details(ns);
- invariant(details);
-
- entry->catalogEntry.reset(new NamespaceDetailsCollectionCatalogEntry(
- ns, details, _getNamespaceRecordStore(), rid, _getIndexRecordStore(), this));
-
- unique_ptr<NamespaceDetailsRSV1MetaData> md(new NamespaceDetailsRSV1MetaData(ns, details));
- const NamespaceString nss(ns);
-
- if (details->isCapped) {
- entry->recordStore.reset(new CappedRecordStoreV1(
- opCtx, NULL, ns, md.release(), _extentManager.get(), nss.coll() == "system.indexes"));
- } else {
- entry->recordStore.reset(new SimpleRecordStoreV1(
- opCtx, ns, md.release(), _extentManager.get(), nss.coll() == "system.indexes"));
- }
-}
-
-RecordStore* MMAPV1DatabaseCatalogEntry::getRecordStore(StringData ns) const {
- return _getRecordStore(ns);
-}
-
-RecordStoreV1Base* MMAPV1DatabaseCatalogEntry::_getRecordStore(StringData ns) const {
- CollectionMap::const_iterator i = _collections.find(ns.toString());
- if (i == _collections.end()) {
- return NULL;
- }
-
- invariant(i->second->recordStore.get());
- return i->second->recordStore.get();
-}
-
-IndexAccessMethod* MMAPV1DatabaseCatalogEntry::getIndex(OperationContext* opCtx,
- const CollectionCatalogEntry* collection,
- IndexCatalogEntry* entry) {
- const std::string& type = entry->descriptor()->getAccessMethodName();
-
- std::string ns = collection->ns().ns();
-
- RecordStoreV1Base* rs = _getRecordStore(entry->descriptor()->indexNamespace());
- invariant(rs);
-
- std::unique_ptr<SortedDataInterface> btree(
- getMMAPV1Interface(entry->headManager(),
- rs,
- &rs->savedCursors,
- entry->ordering(),
- entry->descriptor()->indexNamespace(),
- entry->descriptor()->version(),
- entry->descriptor()->unique()));
-
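-    // Dispatch on the access method name; the empty string denotes a plain btree index.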
- if (IndexNames::HASHED == type)
- return new HashAccessMethod(entry, btree.release());
-
- if (IndexNames::GEO_2DSPHERE == type)
- return new S2AccessMethod(entry, btree.release());
-
- if (IndexNames::TEXT == type)
- return new FTSAccessMethod(entry, btree.release());
-
- if (IndexNames::GEO_HAYSTACK == type)
- return new HaystackAccessMethod(entry, btree.release());
-
- if ("" == type)
- return new BtreeAccessMethod(entry, btree.release());
-
- if (IndexNames::GEO_2D == type)
- return new TwoDAccessMethod(entry, btree.release());
-
- log() << "Can't find index for keyPattern " << entry->descriptor()->keyPattern();
- fassertFailed(17489);
-}
-
-RecordStoreV1Base* MMAPV1DatabaseCatalogEntry::_getIndexRecordStore() {
- const NamespaceString nss(name(), "system.indexes");
- Entry* entry = _collections[nss.toString()];
- invariant(entry);
-
- return entry->recordStore.get();
-}
-
-RecordStoreV1Base* MMAPV1DatabaseCatalogEntry::_getNamespaceRecordStore() const {
- const NamespaceString nss(name(), "system.namespaces");
- CollectionMap::const_iterator i = _collections.find(nss.toString());
- invariant(i != _collections.end());
-
- return i->second->recordStore.get();
-}
-
-RecordId MMAPV1DatabaseCatalogEntry::_addNamespaceToNamespaceCollection(OperationContext* opCtx,
- StringData ns,
- const BSONObj* options) {
- if (nsToCollectionSubstring(ns) == "system.namespaces") {
- // system.namespaces holds all the others, so it is not explicitly listed in the catalog.
- return {};
- }
-
- BSONObjBuilder b;
- b.append("name", ns);
- if (options && !options->isEmpty()) {
- b.append("options", *options);
- }
-
- const BSONObj obj = b.done();
-
- RecordStoreV1Base* rs = _getNamespaceRecordStore();
- invariant(rs);
- // TODO SERVER-30638: using timestamp 0 for these inserts.
- StatusWith<RecordId> loc =
- rs->insertRecord(opCtx, obj.objdata(), obj.objsize(), Timestamp(), false);
- massertStatusOK(loc.getStatus());
- return loc.getValue();
-}
-
-void MMAPV1DatabaseCatalogEntry::_removeNamespaceFromNamespaceCollection(OperationContext* opCtx,
- StringData ns) {
- if (nsToCollectionSubstring(ns) == "system.namespaces") {
- // system.namespaces holds all the others, so it is not explicitly listed in the catalog.
- return;
- }
-
- auto entry = _collections.find(ns.toString());
- if (entry == _collections.end()) {
- return;
- }
-
- RecordStoreV1Base* rs = _getNamespaceRecordStore();
- invariant(rs);
-
- // Invalidate old namespace record
- RecordId oldSpecLocation = entry->second->catalogEntry->getNamespacesRecordId();
- invalidateSystemCollectionRecord(
- opCtx, NamespaceString(name(), "system.namespaces"), oldSpecLocation);
-
- rs->deleteRecord(opCtx, oldSpecLocation);
-}
-
-CollectionOptions MMAPV1DatabaseCatalogEntry::getCollectionOptions(OperationContext* opCtx,
- StringData ns) const {
- if (nsToCollectionSubstring(ns) == "system.namespaces") {
- return {};
- }
-
- auto entry = _collections.find(ns.toString());
- if (entry == _collections.end()) {
- return {};
- }
-
- return getCollectionOptions(opCtx, entry->second->catalogEntry->getNamespacesRecordId());
-}
-
-CollectionOptions MMAPV1DatabaseCatalogEntry::getCollectionOptions(OperationContext* opCtx,
- RecordId rid) const {
- CollectionOptions options;
-
- if (rid.isNull()) {
- return options;
- }
-
- RecordStoreV1Base* rs = _getNamespaceRecordStore();
- invariant(rs);
-
- RecordData data;
- invariant(rs->findRecord(opCtx, rid, &data));
-
- if (data.releaseToBson()["options"].isABSONObj()) {
- Status status = options.parse(data.releaseToBson()["options"].Obj(),
- CollectionOptions::parseForStorage);
- fassert(18523, status);
- }
- return options;
-}
-} // namespace mongo
diff --git a/src/mongo/db/storage/mmap_v1/mmap_v1_database_catalog_entry.h b/src/mongo/db/storage/mmap_v1/mmap_v1_database_catalog_entry.h
deleted file mode 100644
index 67e562d4fe2..00000000000
--- a/src/mongo/db/storage/mmap_v1/mmap_v1_database_catalog_entry.h
+++ /dev/null
@@ -1,208 +0,0 @@
-/**
- * Copyright (C) 2014 MongoDB Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#pragma once
-
-#include <map>
-#include <string>
-
-#include "mongo/base/status.h"
-#include "mongo/base/string_data.h"
-#include "mongo/db/catalog/database_catalog_entry.h"
-#include "mongo/db/storage/mmap_v1/catalog/namespace_details_collection_entry.h"
-#include "mongo/db/storage/mmap_v1/catalog/namespace_index.h"
-#include "mongo/db/storage/mmap_v1/mmap_v1_extent_manager.h"
-
-namespace mongo {
-
-class CollectionCatalogEntry;
-struct CollectionOptions;
-class IndexAccessMethod;
-class IndexCatalogEntry;
-class IndexDescriptor;
-class RecordId;
-class RecordStore;
-class RecordStoreV1Base;
-class RecoveryUnit;
-class OperationContext;
-
-class MMAPV1DatabaseCatalogEntry : public DatabaseCatalogEntry {
-public:
- MMAPV1DatabaseCatalogEntry(OperationContext* opCtx,
- StringData name,
- StringData path,
- bool directoryperdb,
- bool transient,
- std::unique_ptr<ExtentManager> extentManager);
-
- virtual ~MMAPV1DatabaseCatalogEntry();
-
- /**
- * Must be called before destruction.
- */
- virtual void close(OperationContext* opCtx) {
- _extentManager->close(opCtx);
- _namespaceIndex.close(opCtx);
- }
-
-    // These two seem the same and yet are different.
-    // TODO(ERH): ideally consolidate them into one.
- virtual bool exists() const {
- return _namespaceIndex.pathExists();
- }
- virtual bool isEmpty() const {
- return !_namespaceIndex.allocated();
- }
- virtual bool hasUserData() const {
- // The two collections which exist and can't be removed are:
- // system.indexes
- // system.namespaces
- return _collections.size() > 2;
- }
-
- virtual int64_t sizeOnDisk(OperationContext* opCtx) const;
-
- virtual bool isOlderThan24(OperationContext* opCtx) const;
- virtual void markIndexSafe24AndUp(OperationContext* opCtx);
-
- // Records in the data file version bits that an index or collection may have an associated
- // collation.
- void markCollationFeatureAsInUse(OperationContext* opCtx);
-
- virtual Status currentFilesCompatible(OperationContext* opCtx) const;
-
- virtual void appendExtraStats(OperationContext* opCtx, BSONObjBuilder* out, double scale) const;
-
- Status createCollection(OperationContext* opCtx,
- StringData ns,
- const CollectionOptions& options,
- bool allocateDefaultSpace);
-
- Status dropCollection(OperationContext* opCtx, StringData ns);
-
- Status renameCollection(OperationContext* opCtx,
- StringData fromNS,
- StringData toNS,
- bool stayTemp);
-
- void getCollectionNamespaces(std::list<std::string>* tofill) const;
-
- /**
- * will return NULL if ns does not exist
- */
- NamespaceDetailsCollectionCatalogEntry* getCollectionCatalogEntry(StringData ns) const;
-
- RecordStore* getRecordStore(StringData ns) const;
-
- IndexAccessMethod* getIndex(OperationContext* opCtx,
- const CollectionCatalogEntry* collection,
- IndexCatalogEntry* index);
-
- const ExtentManager* getExtentManager() const {
- return _extentManager.get();
- }
- ExtentManager* getExtentManager() {
- return _extentManager.get();
- }
-
- CollectionOptions getCollectionOptions(OperationContext* opCtx, StringData ns) const;
-
- CollectionOptions getCollectionOptions(OperationContext* opCtx, RecordId nsRid) const;
-
- /**
- * Creates a CollectionCatalogEntry in the form of an index rather than a collection.
- * MMAPv1 puts both indexes and collections into CCEs. A namespace named 'name' must not
- * exist.
- */
- void createNamespaceForIndex(OperationContext* opCtx, StringData name);
- static void invalidateSystemCollectionRecord(OperationContext* opCtx,
- NamespaceString systemCollectionNamespace,
- RecordId record);
-
-private:
- class EntryInsertion;
- class EntryRemoval;
-
- friend class NamespaceDetailsCollectionCatalogEntry;
-
- // The _collections map is a cache for efficiently looking up namespace information. Access
- // to the cache is protected by holding the appropriate DB lock. Regular operations
- // (insert/update/delete/query) hold intent locks on the database and they access the cache
-    // directly. Metadata operations, such as create db/collection, etc., acquire an exclusive lock
- // on the database, which protects against concurrent readers of the cache.
- //
- // Once initialized, the cache must remain consistent with the data in the memory-mapped
- // database files through _removeFromCache and _insertInCache. These methods use the
- // RecoveryUnit to ensure correct handling of rollback.
-
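-    // Each Entry pairs a collection's catalog metadata with its record store.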
- struct Entry {
- std::unique_ptr<NamespaceDetailsCollectionCatalogEntry> catalogEntry;
- std::unique_ptr<RecordStoreV1Base> recordStore;
- };
-
- typedef std::map<std::string, Entry*> CollectionMap;
-
-
- RecordStoreV1Base* _getIndexRecordStore();
- RecordStoreV1Base* _getNamespaceRecordStore() const;
- RecordStoreV1Base* _getRecordStore(StringData ns) const;
-
- RecordId _addNamespaceToNamespaceCollection(OperationContext* opCtx,
- StringData ns,
- const BSONObj* options);
-
- void _removeNamespaceFromNamespaceCollection(OperationContext* opCtx, StringData ns);
-
- Status _renameSingleNamespace(OperationContext* opCtx,
- StringData fromNS,
- StringData toNS,
- bool stayTemp);
-
- void _ensureSystemCollection(OperationContext* opCtx, StringData ns);
-
- void _init(OperationContext* opCtx);
-
- /**
- * Populate the _collections cache.
- */
- void _insertInCache(OperationContext* opCtx, StringData ns, RecordId rid, Entry* entry);
-
- /**
- * Drop cached information for specified namespace. If a RecoveryUnit is specified,
- * use it to allow rollback. When ru is null, removal is unconditional.
- */
- void _removeFromCache(RecoveryUnit* ru, StringData ns);
-
-
- const std::string _path;
-
- NamespaceIndex _namespaceIndex;
- std::unique_ptr<ExtentManager> _extentManager;
- CollectionMap _collections;
-};
-}
diff --git a/src/mongo/db/storage/mmap_v1/mmap_v1_engine.cpp b/src/mongo/db/storage/mmap_v1/mmap_v1_engine.cpp
deleted file mode 100644
index 58bb1da6118..00000000000
--- a/src/mongo/db/storage/mmap_v1/mmap_v1_engine.cpp
+++ /dev/null
@@ -1,420 +0,0 @@
-/**
-* Copyright (C) 2014 MongoDB Inc.
-*
-* This program is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Affero General Public License, version 3,
-* as published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU Affero General Public License for more details.
-*
-* You should have received a copy of the GNU Affero General Public License
-* along with this program. If not, see <http://www.gnu.org/licenses/>.
-*
-* As a special exception, the copyright holders give permission to link the
-* code of portions of this program with the OpenSSL library under certain
-* conditions as described in each individual source file and distribute
-* linked combinations including the program with the OpenSSL library. You
-* must comply with the GNU Affero General Public License in all respects for
-* all of the code used other than as permitted herein. If you modify file(s)
-* with this exception, you may extend this exception to your version of the
-* file(s), but you are not obligated to do so. If you do not wish to do so,
-* delete this exception statement from your version. If you delete this
-* exception statement from all source files in the program, then also delete
-* it in the license file.
-*/
-
-#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kStorage
-
-#include "mongo/db/storage/mmap_v1/mmap_v1_engine.h"
-
-#include <boost/filesystem/operations.hpp>
-#include <boost/filesystem/path.hpp>
-#include <fstream>
-
-#ifdef __linux__
-#include <sys/sysmacros.h>
-#endif
-
-#include "mongo/db/client.h"
-#include "mongo/db/mongod_options.h"
-#include "mongo/db/operation_context.h"
-#include "mongo/db/storage/mmap_v1/data_file_sync.h"
-#include "mongo/db/storage/mmap_v1/dur.h"
-#include "mongo/db/storage/mmap_v1/dur_journal.h"
-#include "mongo/db/storage/mmap_v1/dur_recover.h"
-#include "mongo/db/storage/mmap_v1/dur_recovery_unit.h"
-#include "mongo/db/storage/mmap_v1/file_allocator.h"
-#include "mongo/db/storage/mmap_v1/mmap.h"
-#include "mongo/db/storage/mmap_v1/mmap_v1_database_catalog_entry.h"
-#include "mongo/db/storage/mmap_v1/mmap_v1_options.h"
-#include "mongo/db/storage/storage_engine_lock_file.h"
-#include "mongo/db/storage/storage_options.h"
-#include "mongo/util/clock_source.h"
-#include "mongo/util/log.h"
-
-
-namespace mongo {
-
-using std::endl;
-using std::ifstream;
-using std::string;
-using std::stringstream;
-using std::vector;
-
-namespace {
-
-#if !defined(__sun)
-// if doingRepair is true don't consider unclean shutdown an error
-void checkForUncleanShutdown(MMAPV1Engine* storageEngine,
- bool doingRepair,
- const StorageEngineLockFile& lockFile) {
- string name = lockFile.getFilespec();
- bool oldFile = lockFile.createdByUncleanShutdown();
-
- if (doingRepair) {
- // This logic was previously in shared option parsing code.
- storageGlobalParams.dur = false;
- }
-
- if (oldFile) {
-        // We check this here because we want to see if we can get the lock;
-        // if we can't, then it's probably just another mongod running.
-
- string errmsg;
- if (doingRepair && dur::haveJournalFiles()) {
- errmsg =
- "************** \n"
- "You specified --repair but there are dirty journal files. Please\n"
- "restart without --repair to allow the journal files to be replayed.\n"
- "If you wish to repair all databases, please shutdown cleanly and\n"
- "run with --repair again.\n"
- "**************";
- } else if (storageGlobalParams.dur) {
- if (!dur::haveJournalFiles(/*anyFiles=*/true)) {
- // Passing anyFiles=true as we are trying to protect against starting in an
- // unclean state with the journal directory unmounted. If there are any files,
- // even prealloc files, then it means that it is mounted so we can continue.
- // Previously there was an issue (SERVER-5056) where we would fail to start up
- // if killed during prealloc.
-
- vector<string> dbnames;
- storageEngine->listDatabases(&dbnames);
-
- if (dbnames.size() == 0) {
-                    // This means that mongod crashed between initial startup and
-                    // journaling initialization; it is safe to continue.
- } else {
- errmsg = str::stream()
- << "************** \n"
- << "old lock file: " << name << ". probably means unclean shutdown,\n"
- << "but there are no journal files to recover.\n"
- << "this is likely human error or filesystem corruption.\n"
- << "please make sure that your journal directory is mounted.\n"
- << "found " << dbnames.size() << " dbs.\n"
- << "see: http://dochub.mongodb.org/core/repair for more information\n"
- << "*************";
- }
- }
- } else {
- if (!dur::haveJournalFiles() && !doingRepair) {
- errmsg = str::stream() << "************** \n"
- << "Unclean shutdown detected.\n"
- << "Please visit http://dochub.mongodb.org/core/repair for "
- "recovery instructions.\n"
- << "*************";
- }
- }
-
- if (!errmsg.empty()) {
- log() << errmsg << endl;
- uassert(12596, "old lock file", 0);
- }
- }
-
- // Not related to lock file, but this is where we handle unclean shutdown
- if (!storageGlobalParams.dur && dur::haveJournalFiles()) {
- log() << "**************" << endl;
- log() << "Error: journal files are present in journal directory, yet starting without "
- "journaling enabled."
- << endl;
- log() << "It is recommended that you start with journaling enabled so that recovery may "
- "occur."
- << endl;
- log() << "**************" << endl;
- uasserted(13597, "can't start without --journal enabled when journal/ files are present");
- }
-}
-#else
-void checkForUncleanShutdown(MMAPV1Engine* storageEngine,
- bool doingRepair,
- const StorageEngineLockFile& lockFile) {
-    // TODO - it is very bad that the code above is not running here.
-
- if (doingRepair) {
- // This logic was previously in shared option parsing code.
- storageGlobalParams.dur = false;
- }
-
- // Not related to lock file, but this is where we handle unclean shutdown
- if (!storageGlobalParams.dur && dur::haveJournalFiles()) {
- log() << "**************" << endl;
- log() << "Error: journal files are present in journal directory, yet starting without "
- "--journal enabled."
- << endl;
- log() << "It is recommended that you start with journaling enabled so that recovery may "
- "occur."
- << endl;
- log() << "Alternatively (not recommended), you can backup everything, then delete the "
- "journal files, and run --repair"
- << endl;
- log() << "**************" << endl;
- uasserted(13618, "can't start without --journal enabled when journal/ files are present");
- }
-}
-#endif // !defined(__sun)
-
-
-/// Warn if readahead > 256KB (the GridFS chunk size)
-void checkReadAhead(const string& dir) {
-#ifdef __linux__
- try {
- const dev_t dev = getPartition(dir);
-
- // This path handles the case where the filesystem uses the whole device (including LVM)
- string path = str::stream() << "/sys/dev/block/" << major(dev) << ':' << minor(dev)
- << "/queue/read_ahead_kb";
-
- if (!boost::filesystem::exists(path)) {
- // This path handles the case where the filesystem is on a partition.
- path =
- str::stream() << "/sys/dev/block/" << major(dev) << ':'
- << minor(dev) // this is a symlink
- << "/.." // parent directory of a partition is for the whole device
- << "/queue/read_ahead_kb";
- }
-
- if (boost::filesystem::exists(path)) {
- ifstream file(path.c_str());
- if (file.is_open()) {
- int kb;
- file >> kb;
- if (kb > 256) {
- log() << startupWarningsLog;
-
- log() << "** WARNING: Readahead for " << dir << " is set to " << kb << "KB"
- << startupWarningsLog;
-
- log() << "** We suggest setting it to 256KB (512 sectors) or less"
- << startupWarningsLog;
-
- log() << "** http://dochub.mongodb.org/core/readahead"
- << startupWarningsLog;
- }
- }
- }
- } catch (const std::exception& e) {
- log() << "unable to validate readahead settings due to error: " << e.what()
- << startupWarningsLog;
- log() << "for more information, see http://dochub.mongodb.org/core/readahead"
- << startupWarningsLog;
- }
-#endif // __linux__
-}
-
-// This is unrelated to the _tmp directory in dbpath.
-void clearTmpFiles() {
- boost::filesystem::path path(storageGlobalParams.dbpath);
- for (boost::filesystem::directory_iterator i(path);
- i != boost::filesystem::directory_iterator();
- ++i) {
- string fileName = boost::filesystem::path(*i).leaf().string();
- if (boost::filesystem::is_directory(*i) && fileName.length() && fileName[0] == '$')
- boost::filesystem::remove_all(*i);
- }
-}
-} // namespace
-
-MMAPV1Engine::MMAPV1Engine(const StorageEngineLockFile* lockFile, ClockSource* cs)
- : MMAPV1Engine(lockFile, cs, stdx::make_unique<MmapV1ExtentManager::Factory>()) {}
-
-MMAPV1Engine::MMAPV1Engine(const StorageEngineLockFile* lockFile,
- ClockSource* cs,
- std::unique_ptr<ExtentManager::Factory> extentManagerFactory)
- : _recordAccessTracker(cs),
- _extentManagerFactory(std::move(extentManagerFactory)),
- _clock(cs),
- _startMs(_clock->now().toMillisSinceEpoch()) {
- // TODO check non-journal subdirs if using directory-per-db
- checkReadAhead(storageGlobalParams.dbpath);
-
- if (!storageGlobalParams.readOnly) {
- invariant(lockFile);
- checkForUncleanShutdown(this, storageGlobalParams.repair, *lockFile);
-
- FileAllocator::get()->start();
-
- MONGO_ASSERT_ON_EXCEPTION_WITH_MSG(clearTmpFiles(), "clear tmp files");
- }
-}
-
-void MMAPV1Engine::finishInit() {
- dataFileSync.go();
-
- // Replays the journal (if needed) and starts the background thread. This requires the
- // ability to create OperationContexts.
- dur::startup(_clock, _startMs);
-}
-
-MMAPV1Engine::~MMAPV1Engine() {
- for (EntryMap::const_iterator it = _entryMap.begin(); it != _entryMap.end(); ++it) {
- delete it->second;
- }
- _entryMap.clear();
-}
-
-RecoveryUnit* MMAPV1Engine::newRecoveryUnit() {
- return new DurRecoveryUnit();
-}
-
-void MMAPV1Engine::listDatabases(std::vector<std::string>* out) const {
- _listDatabases(storageGlobalParams.dbpath, out);
-}
-
-DatabaseCatalogEntry* MMAPV1Engine::getDatabaseCatalogEntry(OperationContext* opCtx,
- StringData db) {
- {
- stdx::lock_guard<stdx::mutex> lk(_entryMapMutex);
- EntryMap::const_iterator iter = _entryMap.find(db.toString());
- if (iter != _entryMap.end()) {
- return iter->second;
- }
- }
-
- // This is an on-demand database create/open. At this point, we are locked under X lock for
- // the database (MMAPV1DatabaseCatalogEntry's constructor checks that) so no two threads
-    // can be creating the same database concurrently. We need to create the database outside of
- // the _entryMapMutex so we do not deadlock (see SERVER-15880).
- MMAPV1DatabaseCatalogEntry* entry = new MMAPV1DatabaseCatalogEntry(
- opCtx,
- db,
- storageGlobalParams.dbpath,
- storageGlobalParams.directoryperdb,
- false,
- _extentManagerFactory->create(
- db, storageGlobalParams.dbpath, storageGlobalParams.directoryperdb));
-
- stdx::lock_guard<stdx::mutex> lk(_entryMapMutex);
-
- // Sanity check that we are not overwriting something
- invariant(_entryMap.insert(EntryMap::value_type(db.toString(), entry)).second);
-
- return entry;
-}
-
-Status MMAPV1Engine::closeDatabase(OperationContext* opCtx, StringData db) {
- // Before the files are closed, flush any potentially outstanding changes, which might
- // reference this database. Otherwise we will assert when subsequent applications of the
- // global journal entries occur, which happen to have write intents for the removed files.
- getDur().syncDataAndTruncateJournal(opCtx);
-
- stdx::lock_guard<stdx::mutex> lk(_entryMapMutex);
- MMAPV1DatabaseCatalogEntry* entry = _entryMap[db.toString()];
- if (entry) {
- entry->close(opCtx);
- }
- delete entry;
- _entryMap.erase(db.toString());
- return Status::OK();
-}
-
-Status MMAPV1Engine::dropDatabase(OperationContext* opCtx, StringData db) {
- Status status = closeDatabase(opCtx, db);
- if (!status.isOK())
- return status;
-
- _deleteDataFiles(db.toString());
-
- return Status::OK();
-}
-
-void MMAPV1Engine::_listDatabases(const std::string& directory, std::vector<std::string>* out) {
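-    // A database exists iff its namespace file ("<dbname>.ns") is present on disk.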
- boost::filesystem::path path(directory);
- for (boost::filesystem::directory_iterator i(path);
- i != boost::filesystem::directory_iterator();
- ++i) {
- if (storageGlobalParams.directoryperdb) {
- boost::filesystem::path p = *i;
- string dbName = p.leaf().string();
- p /= (dbName + ".ns");
- if (exists(p))
- out->push_back(dbName);
- } else {
- string fileName = boost::filesystem::path(*i).leaf().string();
- if (fileName.length() > 3 && fileName.substr(fileName.length() - 3, 3) == ".ns")
- out->push_back(fileName.substr(0, fileName.length() - 3));
- }
- }
-}
-
-int MMAPV1Engine::flushAllFiles(OperationContext* opCtx, bool sync) {
- return MongoFile::flushAll(opCtx, sync);
-}
-
-Status MMAPV1Engine::beginBackup(OperationContext* opCtx) {
- return Status::OK();
-}
-
-void MMAPV1Engine::endBackup(OperationContext* opCtx) {
- return;
-}
-
-bool MMAPV1Engine::isDurable() const {
- return getDur().isDurable();
-}
-
-bool MMAPV1Engine::isEphemeral() const {
- return false;
-}
-
-RecordAccessTracker& MMAPV1Engine::getRecordAccessTracker() {
- return _recordAccessTracker;
-}
-
-void MMAPV1Engine::cleanShutdown() {
- // wait until file preallocation finishes
- // we would only hang here if the file_allocator code generates a
- // synchronous signal, which we don't expect
- log() << "shutdown: waiting for fs preallocator..." << endl;
- auto opCtx = cc().getOperationContext();
-
-    // In some cases we may shut down early, before we have any operation context yet, but we need
- // one for synchronization purposes.
- ServiceContext::UniqueOperationContext newTxn;
- if (!opCtx) {
- newTxn = cc().makeOperationContext();
- opCtx = newTxn.get();
- invariant(opCtx);
- }
-
- FileAllocator::get()->waitUntilFinished();
-
- if (storageGlobalParams.dur) {
- log() << "shutdown: final commit..." << endl;
-
- getDur().commitAndStopDurThread(opCtx);
- }
-
- log() << "shutdown: closing all files..." << endl;
- stringstream ss3;
- MemoryMappedFile::closeAllFiles(opCtx, ss3);
- log() << ss3.str() << endl;
-}
-
-void MMAPV1Engine::setJournalListener(JournalListener* jl) {
- dur::setJournalListener(jl);
-}
-} // namespace mongo
diff --git a/src/mongo/db/storage/mmap_v1/mmap_v1_engine.h b/src/mongo/db/storage/mmap_v1/mmap_v1_engine.h
deleted file mode 100644
index 92ab5bfc6f5..00000000000
--- a/src/mongo/db/storage/mmap_v1/mmap_v1_engine.h
+++ /dev/null
@@ -1,130 +0,0 @@
-// mmap_v1_engine.h
-
-/**
-* Copyright (C) 2014 MongoDB Inc.
-*
-* This program is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Affero General Public License, version 3,
-* as published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU Affero General Public License for more details.
-*
-* You should have received a copy of the GNU Affero General Public License
-* along with this program. If not, see <http://www.gnu.org/licenses/>.
-*
-* As a special exception, the copyright holders give permission to link the
-* code of portions of this program with the OpenSSL library under certain
-* conditions as described in each individual source file and distribute
-* linked combinations including the program with the OpenSSL library. You
-* must comply with the GNU Affero General Public License in all respects for
-* all of the code used other than as permitted herein. If you modify file(s)
-* with this exception, you may extend this exception to your version of the
-* file(s), but you are not obligated to do so. If you do not wish to do so,
-* delete this exception statement from your version. If you delete this
-* exception statement from all source files in the program, then also delete
-* it in the license file.
-*/
-
-#pragma once
-
-#include <map>
-
-#include "mongo/db/storage/mmap_v1/extent_manager.h"
-#include "mongo/db/storage/mmap_v1/record_access_tracker.h"
-#include "mongo/db/storage/storage_engine.h"
-#include "mongo/stdx/mutex.h"
-
-namespace mongo {
-
-class ClockSource;
-class JournalListener;
-class MMAPV1DatabaseCatalogEntry;
-
-class MMAPV1Engine : public StorageEngine {
-public:
- MMAPV1Engine(const StorageEngineLockFile* lockFile, ClockSource* cs);
-
- MMAPV1Engine(const StorageEngineLockFile* lockFile,
- ClockSource* cs,
- std::unique_ptr<ExtentManager::Factory> extentManagerFactory);
- virtual ~MMAPV1Engine();
-
- void finishInit();
-
- RecoveryUnit* newRecoveryUnit();
- void listDatabases(std::vector<std::string>* out) const;
-
- int flushAllFiles(OperationContext* opCtx, bool sync);
- Status beginBackup(OperationContext* opCtx);
- void endBackup(OperationContext* opCtx);
-
- DatabaseCatalogEntry* getDatabaseCatalogEntry(OperationContext* opCtx, StringData db);
-
- virtual bool supportsDocLocking() const {
- return false;
- }
- virtual bool isMmapV1() const {
- return true;
- }
-
- virtual bool isDurable() const;
-
- virtual bool isEphemeral() const;
-
- virtual Status closeDatabase(OperationContext* opCtx, StringData db);
-
- virtual Status dropDatabase(OperationContext* opCtx, StringData db);
-
- virtual void cleanShutdown();
-
- // Callers should use repairDatabase instead.
- virtual Status repairRecordStore(OperationContext* opCtx, const std::string& ns) {
- return Status(ErrorCodes::InternalError, "MMAPv1 doesn't support repairRecordStore");
- }
-
- // MMAPv1 specific (non-virtual)
- Status repairDatabase(OperationContext* opCtx,
- const std::string& dbName,
- bool preserveClonedFilesOnFailure,
- bool backupOriginalFiles);
-
- /**
- * Gets a reference to the abstraction used by MMAP v1 to track recently used memory
- * addresses.
- *
- * MMAPv1 specific (non-virtual). This is non-const because callers are allowed to use
- * the returned reference to modify the RecordAccessTracker.
- *
- * The RecordAccessTracker is thread-safe (it uses its own mutex internally).
- */
- RecordAccessTracker& getRecordAccessTracker();
-
- void setJournalListener(JournalListener* jl) final;
-
- Timestamp getAllCommittedTimestamp() const override {
- MONGO_UNREACHABLE;
- }
-
-private:
- static void _listDatabases(const std::string& directory, std::vector<std::string>* out);
-
- stdx::mutex _entryMapMutex;
- typedef std::map<std::string, MMAPV1DatabaseCatalogEntry*> EntryMap;
- EntryMap _entryMap;
-
- // A record access tracker is essentially a large table which tracks recently used
- // addresses. It is used when higher layers (e.g. the query system) need to ask
- // the storage engine whether data is likely in physical memory.
- RecordAccessTracker _recordAccessTracker;
-
- std::unique_ptr<ExtentManager::Factory> _extentManagerFactory;
-
- ClockSource* _clock;
- int64_t _startMs;
-};
-
-void _deleteDataFiles(const std::string& database);
-}
diff --git a/src/mongo/db/storage/mmap_v1/mmap_v1_extent_manager.cpp b/src/mongo/db/storage/mmap_v1/mmap_v1_extent_manager.cpp
deleted file mode 100644
index f3127dc56b0..00000000000
--- a/src/mongo/db/storage/mmap_v1/mmap_v1_extent_manager.cpp
+++ /dev/null
@@ -1,675 +0,0 @@
-// mmap_v1_extent_manager.cpp
-
-/**
-* Copyright (C) 2014 MongoDB Inc.
-*
-* This program is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Affero General Public License, version 3,
-* as published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU Affero General Public License for more details.
-*
-* You should have received a copy of the GNU Affero General Public License
-* along with this program. If not, see <http://www.gnu.org/licenses/>.
-*
-* As a special exception, the copyright holders give permission to link the
-* code of portions of this program with the OpenSSL library under certain
-* conditions as described in each individual source file and distribute
-* linked combinations including the program with the OpenSSL library. You
-* must comply with the GNU Affero General Public License in all respects for
-* all of the code used other than as permitted herein. If you modify file(s)
-* with this exception, you may extend this exception to your version of the
-* file(s), but you are not obligated to do so. If you do not wish to do so,
-* delete this exception statement from your version. If you delete this
-* exception statement from all source files in the program, then also delete
-* it in the license file.
-*/
-
-#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kStorage
-
-#include <boost/filesystem/operations.hpp>
-
-#include "mongo/db/storage/mmap_v1/mmap_v1_extent_manager.h"
-
-#include "mongo/base/counter.h"
-#include "mongo/db/audit.h"
-#include "mongo/db/client.h"
-#include "mongo/db/concurrency/d_concurrency.h"
-#include "mongo/db/operation_context.h"
-#include "mongo/db/service_context.h"
-#include "mongo/db/storage/mmap_v1/data_file.h"
-#include "mongo/db/storage/mmap_v1/dur.h"
-#include "mongo/db/storage/mmap_v1/extent.h"
-#include "mongo/db/storage/mmap_v1/extent_manager.h"
-#include "mongo/db/storage/mmap_v1/mmap.h"
-#include "mongo/db/storage/mmap_v1/mmap_v1_engine.h"
-#include "mongo/db/storage/mmap_v1/mmap_v1_options.h"
-#include "mongo/db/storage/mmap_v1/record.h"
-#include "mongo/db/storage/record_fetcher.h"
-#include "mongo/stdx/memory.h"
-#include "mongo/util/fail_point_service.h"
-#include "mongo/util/file.h"
-#include "mongo/util/log.h"
-
-namespace mongo {
-
-using std::unique_ptr;
-using std::endl;
-using std::max;
-using std::string;
-using std::stringstream;
-
-// Turn on this failpoint to force the system to yield for a fetch. Setting to "alwaysOn"
-// will cause yields for fetching to occur on every 'kNeedsFetchFailFreq'th call to
-// recordNeedsFetch().
-static const int kNeedsFetchFailFreq = 2;
-static Counter64 needsFetchFailCounter;
-MONGO_FAIL_POINT_DEFINE(recordNeedsFetchFail);
-
-// Used to make sure the compiler doesn't get too smart on us when we're
-// trying to touch records.
-// volatile - prevents the compiler from optimizing away accesses to a mmapped page
-volatile int __record_touch_dummy = 1; // NOLINT
-
-class MmapV1RecordFetcher : public RecordFetcher {
- MONGO_DISALLOW_COPYING(MmapV1RecordFetcher);
-
-public:
- explicit MmapV1RecordFetcher(const MmapV1RecordHeader* record) : _record(record) {}
-
- virtual void setup(OperationContext* opCtx) {
- invariant(!_filesLock.get());
- _filesLock.reset(new LockMongoFilesShared(opCtx));
- }
-
- virtual void fetch() {
- // It's only legal to touch the record while we're holding a lock on the data files.
- invariant(_filesLock.get());
-
- const char* recordChar = reinterpret_cast<const char*>(_record);
-
-        // Here's where we actually dereference a pointer into the record. This is where
-        // we expect a page fault to occur, so we do it here, outside of the lock manager locks.
- __record_touch_dummy += *recordChar;
-
- // We're not going to touch the record anymore, so we can give up our
- // lock on mongo files. We do this here because we have to release the
- // lock on mongo files prior to reacquiring lock mgr locks.
- _filesLock.reset();
- }
-
-private:
- // The record which needs to be touched in order to page fault. Not owned by us.
- const MmapV1RecordHeader* _record;
-
- // This ensures that our MmapV1RecordHeader* does not drop out from under our feet before
- // we dereference it.
- std::unique_ptr<LockMongoFilesShared> _filesLock;
-};
-
-MmapV1ExtentManager::MmapV1ExtentManager(StringData dbname, StringData path, bool directoryPerDB)
- : _dbname(dbname.toString()),
- _path(path.toString()),
- _directoryPerDB(directoryPerDB),
- _rid(RESOURCE_METADATA, dbname) {
- StorageEngine* engine = getGlobalServiceContext()->getStorageEngine();
- invariant(engine->isMmapV1());
- MMAPV1Engine* mmapEngine = static_cast<MMAPV1Engine*>(engine);
- _recordAccessTracker = &mmapEngine->getRecordAccessTracker();
-}
-
-std::unique_ptr<ExtentManager> MmapV1ExtentManager::Factory::create(StringData dbname,
- StringData path,
- bool directoryPerDB) {
- return stdx::make_unique<MmapV1ExtentManager>(
- std::move(dbname), std::move(path), directoryPerDB);
-}
-
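-// Data files are named "<dbname>.<n>"; with --directoryperdb they are placed in a
-// per-database subdirectory.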
-boost::filesystem::path MmapV1ExtentManager::_fileName(int n) const {
- stringstream ss;
- ss << _dbname << '.' << n;
- boost::filesystem::path fullName(_path);
- if (_directoryPerDB)
- fullName /= _dbname;
- fullName /= ss.str();
- return fullName;
-}
-
-
-Status MmapV1ExtentManager::init(OperationContext* opCtx) {
- invariant(_files.empty());
-
- for (int n = 0; n < DiskLoc::MaxFiles; n++) {
- const boost::filesystem::path fullName = _fileName(n);
- if (!boost::filesystem::exists(fullName)) {
- break;
- }
-
- const std::string fullNameString = fullName.string();
-
- {
- // If the file is uninitialized we exit the loop because it is just prealloced. We
- // do this on a bare File object rather than using the DataFile because closing a
- // DataFile triggers dur::closingFileNotification() which is fatal if there are any
- // pending writes. Therefore we must only open files that we know we want to keep.
- File preview;
- preview.open(fullNameString.c_str(), /*readOnly*/ true);
- invariant(preview.is_open());
-
- // File can't be initialized if too small.
- if (preview.len() < sizeof(DataFileHeader)) {
- break;
- }
-
- // This is the equivalent of DataFileHeader::uninitialized().
- int version;
- preview.read(0, reinterpret_cast<char*>(&version), sizeof(version));
- invariant(!preview.bad());
- if (version == 0) {
- break;
- }
- }
-
- unique_ptr<DataFile> df(new DataFile(opCtx, n));
-
- Status s = df->openExisting(opCtx, fullNameString.c_str());
- if (!s.isOK()) {
- df->close(opCtx);
- return s;
- }
-
- invariant(!df->getHeader()->uninitialized());
-
- // We only checkUpgrade on files that we are keeping, not preallocs.
- df->getHeader()->checkUpgrade(opCtx);
-
- _files.push_back(df.release());
- }
-
- // If this is a new database being created, instantiate the first file and one extent so
- // we can have a coherent database.
- if (_files.empty()) {
- WriteUnitOfWork wuow(opCtx);
- _createExtent(opCtx, initialSize(128), false);
- wuow.commit();
-
- // Commit the journal and all changes to disk so that even if exceptions occur during
-        // subsequent initialization, we won't have uncommitted changes during file close.
- getDur().commitNow(opCtx);
- }
-
- return Status::OK();
-}
-
-const DataFile* MmapV1ExtentManager::_getOpenFile(int fileId) const {
- invariant(fileId >= 0 && fileId < _files.size(),
- str::stream() << "_getOpenFile() invalid file index requested " << fileId);
-
- return _files[fileId];
-}
-
-DataFile* MmapV1ExtentManager::_getOpenFile(int fileId) {
- invariant(fileId >= 0 && fileId < _files.size(),
- str::stream() << "_getOpenFile() invalid file index requested " << fileId);
-
- return _files[fileId];
-}
-
-DataFile* MmapV1ExtentManager::_addAFile(OperationContext* opCtx,
- int sizeNeeded,
- bool preallocateNextFile) {
- // Database must be stable and we need to be in some sort of an update operation in order
- // to add a new file.
- invariant(opCtx->lockState()->isDbLockedForMode(_dbname, MODE_IX));
-
- const int allocFileId = _files.size();
-
- int minSize = 0;
- if (allocFileId > 0) {
- // Make the next file at least as large as the previous
- minSize = _files[allocFileId - 1]->getHeader()->fileLength;
- }
-
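-    // Ensure there is room for the requested allocation plus the file header.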
- if (minSize < sizeNeeded + DataFileHeader::HeaderSize) {
- minSize = sizeNeeded + DataFileHeader::HeaderSize;
- }
-
- {
- unique_ptr<DataFile> allocFile(new DataFile(opCtx, allocFileId));
- const string allocFileName = _fileName(allocFileId).string();
-
- Timer t;
-
- try {
- allocFile->open(opCtx, allocFileName.c_str(), minSize, false);
- } catch (...) {
- allocFile->close(opCtx);
- throw;
- }
- if (t.seconds() > 1) {
- log() << "MmapV1ExtentManager took " << t.seconds()
- << " seconds to open: " << allocFileName;
- }
-
- // It's all good
- _files.push_back(allocFile.release());
- }
-
- // Preallocate is asynchronous
- if (preallocateNextFile) {
- unique_ptr<DataFile> nextFile(new DataFile(opCtx, allocFileId + 1));
- const string nextFileName = _fileName(allocFileId + 1).string();
-
- try {
- nextFile->open(opCtx, nextFileName.c_str(), minSize, false);
- } catch (...) {
- nextFile->close(opCtx);
- throw;
- }
- }
-
- // Returns the last file added
- return _files[allocFileId];
-}
-
-int MmapV1ExtentManager::numFiles() const {
- return _files.size();
-}
-
-long long MmapV1ExtentManager::fileSize() const {
- long long size = 0;
- for (int n = 0; boost::filesystem::exists(_fileName(n)); n++) {
- size += boost::filesystem::file_size(_fileName(n));
- }
-
- return size;
-}
-
-MmapV1RecordHeader* MmapV1ExtentManager::_recordForV1(const DiskLoc& loc) const {
- loc.assertOk();
- const DataFile* df = _getOpenFile(loc.a());
-
- int ofs = loc.getOfs();
- if (ofs < DataFileHeader::HeaderSize) {
- df->badOfs(ofs); // will msgassert - external call to keep out of the normal code path
- }
-
- return reinterpret_cast<MmapV1RecordHeader*>(df->p() + ofs);
-}
-
-MmapV1RecordHeader* MmapV1ExtentManager::recordForV1(const DiskLoc& loc) const {
- MmapV1RecordHeader* record = _recordForV1(loc);
- _recordAccessTracker->markAccessed(record);
- return record;
-}
-
-std::unique_ptr<RecordFetcher> MmapV1ExtentManager::recordNeedsFetch(const DiskLoc& loc) const {
- if (loc.isNull())
- return {};
- MmapV1RecordHeader* record = _recordForV1(loc);
-
- // For testing: if failpoint is enabled we randomly request fetches without
- // going to the RecordAccessTracker.
- if (MONGO_FAIL_POINT(recordNeedsFetchFail)) {
- needsFetchFailCounter.increment();
- if ((needsFetchFailCounter.get() % kNeedsFetchFailFreq) == 0) {
- return stdx::make_unique<MmapV1RecordFetcher>(record);
- }
- }
-
- if (!_recordAccessTracker->checkAccessedAndMark(record)) {
- return stdx::make_unique<MmapV1RecordFetcher>(record);
- }
-
- return {};
-}
-
-DiskLoc MmapV1ExtentManager::extentLocForV1(const DiskLoc& loc) const {
- MmapV1RecordHeader* record = recordForV1(loc);
- return DiskLoc(loc.a(), record->extentOfs());
-}
-
-Extent* MmapV1ExtentManager::extentForV1(const DiskLoc& loc) const {
- DiskLoc extentLoc = extentLocForV1(loc);
- return getExtent(extentLoc);
-}
-
-Extent* MmapV1ExtentManager::getExtent(const DiskLoc& loc, bool doSanityCheck) const {
- loc.assertOk();
- Extent* e = reinterpret_cast<Extent*>(_getOpenFile(loc.a())->p() + loc.getOfs());
- if (doSanityCheck)
- e->assertOk();
-
- _recordAccessTracker->markAccessed(e);
-
- return e;
-}
-
-void _checkQuota(bool enforceQuota, int fileNo) {
- if (!enforceQuota)
- return;
-
- if (fileNo < mmapv1GlobalOptions.quotaFiles)
- return;
-
- uasserted(12501, "quota exceeded");
-}
-
-int MmapV1ExtentManager::maxSize() const {
- return DataFile::maxSize() - DataFileHeader::HeaderSize - 16;
-}
-
-DiskLoc MmapV1ExtentManager::_createExtentInFile(
- OperationContext* opCtx, int fileNo, DataFile* f, int size, bool enforceQuota) {
- _checkQuota(enforceQuota, fileNo - 1);
-
- massert(10358, "bad new extent size", size >= minSize() && size <= maxSize());
-
- DiskLoc loc = f->allocExtentArea(opCtx, size);
- loc.assertOk();
-
- Extent* e = getExtent(loc, false);
- verify(e);
-
- *opCtx->recoveryUnit()->writing(&e->magic) = Extent::extentSignature;
- *opCtx->recoveryUnit()->writing(&e->myLoc) = loc;
- *opCtx->recoveryUnit()->writing(&e->length) = size;
-
- return loc;
-}
-
-
-DiskLoc MmapV1ExtentManager::_createExtent(OperationContext* opCtx, int size, bool enforceQuota) {
- size = quantizeExtentSize(size);
-
- if (size > maxSize())
- size = maxSize();
-
- verify(size < DataFile::maxSize());
-
- for (int i = numFiles() - 1; i >= 0; i--) {
- DataFile* f = _getOpenFile(i);
- invariant(f);
-
- if (f->getHeader()->unusedLength >= size) {
- return _createExtentInFile(opCtx, i, f, size, enforceQuota);
- }
- }
-
- _checkQuota(enforceQuota, numFiles());
-
- // no space in an existing file
- // allocate files until we either get one big enough or hit maxSize
- for (int i = 0; i < 8; i++) {
- DataFile* f = _addAFile(opCtx, size, false);
-
- if (f->getHeader()->unusedLength >= size) {
- return _createExtentInFile(opCtx, numFiles() - 1, f, size, enforceQuota);
- }
- }
-
- // callers don't check for null return code, so assert
- msgasserted(14810, "couldn't allocate space for a new extent");
-}
-
-DiskLoc MmapV1ExtentManager::_allocFromFreeList(OperationContext* opCtx,
- int approxSize,
- bool capped) {
- // setup extent constraints
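-    // Capped collections require a near-exact size match; other collections accept
-    // anything in roughly [0.8x, 1.4x] of the requested size.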
-
- int low, high;
- if (capped) {
- // be strict about the size
- low = approxSize;
- if (low > 2048)
- low -= 256;
- high = (int)(approxSize * 1.05) + 256;
- } else {
- low = (int)(approxSize * 0.8);
- high = (int)(approxSize * 1.4);
- }
- if (high <= 0) {
- // overflowed
- high = max(approxSize, maxSize());
- }
- if (high <= minSize()) {
- // the minimum extent size is 4097
- high = minSize() + 1;
- }
-
- // scan free list looking for something suitable
-
- int n = 0;
- Extent* best = 0;
- int bestDiff = 0x7fffffff;
- {
- Timer t;
- DiskLoc L = _getFreeListStart();
- while (!L.isNull()) {
- Extent* e = getExtent(L);
- if (e->length >= low && e->length <= high) {
- int diff = abs(e->length - approxSize);
- if (diff < bestDiff) {
- bestDiff = diff;
- best = e;
- if (((double)diff) / approxSize < 0.1) {
- // close enough
- break;
- }
- if (t.seconds() >= 2) {
-                        // We have spent a long time in the write lock and this extent is within
-                        // [low, high], so settle for it; this matters when the freelist is very long.
- break;
- }
- } else {
- OCCASIONALLY {
- if (high < 64 * 1024 && t.seconds() >= 2) {
- // be less picky if it is taking a long time
- high = 64 * 1024;
- }
- }
- }
- }
- L = e->xnext;
- ++n;
- }
- if (t.seconds() >= 10) {
- log() << "warning: slow scan in allocFromFreeList (in write lock)" << endl;
- }
- }
-
- if (n > 128) {
- LOG(n < 512 ? 1 : 0) << "warning: newExtent " << n << " scanned\n";
- }
-
- if (!best)
- return DiskLoc();
-
- // remove from the free list
- if (!best->xprev.isNull())
- *opCtx->recoveryUnit()->writing(&getExtent(best->xprev)->xnext) = best->xnext;
- if (!best->xnext.isNull())
- *opCtx->recoveryUnit()->writing(&getExtent(best->xnext)->xprev) = best->xprev;
- if (_getFreeListStart() == best->myLoc)
- _setFreeListStart(opCtx, best->xnext);
- if (_getFreeListEnd() == best->myLoc)
- _setFreeListEnd(opCtx, best->xprev);
-
- return best->myLoc;
-}
-
-DiskLoc MmapV1ExtentManager::allocateExtent(OperationContext* opCtx,
- bool capped,
- int size,
- bool enforceQuota) {
- Lock::ResourceLock rlk(opCtx->lockState(), _rid, MODE_X);
- bool fromFreeList = true;
- DiskLoc eloc = _allocFromFreeList(opCtx, size, capped);
- if (eloc.isNull()) {
- fromFreeList = false;
- eloc = _createExtent(opCtx, size, enforceQuota);
- }
-
- invariant(!eloc.isNull());
- invariant(eloc.isValid());
-
- LOG(1) << "MmapV1ExtentManager::allocateExtent"
- << " desiredSize:" << size << " fromFreeList: " << fromFreeList << " eloc: " << eloc;
-
- return eloc;
-}
-
-void MmapV1ExtentManager::freeExtent(OperationContext* opCtx, DiskLoc firstExt) {
- Lock::ResourceLock rlk(opCtx->lockState(), _rid, MODE_X);
- Extent* e = getExtent(firstExt);
- opCtx->recoveryUnit()->writing(&e->xnext)->Null();
- opCtx->recoveryUnit()->writing(&e->xprev)->Null();
- opCtx->recoveryUnit()->writing(&e->firstRecord)->Null();
- opCtx->recoveryUnit()->writing(&e->lastRecord)->Null();
-
-
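-    // Link the freed extent in at the head of the per-database free list.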
- if (_getFreeListStart().isNull()) {
- _setFreeListStart(opCtx, firstExt);
- _setFreeListEnd(opCtx, firstExt);
- } else {
- DiskLoc a = _getFreeListStart();
- invariant(getExtent(a)->xprev.isNull());
- *opCtx->recoveryUnit()->writing(&getExtent(a)->xprev) = firstExt;
- *opCtx->recoveryUnit()->writing(&getExtent(firstExt)->xnext) = a;
- _setFreeListStart(opCtx, firstExt);
- }
-}
-
-void MmapV1ExtentManager::freeExtents(OperationContext* opCtx, DiskLoc firstExt, DiskLoc lastExt) {
- Lock::ResourceLock rlk(opCtx->lockState(), _rid, MODE_X);
-
- if (firstExt.isNull() && lastExt.isNull())
- return;
-
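-    // Sanity check that firstExt..lastExt form a well-formed doubly-linked chain.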
- {
- verify(!firstExt.isNull() && !lastExt.isNull());
- Extent* f = getExtent(firstExt);
- Extent* l = getExtent(lastExt);
- verify(f->xprev.isNull());
- verify(l->xnext.isNull());
- verify(f == l || !f->xnext.isNull());
- verify(f == l || !l->xprev.isNull());
- }
-
- if (_getFreeListStart().isNull()) {
- _setFreeListStart(opCtx, firstExt);
- _setFreeListEnd(opCtx, lastExt);
- } else {
- DiskLoc a = _getFreeListStart();
- invariant(getExtent(a)->xprev.isNull());
- *opCtx->recoveryUnit()->writing(&getExtent(a)->xprev) = lastExt;
- *opCtx->recoveryUnit()->writing(&getExtent(lastExt)->xnext) = a;
- _setFreeListStart(opCtx, firstExt);
- }
-}
-
-DiskLoc MmapV1ExtentManager::_getFreeListStart() const {
- if (_files.empty())
- return DiskLoc();
- const DataFile* file = _getOpenFile(0);
- return file->header()->freeListStart;
-}
-
-DiskLoc MmapV1ExtentManager::_getFreeListEnd() const {
- if (_files.empty())
- return DiskLoc();
- const DataFile* file = _getOpenFile(0);
- return file->header()->freeListEnd;
-}
-
-void MmapV1ExtentManager::_setFreeListStart(OperationContext* opCtx, DiskLoc loc) {
- invariant(!_files.empty());
- DataFile* file = _files[0];
- *opCtx->recoveryUnit()->writing(&file->header()->freeListStart) = loc;
-}
-
-void MmapV1ExtentManager::_setFreeListEnd(OperationContext* opCtx, DiskLoc loc) {
- invariant(!_files.empty());
- DataFile* file = _files[0];
- *opCtx->recoveryUnit()->writing(&file->header()->freeListEnd) = loc;
-}
-
-void MmapV1ExtentManager::freeListStats(OperationContext* opCtx,
- int* numExtents,
- int64_t* totalFreeSizeBytes) const {
- Lock::ResourceLock rlk(opCtx->lockState(), _rid, MODE_S);
-
- invariant(numExtents);
- invariant(totalFreeSizeBytes);
-
- *numExtents = 0;
- *totalFreeSizeBytes = 0;
-
- DiskLoc a = _getFreeListStart();
- while (!a.isNull()) {
- Extent* e = getExtent(a);
- (*numExtents)++;
- (*totalFreeSizeBytes) += e->length;
- a = e->xnext;
- }
-}
-
-
-namespace {
-class CacheHintMadvise : public ExtentManager::CacheHint {
-public:
- CacheHintMadvise(void* p, unsigned len, MAdvise::Advice a) : _advice(p, len, a) {}
-
-private:
- MAdvise _advice;
-};
-}
-
-ExtentManager::CacheHint* MmapV1ExtentManager::cacheHint(const DiskLoc& extentLoc,
- const ExtentManager::HintType& hint) {
- invariant(hint == Sequential);
- Extent* e = getExtent(extentLoc);
- return new CacheHintMadvise(reinterpret_cast<void*>(e), e->length, MAdvise::Sequential);
-}
-
-MmapV1ExtentManager::FilesArray::~FilesArray() {
- for (int i = 0; i < size(); i++) {
- delete _files[i];
- }
-}
-
-void MmapV1ExtentManager::FilesArray::close(OperationContext* opCtx) {
- for (int i = 0; i < size(); i++) {
- _files[i]->close(opCtx);
- }
-}
-
-void MmapV1ExtentManager::FilesArray::push_back(DataFile* val) {
- stdx::lock_guard<stdx::mutex> lk(_writersMutex);
- const int n = _size.load();
- invariant(n < DiskLoc::MaxFiles);
- // Note ordering: _size update must come after updating the _files array
- _files[n] = val;
- _size.store(n + 1);
-}
-
-DataFileVersion MmapV1ExtentManager::getFileFormat(OperationContext* opCtx) const {
- if (numFiles() == 0)
- return DataFileVersion(0, 0);
-
- // We explicitly only look at the first file.
- return _getOpenFile(0)->getHeader()->version;
-}
-
-void MmapV1ExtentManager::setFileFormat(OperationContext* opCtx, DataFileVersion newVersion) {
- invariant(numFiles() > 0);
-
- DataFile* df = _getOpenFile(0);
- invariant(df);
-
- *opCtx->recoveryUnit()->writing(&df->getHeader()->version) = newVersion;
-}
-}
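
The free-list bookkeeping removed above threads a doubly-linked list through the extents themselves (xprev/xnext), with the list's head and tail stored in the first datafile's header. A minimal sketch of the unlink step that _allocFromFreeList performs, assuming simplified stand-in types in place of the real DiskLoc/Extent and recovery-unit write intents:

    // Hypothetical stand-in for Extent; the real code indirects through DiskLoc
    // and routes every mutation through the recovery unit's writing() for rollback.
    struct Node {
        Node* xprev = nullptr;
        Node* xnext = nullptr;
    };

    struct FreeList {
        Node* head = nullptr;
        Node* tail = nullptr;

        // Mirrors the four removal cases above: patch both neighbours, then
        // fix up head/tail if 'best' sat at either end of the list.
        void unlink(Node* best) {
            if (best->xprev)
                best->xprev->xnext = best->xnext;
            if (best->xnext)
                best->xnext->xprev = best->xprev;
            if (head == best)
                head = best->xnext;
            if (tail == best)
                tail = best->xprev;
            best->xprev = best->xnext = nullptr;
        }
    };
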
diff --git a/src/mongo/db/storage/mmap_v1/mmap_v1_extent_manager.h b/src/mongo/db/storage/mmap_v1/mmap_v1_extent_manager.h
deleted file mode 100644
index dff9de9efe9..00000000000
--- a/src/mongo/db/storage/mmap_v1/mmap_v1_extent_manager.h
+++ /dev/null
@@ -1,258 +0,0 @@
-// mmap_v1_extent_manager.h
-
-/**
-* Copyright (C) 2014 MongoDB Inc.
-*
-* This program is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Affero General Public License, version 3,
-* as published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU Affero General Public License for more details.
-*
-* You should have received a copy of the GNU Affero General Public License
-* along with this program. If not, see <http://www.gnu.org/licenses/>.
-*
-* As a special exception, the copyright holders give permission to link the
-* code of portions of this program with the OpenSSL library under certain
-* conditions as described in each individual source file and distribute
-* linked combinations including the program with the OpenSSL library. You
-* must comply with the GNU Affero General Public License in all respects for
-* all of the code used other than as permitted herein. If you modify file(s)
-* with this exception, you may extend this exception to your version of the
-* file(s), but you are not obligated to do so. If you do not wish to do so,
-* delete this exception statement from your version. If you delete this
-* exception statement from all source files in the program, then also delete
-* it in the license file.
-*/
-
-#pragma once
-
-#include <string>
-
-#include <boost/filesystem/path.hpp>
-
-#include "mongo/base/status.h"
-#include "mongo/base/string_data.h"
-#include "mongo/db/concurrency/lock_manager_defs.h"
-#include "mongo/db/storage/mmap_v1/diskloc.h"
-#include "mongo/db/storage/mmap_v1/extent_manager.h"
-#include "mongo/db/storage/mmap_v1/record_access_tracker.h"
-#include "mongo/platform/atomic_word.h"
-#include "mongo/stdx/mutex.h"
-
-namespace mongo {
-
-class DataFile;
-class DataFileVersion;
-class MmapV1RecordHeader;
-class OperationContext;
-
-struct Extent;
-
-/**
- * ExtentManager basics
- * - one per database
- * - responsible for managing <db>.# files
- * - NOT responsible for .ns file
- * - gives out extents
- * - responsible for figuring out how to get a new extent
- * - can use any method it wants to do so
- * - this structure is NOT stored on disk
- * - this class is thread safe, except as indicated below
- *
- * Implementation:
- * - ExtentManager holds a preallocated list of DataFile
- * - files will not be removed from the EM, so _files access can be lock-free
- * - extent size and loc are immutable
- * - Any non-const public operation on an ExtentManager will acquire a MODE_X lock on its
- *   RESOURCE_MMAPv1_EXTENT_MANAGER resource from the lock manager, which extends its lifetime
- *   through WriteUnitOfWorks that might need rollback. Private methods will only
- *   be called from public ones.
- */
-class MmapV1ExtentManager : public ExtentManager {
- MONGO_DISALLOW_COPYING(MmapV1ExtentManager);
-
-public:
- class Factory : public ExtentManager::Factory {
- virtual std::unique_ptr<ExtentManager> create(StringData dbname,
- StringData path,
- bool directoryPerDB) final;
- };
-
-    /**
-     * Note: the free list details are a reference into the .ns file. While a bit odd,
-     * this is not a layer violation, as extents are a peer to the .ns file,
-     * without any layering.
-     */
- MmapV1ExtentManager(StringData dbname, StringData path, bool directoryPerDB);
-
- /**
- * Must be called before destruction.
- */
- void close(OperationContext* opCtx) {
- _files.close(opCtx);
- }
-
- /**
-     * Opens all current files. Not thread safe.
- */
- Status init(OperationContext* opCtx);
-
- int numFiles() const;
- long long fileSize() const;
-
- // must call Extent::reuse on the returned extent
- DiskLoc allocateExtent(OperationContext* opCtx, bool capped, int size, bool enforceQuota);
-
- /**
- * firstExt has to be == lastExt or a chain
- */
- void freeExtents(OperationContext* opCtx, DiskLoc firstExt, DiskLoc lastExt);
-
- /**
- * frees a single extent
- * ignores all fields in the Extent except: magic, myLoc, length
- */
- void freeExtent(OperationContext* opCtx, DiskLoc extent);
-
-
- void freeListStats(OperationContext* opCtx, int* numExtents, int64_t* totalFreeSizeBytes) const;
-
- /**
- * @param loc - has to be for a specific MmapV1RecordHeader
- * Note(erh): this sadly cannot be removed.
- * A MmapV1RecordHeader DiskLoc has an offset from a file, while a RecordStore really wants an
- * offset from an extent. This intrinsically links an original record store to the original
- * extent manager.
- */
- MmapV1RecordHeader* recordForV1(const DiskLoc& loc) const;
-
- std::unique_ptr<RecordFetcher> recordNeedsFetch(const DiskLoc& loc) const;
-
- /**
- * @param loc - has to be for a specific MmapV1RecordHeader (not an Extent)
- * Note(erh) see comment on recordFor
- */
- Extent* extentForV1(const DiskLoc& loc) const;
-
- /**
- * @param loc - has to be for a specific MmapV1RecordHeader (not an Extent)
- * Note(erh) see comment on recordFor
- */
- DiskLoc extentLocForV1(const DiskLoc& loc) const;
-
- /**
- * @param loc - has to be for a specific Extent
- */
- Extent* getExtent(const DiskLoc& loc, bool doSanityCheck = true) const;
-
- /**
- * Not thread safe, requires a database exclusive lock
- */
- DataFileVersion getFileFormat(OperationContext* opCtx) const final;
- void setFileFormat(OperationContext* opCtx, DataFileVersion newVersion) final;
-
- const DataFile* getOpenFile(int n) const final {
- return _getOpenFile(n);
- }
-
- virtual int maxSize() const;
-
- virtual CacheHint* cacheHint(const DiskLoc& extentLoc, const HintType& hint);
-
-private:
-    /**
-     * Returns a null DiskLoc if there is nothing suitable in the free list.
-     */
- DiskLoc _allocFromFreeList(OperationContext* opCtx, int approxSize, bool capped);
-
-    /**
-     * Allocates a new Extent; does not check the free list.
-     */
- DiskLoc _createExtent(OperationContext* opCtx, int approxSize, bool enforceQuota);
-
- DataFile* _addAFile(OperationContext* opCtx, int sizeNeeded, bool preallocateNextFile);
-
-
-    /**
-     * Shared record retrieval logic used by the public recordForV1() and recordNeedsFetch()
-     * above.
-     */
- MmapV1RecordHeader* _recordForV1(const DiskLoc& loc) const;
-
- DiskLoc _getFreeListStart() const;
- DiskLoc _getFreeListEnd() const;
- void _setFreeListStart(OperationContext* opCtx, DiskLoc loc);
- void _setFreeListEnd(OperationContext* opCtx, DiskLoc loc);
-
- const DataFile* _getOpenFile(int fileId) const;
- DataFile* _getOpenFile(int fileId);
-
- DiskLoc _createExtentInFile(
- OperationContext* opCtx, int fileNo, DataFile* f, int size, bool enforceQuota);
-
- boost::filesystem::path _fileName(int n) const;
-
- // -----
-
- const std::string _dbname; // i.e. "test"
- const std::string _path; // i.e. "/data/db"
- const bool _directoryPerDB;
- const ResourceId _rid;
-
- // This reference points into the MMAPv1 engine and is only valid as long as the
- // engine is valid. Not owned here.
- RecordAccessTracker* _recordAccessTracker;
-
- /**
- * Simple wrapper around an array object to allow append-only modification of the array,
- * as well as concurrent read-accesses. This class has a minimal interface to keep
- * implementation simple and easy to modify.
- */
- class FilesArray {
- public:
- FilesArray() : _size(0) {}
- ~FilesArray();
-
- /**
- * Must be called before destruction.
- */
- void close(OperationContext* opCtx);
-
-        /**
-         * Returns the file at index 'n' in the array, where 'n' must be less than the number
-         * of files added. Will always return the same pointer for a given file.
-         */
- DataFile* operator[](int n) const {
- invariant(n >= 0 && n < size());
- return _files[n];
- }
-
- /**
- * Returns true iff no files were added
- */
- bool empty() const {
- return size() == 0;
- }
-
- /**
- * Returns number of files added to the array
- */
- int size() const {
- return _size.load();
- }
-
- // Appends val to the array, taking ownership of its pointer
- void push_back(DataFile* val);
-
- private:
- stdx::mutex _writersMutex;
- AtomicInt32 _size; // number of files in the array
- DataFile* _files[DiskLoc::MaxFiles];
- };
-
- FilesArray _files;
-};
-}
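
FilesArray above permits lock-free reads against mutex-serialized appends; the ordering note in push_back() is the crux: the slot must be written before the new size is published. A sketch of the same pattern with the memory ordering made explicit via std::atomic (illustrative names; the deleted code's AtomicInt32 uses sequentially consistent operations, which are at least as strong):

    #include <atomic>
    #include <cassert>
    #include <mutex>

    template <typename T, int MaxSlots>
    class AppendOnlyArray {
    public:
        T* operator[](int n) const {
            assert(n >= 0 && n < size());
            return _slots[n];
        }

        int size() const {
            return _size.load(std::memory_order_acquire);
        }

        void push_back(T* val) {
            std::lock_guard<std::mutex> lk(_writersMutex);
            const int n = _size.load(std::memory_order_relaxed);
            assert(n < MaxSlots);
            _slots[n] = val;                                // write the slot first
            _size.store(n + 1, std::memory_order_release);  // then publish it
        }

    private:
        std::mutex _writersMutex;
        std::atomic<int> _size{0};
        T* _slots[MaxSlots] = {};
    };

Readers that observe size() == n + 1 are guaranteed by the release/acquire pairing to see the write to _slots[n], which is why no reader-side lock is needed.
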
diff --git a/src/mongo/db/storage/mmap_v1/mmap_v1_init.cpp b/src/mongo/db/storage/mmap_v1/mmap_v1_init.cpp
deleted file mode 100644
index 42ba6cb864c..00000000000
--- a/src/mongo/db/storage/mmap_v1/mmap_v1_init.cpp
+++ /dev/null
@@ -1,86 +0,0 @@
-/**
- * Copyright (C) 2014 MongoDB Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#include "mongo/platform/basic.h"
-
-#include "mongo/base/init.h"
-#include "mongo/bson/bsonobjbuilder.h"
-#include "mongo/db/service_context.h"
-#include "mongo/db/storage/mmap_v1/mmap_v1_engine.h"
-#include "mongo/db/storage/storage_engine_init.h"
-#include "mongo/db/storage/storage_engine_metadata.h"
-#include "mongo/db/storage/storage_options.h"
-
-namespace mongo {
-
-namespace {
-
-class MMAPV1Factory : public StorageEngine::Factory {
-public:
- virtual ~MMAPV1Factory() {}
- virtual StorageEngine* create(const StorageGlobalParams& params,
- const StorageEngineLockFile* lockFile) const {
- return new MMAPV1Engine(lockFile, getGlobalServiceContext()->getFastClockSource());
- }
-
- virtual StringData getCanonicalName() const {
- return "mmapv1";
- }
-
- virtual Status validateMetadata(const StorageEngineMetadata& metadata,
- const StorageGlobalParams& params) const {
- Status status =
- metadata.validateStorageEngineOption("directoryPerDB", params.directoryperdb);
- if (!status.isOK()) {
- return status;
- }
-
- return Status::OK();
- }
-
- virtual BSONObj createMetadataOptions(const StorageGlobalParams& params) const {
- BSONObjBuilder builder;
- builder.appendBool("directoryPerDB", params.directoryperdb);
- return builder.obj();
- }
-
- bool supportsReadOnly() const override {
- return true;
- }
-};
-
-} // namespace
-
-MONGO_INITIALIZER_WITH_PREREQUISITES(MMAPV1EngineInit, ("ServiceContext"))
-(InitializerContext* context) {
- registerStorageEngine(getGlobalServiceContext(), std::make_unique<MMAPV1Factory>());
- return Status::OK();
-}
-
-} // namespace mongo
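
The file above is an instance of the self-registering factory pattern: a factory object is handed to a global registry by an initializer that runs at startup. Reduced to its shape with toy names (MongoDB's real machinery is the dependency-ordered MONGO_INITIALIZER system shown above):

    #include <functional>
    #include <map>
    #include <memory>
    #include <string>

    struct Engine {
        virtual ~Engine() = default;
    };

    using EngineFactory = std::function<std::unique_ptr<Engine>()>;

    // Function-local static avoids the static initialization order fiasco.
    std::map<std::string, EngineFactory>& engineRegistry() {
        static std::map<std::string, EngineFactory> registry;
        return registry;
    }

    struct ToyEngine : Engine {};

    // Static registrar: runs before main(), the moral equivalent of the
    // MONGO_INITIALIZER block above.
    const bool kToyEngineRegistered = [] {
        engineRegistry().emplace("toy", [] { return std::make_unique<ToyEngine>(); });
        return true;
    }();
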
diff --git a/src/mongo/db/storage/mmap_v1/mmap_v1_init_test.cpp b/src/mongo/db/storage/mmap_v1/mmap_v1_init_test.cpp
deleted file mode 100644
index dff7166e77a..00000000000
--- a/src/mongo/db/storage/mmap_v1/mmap_v1_init_test.cpp
+++ /dev/null
@@ -1,120 +0,0 @@
-/**
- * Copyright (C) 2015 MongoDB Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#include "mongo/platform/basic.h"
-
-
-#include "mongo/db/json.h"
-#include "mongo/db/service_context.h"
-#include "mongo/db/storage/storage_engine_init.h"
-#include "mongo/db/storage/storage_engine_metadata.h"
-#include "mongo/db/storage/storage_options.h"
-#include "mongo/unittest/unittest.h"
-#include "mongo/util/mongoutils/str.h"
-
-namespace {
-
-using namespace mongo;
-
-class MMAPV1FactoryTest : public mongo::unittest::Test {
-private:
- virtual void setUp() {
- ServiceContext* globalEnv = getGlobalServiceContext();
- ASSERT_TRUE(globalEnv);
- ASSERT_TRUE(isRegisteredStorageEngine(globalEnv, "mmapv1"));
- factory = getFactoryForStorageEngine(globalEnv, "mmapv1");
- ASSERT_TRUE(factory);
- }
-
- virtual void tearDown() {
- factory = nullptr;
- }
-
-protected:
- const StorageEngine::Factory* factory;
-};
-
-void _testValidateMetadata(const StorageEngine::Factory* factory,
- const BSONObj& metadataOptions,
- bool directoryPerDB,
- ErrorCodes::Error expectedCode) {
- // It is fine to specify an invalid data directory for the metadata
- // as long as we do not invoke read() or write().
- StorageEngineMetadata metadata("no_such_directory");
- metadata.setStorageEngineOptions(metadataOptions);
-
- StorageGlobalParams storageOptions;
- storageOptions.directoryperdb = directoryPerDB;
-
- Status status = factory->validateMetadata(metadata, storageOptions);
- if (expectedCode != status.code()) {
- FAIL(str::stream()
- << "Unexpected StorageEngine::Factory::validateMetadata result. Expected: "
- << ErrorCodes::errorString(expectedCode)
- << " but got "
- << status.toString()
- << " instead. metadataOptions: "
- << metadataOptions
- << "; directoryPerDB: "
- << directoryPerDB);
- }
-}
-
-// Do not validate fields that are not present in metadata.
-TEST_F(MMAPV1FactoryTest, ValidateMetadataEmptyOptions) {
- _testValidateMetadata(factory, BSONObj(), false, ErrorCodes::OK);
- _testValidateMetadata(factory, BSONObj(), true, ErrorCodes::OK);
-}
-
-TEST_F(MMAPV1FactoryTest, ValidateMetadataDirectoryPerDB) {
- _testValidateMetadata(
- factory, fromjson("{directoryPerDB: 123}"), false, ErrorCodes::FailedToParse);
- _testValidateMetadata(factory, fromjson("{directoryPerDB: false}"), false, ErrorCodes::OK);
- _testValidateMetadata(
- factory, fromjson("{directoryPerDB: false}"), true, ErrorCodes::InvalidOptions);
- _testValidateMetadata(
- factory, fromjson("{directoryPerDB: true}"), false, ErrorCodes::InvalidOptions);
- _testValidateMetadata(factory, fromjson("{directoryPerDB: true}"), true, ErrorCodes::OK);
-}
-
-void _testCreateMetadataOptions(const StorageEngine::Factory* factory, bool directoryPerDB) {
- StorageGlobalParams storageOptions;
- storageOptions.directoryperdb = directoryPerDB;
-
- BSONObj metadataOptions = factory->createMetadataOptions(storageOptions);
- BSONElement directoryPerDBElement = metadataOptions.getField("directoryPerDB");
- ASSERT_TRUE(directoryPerDBElement.isBoolean());
- ASSERT_EQUALS(directoryPerDB, directoryPerDBElement.boolean());
-}
-
-TEST_F(MMAPV1FactoryTest, CreateMetadataOptions) {
- _testCreateMetadataOptions(factory, false);
- _testCreateMetadataOptions(factory, true);
-}
-
-} // namespace
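
One property these tests leave implicit is that the two factory methods are inverses: metadata produced by createMetadataOptions() for a given configuration must pass validateMetadata() against that same configuration. A hypothetical extra check, reusing the types and headers already pulled in by the deleted test:

    void _testMetadataRoundTrip(const StorageEngine::Factory* factory, bool directoryPerDB) {
        StorageGlobalParams storageOptions;
        storageOptions.directoryperdb = directoryPerDB;

        // An invalid directory is fine as long as read()/write() are not invoked.
        StorageEngineMetadata metadata("no_such_directory");
        metadata.setStorageEngineOptions(factory->createMetadataOptions(storageOptions));

        ASSERT_OK(factory->validateMetadata(metadata, storageOptions));
    }
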
diff --git a/src/mongo/db/storage/mmap_v1/mmap_v1_noinit.cpp b/src/mongo/db/storage/mmap_v1/mmap_v1_noinit.cpp
deleted file mode 100644
index c6c7737ee88..00000000000
--- a/src/mongo/db/storage/mmap_v1/mmap_v1_noinit.cpp
+++ /dev/null
@@ -1,31 +0,0 @@
-/**
- * Copyright (C) 2016 MongoDB Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-// Empty file to be used when mmapv1 is not enabled
-//
diff --git a/src/mongo/db/storage/mmap_v1/mmap_v1_options.cpp b/src/mongo/db/storage/mmap_v1/mmap_v1_options.cpp
deleted file mode 100644
index 87986746d93..00000000000
--- a/src/mongo/db/storage/mmap_v1/mmap_v1_options.cpp
+++ /dev/null
@@ -1,31 +0,0 @@
-/**
- * Copyright (C) 2017 MongoDB Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#include "mongo/db/storage/mmap_v1/mmap_v1_options.h"
-
-mongo::MMAPV1Options mongo::mmapv1GlobalOptions;
diff --git a/src/mongo/db/storage/mmap_v1/mmap_v1_options.h b/src/mongo/db/storage/mmap_v1/mmap_v1_options.h
deleted file mode 100644
index f5b101f553c..00000000000
--- a/src/mongo/db/storage/mmap_v1/mmap_v1_options.h
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * Copyright (C) 2014 MongoDB Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#pragma once
-
-#include <string>
-
-/*
- * This file defines the storage for options that come from the command line related to the
- * mmap v1 storage engine.
- */
-
-namespace mongo {
-
-struct MMAPV1Options {
-    MMAPV1Options()
-        : lenForNewNsFiles(16 * 1024 * 1024),
-          preallocj(true),
-          prealloc(false),
-          smallfiles(false),
-          journalOptions(0),
-          quota(false),
-          quotaFiles(8) {}
-
- // --nssize
- // Specifies the default size for namespace files, which are files that end in .ns.
- // Each collection and index counts as a namespace.
- unsigned lenForNewNsFiles;
-
- bool preallocj; // --nopreallocj no preallocation of journal files
- bool prealloc; // --noprealloc no preallocation of data files
- bool smallfiles; // --smallfiles allocate smaller data files
-
- // --journalOptions 7 dump journal and terminate without doing anything further
- // --journalOptions 4 recover and terminate without listening
- enum { // bits to be ORed
- JournalDumpJournal = 1, // dump diagnostics on the journal during recovery
- JournalScanOnly = 2, // don't do any real work, just scan and dump if dump
- // specified
- JournalRecoverOnly = 4, // terminate after recovery step
- JournalParanoid = 8, // paranoid mode enables extra checks
- JournalAlwaysCommit = 16, // do a group commit every time the writelock is released
- JournalAlwaysRemap = 32, // remap the private view after every group commit
- // (may lag to the next write lock acquisition,
- // but will do all files then)
- JournalNoCheckSpace = 64 // don't check that there is enough room for journal files
- // before startup (for diskfull tests)
- };
- int journalOptions; // --journalOptions <n> for debugging
-
- // --quota
-    // Enables a maximum limit for the number of data files each database can have.
- // When running with the --quota option, MongoDB has a maximum of 8 data files
- // per database. Adjust the quota with --quotaFiles.
- bool quota;
-
- // --quotaFiles
- // Modifies the limit on the number of data files per database.
- // --quotaFiles option requires that you set --quota.
- int quotaFiles; // --quotaFiles
-};
-
-extern MMAPV1Options mmapv1GlobalOptions;
-
-} // namespace mongo
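
The journalOptions bits above are meant to be ORed together, so for example "--journalOptions 7" combines JournalDumpJournal | JournalScanOnly | JournalRecoverOnly (1 | 2 | 4). A minimal sketch of setting and testing the flags (the surrounding program is hypothetical; the names come from the deleted header):

    #include <iostream>

    #include "mongo/db/storage/mmap_v1/mmap_v1_options.h"

    int main() {
        mongo::MMAPV1Options opts;
        opts.journalOptions = mongo::MMAPV1Options::JournalDumpJournal |
            mongo::MMAPV1Options::JournalScanOnly | mongo::MMAPV1Options::JournalRecoverOnly;

        // Each bit is tested independently with bitwise AND.
        if (opts.journalOptions & mongo::MMAPV1Options::JournalRecoverOnly)
            std::cout << "terminate after recovery step\n";
        return 0;
    }
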
diff --git a/src/mongo/db/storage/mmap_v1/mmap_v1_record_store_test.cpp b/src/mongo/db/storage/mmap_v1/mmap_v1_record_store_test.cpp
deleted file mode 100644
index 3a1e71fad40..00000000000
--- a/src/mongo/db/storage/mmap_v1/mmap_v1_record_store_test.cpp
+++ /dev/null
@@ -1,105 +0,0 @@
-/**
- * Copyright (C) 2014 MongoDB Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#include "mongo/platform/basic.h"
-
-#include "mongo/base/init.h"
-#include "mongo/db/operation_context_noop.h"
-#include "mongo/db/storage/mmap_v1/extent.h"
-#include "mongo/db/storage/mmap_v1/record.h"
-#include "mongo/db/storage/mmap_v1/record_store_v1_base.h"
-#include "mongo/db/storage/mmap_v1/record_store_v1_capped.h"
-#include "mongo/db/storage/mmap_v1/record_store_v1_simple.h"
-#include "mongo/db/storage/mmap_v1/record_store_v1_test_help.h"
-#include "mongo/db/storage/record_store_test_harness.h"
-#include "mongo/db/storage/recovery_unit_noop.h"
-#include "mongo/unittest/unittest.h"
-
-namespace mongo {
-namespace {
-
-class MyHarnessHelper : public RecordStoreHarnessHelper {
-public:
- MyHarnessHelper() {}
-
- virtual std::unique_ptr<RecordStore> newNonCappedRecordStore() {
- return newNonCappedRecordStore("a.b");
- }
-
- virtual std::unique_ptr<RecordStore> newNonCappedRecordStore(const std::string& ns) {
- OperationContextNoop opCtx;
- auto md = stdx::make_unique<DummyRecordStoreV1MetaData>(false, 0);
- md->setUserFlag(&opCtx, CollectionOptions::Flag_NoPadding);
- return stdx::make_unique<SimpleRecordStoreV1>(&opCtx, ns, md.release(), &_em, false);
- }
-
- virtual std::unique_ptr<RecordStore> newCappedRecordStore(int64_t cappedMaxSize,
- int64_t cappedMaxDocs) {
- return newCappedRecordStore("a.b", cappedMaxSize, cappedMaxDocs);
- }
-
- virtual std::unique_ptr<RecordStore> newCappedRecordStore(const std::string& ns,
- int64_t cappedMaxSize,
- int64_t cappedMaxDocs) {
- OperationContextNoop opCtx;
- auto md = stdx::make_unique<DummyRecordStoreV1MetaData>(true, 0);
- auto md_ptr = md.get();
- std::unique_ptr<RecordStore> rs =
- stdx::make_unique<CappedRecordStoreV1>(&opCtx, nullptr, ns, md.release(), &_em, false);
-
- LocAndSize records[] = {{}};
- LocAndSize drecs[] = {{DiskLoc(0, 1000), 1000}, {}};
-        // 'md' was released to the record store above, so use the retained raw pointer.
-        md_ptr->setCapExtent(&opCtx, DiskLoc(0, 0));
-        md_ptr->setCapFirstNewRecord(&opCtx, DiskLoc().setInvalid());
- initializeV1RS(&opCtx, records, drecs, NULL, &_em, md_ptr);
-
- return rs;
- }
-
- std::unique_ptr<RecoveryUnit> newRecoveryUnit() override {
- return stdx::make_unique<RecoveryUnitNoop>();
- }
-
- bool supportsDocLocking() final {
- return false;
- }
-
-private:
- DummyExtentManager _em;
-};
-
-std::unique_ptr<HarnessHelper> makeHarnessHelper() {
- return stdx::make_unique<MyHarnessHelper>();
-}
-
-MONGO_INITIALIZER(RegisterHarnessFactory)(InitializerContext* const) {
- mongo::registerHarnessHelperFactory(makeHarnessHelper);
- return Status::OK();
-}
-} // namespace
-} // namespace mongo
diff --git a/src/mongo/db/storage/mmap_v1/mmap_windows.cpp b/src/mongo/db/storage/mmap_v1/mmap_windows.cpp
deleted file mode 100644
index 8d949a22243..00000000000
--- a/src/mongo/db/storage/mmap_v1/mmap_windows.cpp
+++ /dev/null
@@ -1,487 +0,0 @@
-/* Copyright 2009 10gen Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects
- * for all of the code used other than as permitted herein. If you modify
- * file(s) with this exception, you may extend this exception to your
- * version of the file(s), but you are not obligated to do so. If you do not
- * wish to do so, delete this exception statement from your version. If you
- * delete this exception statement from all source files in the program,
- * then also delete it in the license file.
- */
-
-#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kControl
-
-#include "mongo/platform/basic.h"
-
-#include "mongo/db/storage/mmap_v1/mmap.h"
-
-#include "mongo/db/storage/mmap_v1/durable_mapped_file.h"
-#include "mongo/db/storage/mmap_v1/file_allocator.h"
-#include "mongo/stdx/mutex.h"
-#include "mongo/util/log.h"
-#include "mongo/util/processinfo.h"
-#include "mongo/util/text.h"
-#include "mongo/util/timer.h"
-
-using std::endl;
-using std::string;
-using std::vector;
-
-std::size_t mongo::getMinOSPageSizeBytes() {
- static const std::size_t cachedSize = [] {
- SYSTEM_INFO si;
- GetSystemInfo(&si);
- std::size_t minOSPageSizeBytes = si.dwPageSize;
- minOSPageSizeBytesTest(minOSPageSizeBytes);
- return minOSPageSizeBytes;
- }();
- return cachedSize;
-}
-
-namespace mongo {
-
-// MapViewMutex
-//
-// Protects:
-// 1. Ensures all MapViewOfFile/UnmapViewOfFile operations are serialized to reduce the chance
-//    of "address in use" errors (error code 487)
-//    - These errors can still occur if the memory is used for other purposes
-//      (stack storage, heap)
-// 2. Prevents calls to VirtualProtect while we are remapping files.
-// Lock Ordering:
-// - If taken, must be after previewViews._m to prevent deadlocks
-stdx::mutex mapViewMutex;
-
-MAdvise::MAdvise(void*, unsigned, Advice) {}
-MAdvise::~MAdvise() {}
-
-const unsigned long long memoryMappedFileLocationFloor = 256LL * 1024LL * 1024LL * 1024LL;
-static unsigned long long _nextMemoryMappedFileLocation = memoryMappedFileLocationFloor;
-
-// nextMemoryMappedFileLocationMutex
-//
-// Protects:
-// Windows 64-bit specific allocation of virtual memory regions for
-// placing memory mapped files in memory
-// Lock Ordering:
-// No restrictions
-static SimpleMutex _nextMemoryMappedFileLocationMutex;
-
-unsigned long long AlignNumber(unsigned long long number, unsigned long long granularity) {
- return (number + granularity - 1) & ~(granularity - 1);
-}
-
-static void* getNextMemoryMappedFileLocation(unsigned long long mmfSize) {
- if (4 == sizeof(void*)) {
- return 0;
- }
- stdx::lock_guard<SimpleMutex> lk(_nextMemoryMappedFileLocationMutex);
-
- static unsigned long long granularity = 0;
-
- if (0 == granularity) {
- SYSTEM_INFO systemInfo;
- GetSystemInfo(&systemInfo);
- granularity = static_cast<unsigned long long>(systemInfo.dwAllocationGranularity);
- }
-
- unsigned long long thisMemoryMappedFileLocation = _nextMemoryMappedFileLocation;
-
- int current_retry = 1;
-
- while (true) {
- MEMORY_BASIC_INFORMATION memInfo;
-
- if (VirtualQuery(reinterpret_cast<LPCVOID>(thisMemoryMappedFileLocation),
- &memInfo,
- sizeof(memInfo)) == 0) {
- DWORD gle = GetLastError();
-
-            // If we exceed the limits of virtual memory
-            // (8 TB before Windows 8.1/2012 R2, 128 TB after),
-            // restart scanning from our memory-mapped floor once more.
-            // This is a linear scan of regions, not of every VM page
- if (gle == ERROR_INVALID_PARAMETER && current_retry == 1) {
- thisMemoryMappedFileLocation = memoryMappedFileLocationFloor;
- ++current_retry;
- continue;
- }
-
- log() << "VirtualQuery of " << thisMemoryMappedFileLocation << " failed with error "
- << errnoWithDescription(gle);
- fassertFailed(17484);
- }
-
- // Free memory regions that we can use for memory map files
- // 1. Marked MEM_FREE, not MEM_RESERVE
- // 2. Marked as PAGE_NOACCESS, not anything else
- if (memInfo.Protect == PAGE_NOACCESS && memInfo.State == MEM_FREE &&
- memInfo.RegionSize > mmfSize)
- break;
-
- // Align the memory location in case RegionSize is not aligned to the OS allocation
- // granularity size
- thisMemoryMappedFileLocation = AlignNumber(
- reinterpret_cast<unsigned long long>(memInfo.BaseAddress) + memInfo.RegionSize,
- granularity);
- }
-
- _nextMemoryMappedFileLocation =
- thisMemoryMappedFileLocation + AlignNumber(mmfSize, granularity);
-
- return reinterpret_cast<void*>(static_cast<uintptr_t>(thisMemoryMappedFileLocation));
-}
-
-void MemoryMappedFile::close(OperationContext* opCtx) {
- LockMongoFilesShared::assertExclusivelyLocked(opCtx);
-
- // Prevent flush and close from concurrently running
- stdx::lock_guard<stdx::mutex> lk(_flushMutex);
-
- {
- stdx::lock_guard<stdx::mutex> lk(mapViewMutex);
-
- for (vector<void*>::iterator i = views.begin(); i != views.end(); i++) {
- UnmapViewOfFile(*i);
- }
- }
-
- views.clear();
- totalMappedLength.fetchAndSubtract(len);
- len = 0;
-
- if (maphandle)
- CloseHandle(maphandle);
- maphandle = 0;
- if (fd) {
- CloseHandle(fd);
- fd = 0;
- }
-
- destroyed(opCtx); // cleans up from the master list of mmaps
-}
-
-bool MemoryMappedFile::isClosed() {
- return !len && !fd && !views.size();
-}
-
-void* MemoryMappedFile::map(OperationContext* opCtx,
- const char* filenameIn,
- unsigned long long& length) {
- verify(fd == 0 && len == 0); // can't open more than once
- setFilename(opCtx, filenameIn);
- FileAllocator::get()->allocateAsap(filenameIn, length);
- /* big hack here: Babble uses db names with colons. doesn't seem to work on windows. temporary
- * perhaps. */
- char filename[256];
- strncpy(filename, filenameIn, 255);
- filename[255] = 0;
- {
- size_t len = strlen(filename);
- for (int i = len - 1; i >= 0; i--) {
- if (filename[i] == '/' || filename[i] == '\\')
- break;
-
- if (filename[i] == ':')
- filename[i] = '_';
- }
- }
-
- updateLength(filename, length);
-
- const bool readOnly = isOptionSet(READONLY);
-
- {
- DWORD createOptions = FILE_ATTRIBUTE_NORMAL;
- if (isOptionSet(SEQUENTIAL))
- createOptions |= FILE_FLAG_SEQUENTIAL_SCAN;
-
- DWORD desiredAccess = readOnly ? GENERIC_READ : (GENERIC_READ | GENERIC_WRITE);
- DWORD shareMode = readOnly ? FILE_SHARE_READ : (FILE_SHARE_WRITE | FILE_SHARE_READ);
-
- fd = CreateFileW(toWideString(filename).c_str(),
- desiredAccess, // desired access
- shareMode, // share mode
- NULL, // security
- OPEN_ALWAYS, // create disposition
- createOptions, // flags
- NULL); // hTempl
- if (fd == INVALID_HANDLE_VALUE) {
- DWORD dosError = GetLastError();
- severe() << "CreateFileW for " << filename << " failed with "
- << errnoWithDescription(dosError) << " (file size is " << length << ")"
- << " in MemoryMappedFile::map" << endl;
- return 0;
- }
- }
-
- {
- DWORD flProtect = readOnly ? PAGE_READONLY : PAGE_READWRITE;
- maphandle = CreateFileMappingW(fd,
- NULL,
- flProtect,
- length >> 32 /*maxsizehigh*/,
- (unsigned)length /*maxsizelow*/,
- NULL /*lpName*/);
- if (maphandle == NULL) {
- DWORD dosError = GetLastError();
- severe() << "CreateFileMappingW for " << filename << " failed with "
- << errnoWithDescription(dosError) << " (file size is " << length << ")"
- << " in MemoryMappedFile::map" << endl;
- LockMongoFilesExclusive lock(opCtx);
- close(opCtx);
- fassertFailed(16225);
- }
- }
-
- void* view = 0;
- {
- stdx::lock_guard<stdx::mutex> lk(mapViewMutex);
- DWORD access = readOnly ? FILE_MAP_READ : FILE_MAP_ALL_ACCESS;
-
- int current_retry = 0;
- while (true) {
- LPVOID thisAddress = getNextMemoryMappedFileLocation(length);
-
- view = MapViewOfFileEx(maphandle, // file mapping handle
- access, // access
- 0,
- 0, // file offset, high and low
- 0, // bytes to map, 0 == all
- thisAddress); // address to place file
-
- if (view == 0) {
- DWORD dosError = GetLastError();
-
- ++current_retry;
-
- // If we failed to allocate a memory mapped file, try again in case we picked
- // an address that Windows is also trying to use for some other VM allocations
- if (dosError == ERROR_INVALID_ADDRESS && current_retry < 5) {
- continue;
- }
-
-#ifndef _WIN64
- // Warn user that if they are running a 32-bit app on 64-bit Windows
- if (dosError == ERROR_NOT_ENOUGH_MEMORY) {
- BOOL wow64Process;
- BOOL retWow64 = IsWow64Process(GetCurrentProcess(), &wow64Process);
- if (retWow64 && wow64Process) {
- log() << "This is a 32-bit MongoDB binary running on a 64-bit"
- " operating system that has run out of virtual memory for"
- " databases. Switch to a 64-bit build of MongoDB to open"
- " the databases.";
- }
- }
-#endif
-
- severe() << "MapViewOfFileEx for " << filename << " at address " << thisAddress
- << " failed with " << errnoWithDescription(dosError) << " (file size is "
- << length << ")"
- << " in MemoryMappedFile::map" << endl;
-
- LockMongoFilesExclusive lock(opCtx);
- close(opCtx);
- fassertFailed(16166);
- }
-
- break;
- }
- }
-
- // MemoryMappedFile successfully created, now update state.
- len = length;
- totalMappedLength.fetchAndAdd(len);
-
- views.push_back(view);
-
- return view;
-}
-
-extern stdx::mutex mapViewMutex;
-
-void* MemoryMappedFile::createPrivateMap() {
- verify(maphandle);
-
- stdx::lock_guard<stdx::mutex> lk(mapViewMutex);
-
- LPVOID thisAddress = getNextMemoryMappedFileLocation(len);
-
- void* privateMapAddress = NULL;
- int current_retry = 0;
-
- while (true) {
- privateMapAddress = MapViewOfFileEx(maphandle, // file mapping handle
- FILE_MAP_READ, // access
- 0,
- 0, // file offset, high and low
- 0, // bytes to map, 0 == all
- thisAddress); // address to place file
-
- if (privateMapAddress == 0) {
- DWORD dosError = GetLastError();
-
- ++current_retry;
-
- // If we failed to allocate a memory mapped file, try again in case we picked
- // an address that Windows is also trying to use for some other VM allocations
- if (dosError == ERROR_INVALID_ADDRESS && current_retry < 5) {
- continue;
- }
-
- severe() << "MapViewOfFileEx for " << filename() << " failed with error "
- << errnoWithDescription(dosError) << " (file size is " << len << ")"
- << " in MemoryMappedFile::createPrivateMap" << endl;
-
- fassertFailed(16167);
- }
-
- break;
- }
-
- views.push_back(privateMapAddress);
- return privateMapAddress;
-}
-
-void* MemoryMappedFile::remapPrivateView(OperationContext* opCtx, void* oldPrivateAddr) {
- LockMongoFilesExclusive lockMongoFiles(opCtx);
-
- privateViews.clearWritableBits(oldPrivateAddr, len);
-
- stdx::lock_guard<stdx::mutex> lk(mapViewMutex);
-
- if (!UnmapViewOfFile(oldPrivateAddr)) {
- DWORD dosError = GetLastError();
- severe() << "UnMapViewOfFile for " << filename() << " failed with error "
- << errnoWithDescription(dosError) << " in MemoryMappedFile::remapPrivateView"
- << endl;
- fassertFailed(16168);
- }
-
- void* newPrivateView =
- MapViewOfFileEx(maphandle, // file mapping handle
- FILE_MAP_READ, // access
- 0,
- 0, // file offset, high and low
- 0, // bytes to map, 0 == all
- oldPrivateAddr); // we want the same address we had before
- if (0 == newPrivateView) {
- DWORD dosError = GetLastError();
- severe() << "MapViewOfFileEx for " << filename() << " failed with error "
- << errnoWithDescription(dosError) << " (file size is " << len << ")"
- << " in MemoryMappedFile::remapPrivateView" << endl;
- }
- fassert(16148, newPrivateView == oldPrivateAddr);
- return newPrivateView;
-}
-
-class WindowsFlushable : public MemoryMappedFile::Flushable {
-public:
- WindowsFlushable(MemoryMappedFile* theFile,
- void* view,
- HANDLE fd,
- const uint64_t id,
- const std::string& filename,
- stdx::mutex& flushMutex)
- : _theFile(theFile),
- _view(view),
- _fd(fd),
- _id(id),
- _filename(filename),
- _flushMutex(flushMutex) {}
-
- void flush(OperationContext* opCtx) {
- if (!_view || !_fd)
- return;
-
- {
- LockMongoFilesShared mmfilesLock(opCtx);
-
- std::set<MongoFile*> mmfs = MongoFile::getAllFiles();
- std::set<MongoFile*>::const_iterator it = mmfs.find(_theFile);
- if (it == mmfs.end() || (*it)->getUniqueId() != _id) {
- // this was deleted while we were unlocked
- return;
- }
-
- // Hold the flush mutex to ensure the file is not closed during flush
- _flushMutex.lock();
- }
-
- stdx::lock_guard<stdx::mutex> lk(_flushMutex, stdx::adopt_lock);
-
- int loopCount = 0;
- bool success = false;
- bool timeout = false;
- int dosError = ERROR_SUCCESS;
- const int maximumTimeInSeconds = 60 * 15;
- Timer t;
- while (!success && !timeout) {
- ++loopCount;
- success = FALSE != FlushViewOfFile(_view, 0);
- if (!success) {
- dosError = GetLastError();
- if (dosError != ERROR_LOCK_VIOLATION) {
- break;
- }
- timeout = t.seconds() > maximumTimeInSeconds;
- }
- }
- if (success && loopCount > 1) {
- log() << "FlushViewOfFile for " << _filename << " succeeded after " << loopCount
- << " attempts taking " << t.millis() << "ms" << endl;
- } else if (!success) {
- log() << "FlushViewOfFile for " << _filename << " failed with error " << dosError
- << " after " << loopCount << " attempts taking " << t.millis() << "ms" << endl;
- // Abort here to avoid data corruption
- fassert(16387, false);
- }
-
- success = FALSE != FlushFileBuffers(_fd);
- if (!success) {
- int err = GetLastError();
- log() << "FlushFileBuffers failed: " << errnoWithDescription(err)
- << " file: " << _filename << endl;
- dataSyncFailedHandler();
- }
- }
-
- MemoryMappedFile* _theFile; // this may be deleted while we are running
- void* _view;
- HANDLE _fd;
- const uint64_t _id;
- string _filename;
- stdx::mutex& _flushMutex;
-};
-
-void MemoryMappedFile::flush(bool sync) {
- invariant(!(isOptionSet(Options::READONLY)));
- uassert(13056, "Async flushing not supported on windows", sync);
- if (!views.empty()) {
- WindowsFlushable f(this, viewForFlushing(), fd, _uniqueId, filename(), _flushMutex);
- auto opCtx = cc().getOperationContext();
- invariant(opCtx);
- f.flush(opCtx);
- }
-}
-
-MemoryMappedFile::Flushable* MemoryMappedFile::prepareFlush() {
- return new WindowsFlushable(this, viewForFlushing(), fd, _uniqueId, filename(), _flushMutex);
-}
-} // namespace mongo
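
AlignNumber() above rounds its argument up to the next multiple of the allocation granularity with a mask trick that is only valid when the granularity is a power of two, as the Windows allocation granularity is (typically 64 KB). A self-contained worked example:

    #include <cassert>

    // (number + granularity - 1) overshoots into the next block, and masking
    // with ~(granularity - 1) clears the remainder; requires a power-of-two
    // granularity.
    unsigned long long alignNumber(unsigned long long number, unsigned long long granularity) {
        return (number + granularity - 1) & ~(granularity - 1);
    }

    int main() {
        assert(alignNumber(1, 65536) == 65536);       // rounds up
        assert(alignNumber(65536, 65536) == 65536);   // already aligned
        assert(alignNumber(65537, 65536) == 131072);  // next boundary
        return 0;
    }
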
diff --git a/src/mongo/db/storage/mmap_v1/paths.cpp b/src/mongo/db/storage/mmap_v1/paths.cpp
deleted file mode 100644
index 8e17c6cf716..00000000000
--- a/src/mongo/db/storage/mmap_v1/paths.cpp
+++ /dev/null
@@ -1,113 +0,0 @@
-/* Copyright 2010 10gen Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects
- * for all of the code used other than as permitted herein. If you modify
- * file(s) with this exception, you may extend this exception to your
- * version of the file(s), but you are not obligated to do so. If you do not
- * wish to do so, delete this exception statement from your version. If you
- * delete this exception statement from all source files in the program,
- * then also delete it in the license file.
- */
-
-#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kStorage
-
-#include "mongo/platform/basic.h"
-
-#include "mongo/db/storage/mmap_v1/paths.h"
-
-#include "mongo/util/log.h"
-
-namespace mongo {
-
-/** from a full path */
-RelativePath RelativePath::fromFullPath(boost::filesystem::path dbp, boost::filesystem::path f) {
- // filesystem::path normalizes / and backslash
- std::string fullpath = f.string();
- std::string relative = str::after(fullpath, dbp.string());
- if (relative.empty()) {
- log() << "warning file is not under db path? " << fullpath << ' ' << dbp.string();
- RelativePath rp;
- rp._p = fullpath;
- return rp;
- }
- if (str::startsWith(relative, "/") || str::startsWith(relative, "\\")) {
- relative.erase(0, 1);
- }
- RelativePath rp;
- rp._p = relative;
- return rp;
-}
-
-dev_t getPartition(const std::string& path) {
- struct stat stats;
-
- if (stat(path.c_str(), &stats) != 0) {
- uasserted(13646,
- str::stream() << "stat() failed for file: " << path << " "
- << errnoWithDescription());
- }
-
- return stats.st_dev;
-}
-
-void flushMyDirectory(const boost::filesystem::path& file) {
-#ifdef __linux__ // this isn't needed elsewhere
- static bool _warnedAboutFilesystem = false;
-    // If called without a fully qualified path this asserts, which makes mongoperf fail,
-    // so we only log a warning. A better long-term solution is needed.
- // massert(13652, str::stream() << "Couldn't find parent dir for file: " << file.string(),);
- if (!file.has_branch_path()) {
- log() << "warning flushMyDirectory couldn't find parent dir for file: " << file.string();
- return;
- }
-
-
- boost::filesystem::path dir = file.branch_path(); // parent_path in new boosts
-
- LOG(1) << "flushing directory " << dir.string();
-
- int fd = ::open(dir.string().c_str(), O_RDONLY); // DO NOT THROW OR ASSERT BEFORE CLOSING
- massert(13650,
- str::stream() << "Couldn't open directory '" << dir.string() << "' for flushing: "
- << errnoWithDescription(),
- fd >= 0);
- if (fsync(fd) != 0) {
- int e = errno;
- if (e == EINVAL) { // indicates filesystem does not support synchronization
- if (!_warnedAboutFilesystem) {
- log() << "\tWARNING: This file system is not supported. For further information"
- << " see:" << startupWarningsLog;
- log() << "\t\t\thttp://dochub.mongodb.org/core/unsupported-filesystems"
- << startupWarningsLog;
- log() << "\t\tPlease notify MongoDB, Inc. if an unlisted filesystem generated "
- << "this warning." << startupWarningsLog;
- _warnedAboutFilesystem = true;
- }
- } else {
- close(fd);
- massert(13651,
- str::stream() << "Couldn't fsync directory '" << dir.string() << "': "
- << errnoWithDescription(e),
- false);
- }
- }
- close(fd);
-#endif
-}
-}
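
flushMyDirectory() above follows the standard POSIX durability idiom: after creating, renaming, or deleting a file, the parent directory itself must be fsynced, or the directory entry may not survive a crash. A minimal sketch of the same idiom with simplified error handling (hypothetical helper, not the deleted function):

    #include <fcntl.h>
    #include <unistd.h>

    #include <cerrno>
    #include <stdexcept>
    #include <string>

    void fsyncParentDir(const std::string& dirPath) {
        int fd = ::open(dirPath.c_str(), O_RDONLY);
        if (fd < 0)
            throw std::runtime_error("open failed, errno " + std::to_string(errno));
        // EINVAL means the filesystem does not support fsync on directories,
        // which the deleted code downgrades to a startup warning.
        if (::fsync(fd) != 0 && errno != EINVAL) {
            int e = errno;
            ::close(fd);
            throw std::runtime_error("fsync failed, errno " + std::to_string(e));
        }
        ::close(fd);
    }
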
diff --git a/src/mongo/db/storage/mmap_v1/paths.h b/src/mongo/db/storage/mmap_v1/paths.h
deleted file mode 100644
index 384b6459419..00000000000
--- a/src/mongo/db/storage/mmap_v1/paths.h
+++ /dev/null
@@ -1,100 +0,0 @@
-// @file paths.h
-// file paths and directory handling
-
-/* Copyright 2010 10gen Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects
- * for all of the code used other than as permitted herein. If you modify
- * file(s) with this exception, you may extend this exception to your
- * version of the file(s), but you are not obligated to do so. If you do not
- * wish to do so, delete this exception statement from your version. If you
- * delete this exception statement from all source files in the program,
- * then also delete it in the license file.
- */
-
-#pragma once
-
-#include <boost/filesystem/path.hpp>
-#include <fcntl.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-
-#include "mongo/util/mongoutils/str.h"
-
-#include "mongo/db/storage/storage_options.h"
-
-namespace mongo {
-
-using namespace mongoutils;
-
-/** This is very much like a boost::path; however, we define a new type to get some type
-    checking. If you want to say "my param MUST be a relative path", use this.
-*/
-struct RelativePath {
- std::string _p;
-
- bool empty() const {
- return _p.empty();
- }
-
- static RelativePath fromRelativePath(const std::string& f) {
- RelativePath rp;
- rp._p = f;
- return rp;
- }
-
- /**
- * Returns path relative to 'dbpath' from a full path 'f'.
- */
- static RelativePath fromFullPath(boost::filesystem::path dbpath, boost::filesystem::path f);
-
- std::string toString() const {
- return _p;
- }
-
- bool operator!=(const RelativePath& r) const {
- return _p != r._p;
- }
- bool operator==(const RelativePath& r) const {
- return _p == r._p;
- }
- bool operator<(const RelativePath& r) const {
- return _p < r._p;
- }
-
- std::string asFullPath() const {
- boost::filesystem::path x(storageGlobalParams.dbpath);
- x /= _p;
- return x.string();
- }
-};
-
-dev_t getPartition(const std::string& path);
-
-inline bool onSamePartition(const std::string& path1, const std::string& path2) {
- dev_t dev1 = getPartition(path1);
- dev_t dev2 = getPartition(path2);
-
- return dev1 == dev2;
-}
-
-void flushMyDirectory(const boost::filesystem::path& file);
-
-boost::filesystem::path ensureParentDirCreated(const boost::filesystem::path& p);
-}
diff --git a/src/mongo/db/storage/mmap_v1/record.h b/src/mongo/db/storage/mmap_v1/record.h
deleted file mode 100644
index 401808742a9..00000000000
--- a/src/mongo/db/storage/mmap_v1/record.h
+++ /dev/null
@@ -1,181 +0,0 @@
-// record.h
-
-/**
-* Copyright (C) 2008 10gen Inc.
-*
-* This program is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Affero General Public License, version 3,
-* as published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU Affero General Public License for more details.
-*
-* You should have received a copy of the GNU Affero General Public License
-* along with this program. If not, see <http://www.gnu.org/licenses/>.
-*
-* As a special exception, the copyright holders give permission to link the
-* code of portions of this program with the OpenSSL library under certain
-* conditions as described in each individual source file and distribute
-* linked combinations including the program with the OpenSSL library. You
-* must comply with the GNU Affero General Public License in all respects for
-* all of the code used other than as permitted herein. If you modify file(s)
-* with this exception, you may extend this exception to your version of the
-* file(s), but you are not obligated to do so. If you do not wish to do so,
-* delete this exception statement from your version. If you delete this
-* exception statement from all source files in the program, then also delete
-* it in the license file.
-*/
-
-#pragma once
-
-#include "mongo/base/static_assert.h"
-#include "mongo/bson/bsonobjbuilder.h"
-#include "mongo/db/storage/mmap_v1/diskloc.h"
-#include "mongo/db/storage/record_data.h"
-#include "mongo/platform/atomic_word.h"
-
-namespace mongo {
-
-class DeletedRecord;
-
-/* MmapV1RecordHeader is a record in a datafile. DeletedRecord is similar but for deleted space.
-
-(11:03:20 AM) dm10gen: regarding extentOfs...
-(11:03:42 AM) dm10gen: an extent is a contiguous disk area, which contains many Records and
- DeletedRecords
-(11:03:56 AM) dm10gen: a DiskLoc has two pieces, the fileno and ofs.  (64 bit total)
-(11:04:16 AM) dm10gen: to keep the headers small, instead of storing a 64 bit ptr to the full extent
- address, we keep just the offset
-(11:04:29 AM) dm10gen: we can do this as we know the record's address, and it has the same fileNo
-(11:04:33 AM) dm10gen: see class DiskLoc for more info
-(11:04:43 AM) dm10gen: so that is how MmapV1RecordHeader::myExtent() works
-(11:04:53 AM) dm10gen: on an alloc(), when we build a new MmapV1RecordHeader, we must populate its
- extentOfs then
-*/
-#pragma pack(1)
-class MmapV1RecordHeader {
-public:
- enum HeaderSizeValue { HeaderSize = 16 };
-
- int lengthWithHeaders() const {
- return _lengthWithHeaders;
- }
- int& lengthWithHeaders() {
- return _lengthWithHeaders;
- }
-
- int extentOfs() const {
- return _extentOfs;
- }
- int& extentOfs() {
- return _extentOfs;
- }
-
- int nextOfs() const {
- return _nextOfs;
- }
- int& nextOfs() {
- return _nextOfs;
- }
-
- int prevOfs() const {
- return _prevOfs;
- }
- int& prevOfs() {
- return _prevOfs;
- }
-
- const char* data() const {
- return _data;
- }
- char* data() {
- return _data;
- }
-
- // XXX remove
- const char* dataNoThrowing() const {
- return _data;
- }
- char* dataNoThrowing() {
- return _data;
- }
-
- int netLength() const {
- return _netLength();
- }
-
-    /* Use this when a record is deleted; effectively a union over the next/prev fields. */
- DeletedRecord& asDeleted() {
- return *((DeletedRecord*)this);
- }
-
- DiskLoc myExtentLoc(const DiskLoc& myLoc) const {
- return DiskLoc(myLoc.a(), extentOfs());
- }
-
- struct NP {
- int nextOfs;
- int prevOfs;
- };
-
- NP* np() {
- return (NP*)&_nextOfs;
- }
-
- RecordData toRecordData() const {
- return RecordData(_data, _netLength());
- }
-
-private:
- int _netLength() const {
- return _lengthWithHeaders - HeaderSize;
- }
-
- int _lengthWithHeaders;
- int _extentOfs;
- int _nextOfs;
- int _prevOfs;
-
-    /** Be careful when referencing this: make sure your write intent covers it. */
- char _data[4];
-
-public:
- static bool MemoryTrackingEnabled;
-};
-#pragma pack()
-
-// TODO: this probably moves to record_store.h
-class DeletedRecord {
-public:
- int lengthWithHeaders() const {
- return _lengthWithHeaders;
- }
- int& lengthWithHeaders() {
- return _lengthWithHeaders;
- }
-
- int extentOfs() const {
- return _extentOfs;
- }
- int& extentOfs() {
- return _extentOfs;
- }
-
-    // TODO: we should not const_cast here, but the problem is DiskLoc::writing
- DiskLoc& nextDeleted() const {
- return const_cast<DiskLoc&>(_nextDeleted);
- }
-
-private:
- int _lengthWithHeaders;
-
- int _extentOfs;
-
- DiskLoc _nextDeleted;
-};
-
-MONGO_STATIC_ASSERT(16 == sizeof(DeletedRecord));
-
-} // namespace mongo
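The extentOfs discussion above is easier to follow with the arithmetic written out: the header packs four 32-bit ints ahead of the data (HeaderSize = 16), netLength() is lengthWithHeaders() - 16, and the owning extent's DiskLoc is rebuilt from the record's own file number plus the stored offset. A standalone sketch (RecordHeaderSketch is an illustrative name, not part of the tree):

#include <cstdint>

// Sketch of the on-disk layout of MmapV1RecordHeader, as deleted above.
struct RecordHeaderSketch {
    int32_t lengthWithHeaders;  // total on-disk size, this header included
    int32_t extentOfs;          // offset of the owning extent in the same file
    int32_t nextOfs;            // next record in this extent, or NullOfs
    int32_t prevOfs;            // previous record in this extent, or NullOfs
    // char data[] follows; netLength() == lengthWithHeaders - 16.
    // DeletedRecord overlays the same first 8 bytes plus a DiskLoc chain link.
};
static_assert(sizeof(RecordHeaderSketch) == 16, "matches HeaderSize");

// The extent lives in the same datafile as the record, so a full 64-bit
// DiskLoc never needs to be stored in the header:
//   DiskLoc myExtentLoc(myLoc) { return DiskLoc(myLoc.a(), extentOfs); }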
diff --git a/src/mongo/db/storage/mmap_v1/record_access_tracker.cpp b/src/mongo/db/storage/mmap_v1/record_access_tracker.cpp
deleted file mode 100644
index 1d55d272efc..00000000000
--- a/src/mongo/db/storage/mmap_v1/record_access_tracker.cpp
+++ /dev/null
@@ -1,338 +0,0 @@
-/**
- * Copyright (C) 2014 MongoDB Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#include "mongo/platform/basic.h"
-
-#include "mongo/db/storage/mmap_v1/record_access_tracker.h"
-
-#include <cstring>
-
-#include "mongo/base/init.h"
-#include "mongo/config.h"
-#include "mongo/db/storage/mmap_v1/record.h"
-#include "mongo/platform/bits.h"
-#include "mongo/stdx/memory.h"
-#include "mongo/util/clock_source.h"
-#include "mongo/util/debug_util.h"
-#include "mongo/util/processinfo.h"
-
-namespace mongo {
-
-namespace {
-
-static bool blockSupported = false;
-
-MONGO_INITIALIZER(RecordBlockSupported)(InitializerContext* cx) {
- blockSupported = ProcessInfo::blockCheckSupported();
- return Status::OK();
-}
-
-int hash(size_t region) {
- return abs(((7 + (int)(region & 0xFFFF)) * (11 + (int)((region >> 16) & 0xFFFF))
-#if defined(_WIN64) || defined(__amd64__)
- *
- (13 + (int)((region >> 32) & 0xFFFF)) * (17 + (int)((region >> 48) & 0xFFFF))
-#endif
- ) %
- RecordAccessTracker::SliceSize);
-}
-
-int bigHash(size_t region) {
- return hash(region) % RecordAccessTracker::BigHashSize;
-}
-
-namespace PointerTable {
-
-/* A "superpage" is a group of 16 contiguous pages that differ
- * only in the low-order 16 bits. This means that there is
- * enough room in the low-order bits to store a bitmap for each
- * page in the superpage.
- */
-static const size_t superpageMask = ~0xffffLL;
-static const size_t superpageShift = 16;
-static const size_t pageSelectorMask = 0xf000LL; // selects a page in a superpage
-static const int pageSelectorShift = 12;
-
-// Tunables
-static const int capacity = 128; // in superpages
-static const int bucketSize = 4; // half cache line
-static const int buckets = capacity / bucketSize;
-
-struct Data {
- /** organized similar to a CPU cache
- * bucketSize-way set associative
- * least-recently-inserted replacement policy
- */
- size_t _table[buckets][bucketSize];
- long long _lastReset; // time in millis
-};
-
-void reset(Data* data, ClockSource* cs) {
- memset(data->_table, 0, sizeof(data->_table));
- data->_lastReset = cs->now().toMillisSinceEpoch();
-}
-
-inline void resetIfNeeded(Data* data, ClockSource* cs) {
- const long long sinceReset = cs->now().toMillisSinceEpoch() - data->_lastReset;
- if (MONGO_unlikely(sinceReset > RecordAccessTracker::RotateTimeSecs * 1000)) {
- reset(data, cs);
- }
-}
-
-inline size_t pageBitOf(size_t ptr) {
- return 1LL << ((ptr & pageSelectorMask) >> pageSelectorShift);
-}
-
-inline size_t superpageOf(size_t ptr) {
- return ptr & superpageMask;
-}
-
-inline size_t bucketFor(size_t ptr) {
- return (ptr >> superpageShift) % buckets;
-}
-
-inline bool haveSeenPage(size_t superpage, size_t ptr) {
- return superpage & pageBitOf(ptr);
-}
-
-inline void markPageSeen(size_t& superpage, size_t ptr) {
- superpage |= pageBitOf(ptr);
-}
-
-/** Call this to check whether a page has been seen yet; it also marks the page as seen. */
-inline bool seen(Data* data, size_t ptr, ClockSource* cs) {
- resetIfNeeded(data, cs);
-
- // A bucket contains 4 superpages each containing 16 contiguous pages
- // See above for a more detailed explanation of superpages
- size_t* bucket = data->_table[bucketFor(ptr)];
-
- for (int i = 0; i < bucketSize; i++) {
- if (superpageOf(ptr) == superpageOf(bucket[i])) {
- if (haveSeenPage(bucket[i], ptr))
- return true;
-
- markPageSeen(bucket[i], ptr);
- return false;
- }
- }
-
- // superpage isn't in thread-local cache
- // slide bucket forward and add new superpage at front
- for (int i = bucketSize - 1; i > 0; i--)
- bucket[i] = bucket[i - 1];
-
- bucket[0] = superpageOf(ptr);
- markPageSeen(bucket[0], ptr);
-
- return false;
-}
-
-Data* getData();
-
-} // namespace PointerTable
-
-} // namespace
-
-//
-// Slice
-//
-
-RecordAccessTracker::Slice::Slice() {
- reset();
-}
-
-void RecordAccessTracker::Slice::reset() {
- memset(_data, 0, sizeof(_data));
-}
-
-RecordAccessTracker::State RecordAccessTracker::Slice::get(int regionHash,
- size_t region,
- short offset) {
- DEV verify(hash(region) == regionHash);
-
- Entry* e = _get(regionHash, region, false);
- if (!e)
- return Unk;
-
- return (e->value & (1ULL << offset)) ? In : Out;
-}
-
-bool RecordAccessTracker::Slice::put(int regionHash, size_t region, short offset) {
- DEV verify(hash(region) == regionHash);
-
- Entry* e = _get(regionHash, region, true);
- if (!e)
- return false;
-
- e->value |= 1ULL << offset;
- return true;
-}
-
-RecordAccessTracker::Entry* RecordAccessTracker::Slice::_get(int start, size_t region, bool add) {
- for (int i = 0; i < MaxChain; i++) {
- int bucket = (start + i) % SliceSize;
-
- if (_data[bucket].region == 0) {
- if (!add)
- return NULL;
-
- _data[bucket].region = region;
- return &_data[bucket];
- }
-
- if (_data[bucket].region == region) {
- return &_data[bucket];
- }
- }
-
- return NULL;
-}
-
-//
-// Rolling
-//
-
-bool RecordAccessTracker::Rolling::access(size_t region,
- short offset,
- bool doHalf,
- ClockSource* cs) {
- int regionHash = hash(region);
-
- stdx::lock_guard<SimpleMutex> lk(_lock);
-
- static int rarelyCount = 0;
- if (rarelyCount++ % (2048 / BigHashSize) == 0) {
- Date_t now = cs->now();
-
- if (now - _lastRotate > Seconds(static_cast<int64_t>(RotateTimeSecs))) {
- _rotate(cs);
- }
- }
-
- for (int i = 0; i < NumSlices / (doHalf ? 2 : 1); i++) {
- int pos = (_curSlice + i) % NumSlices;
- State s = _slices[pos].get(regionHash, region, offset);
-
- if (s == In)
- return true;
-
- if (s == Out) {
- _slices[pos].put(regionHash, region, offset);
- return false;
- }
- }
-
- // we weren't in any slice
- // so add to cur
- if (!_slices[_curSlice].put(regionHash, region, offset)) {
- _rotate(cs);
- _slices[_curSlice].put(regionHash, region, offset);
- }
- return false;
-}
-
-void RecordAccessTracker::Rolling::updateLastRotate(ClockSource* cs) {
- _lastRotate = cs->now();
-}
-
-void RecordAccessTracker::Rolling::_rotate(ClockSource* cs) {
- _curSlice = (_curSlice + 1) % NumSlices;
- _slices[_curSlice].reset();
- updateLastRotate(cs);
-}
-
-PointerTable::Data* PointerTable::getData() {
- thread_local std::unique_ptr<PointerTable::Data> data;
- if (!data)
- data = stdx::make_unique<PointerTable::Data>();
- return data.get();
-}
-
-//
-// RecordAccessTracker
-//
-
-RecordAccessTracker::RecordAccessTracker(ClockSource* cs)
- : _blockSupported(blockSupported), _clock(cs) {
- reset();
-}
-
-void RecordAccessTracker::reset() {
- PointerTable::reset(PointerTable::getData(), _clock);
- _rollingTable.reset(new Rolling[BigHashSize]);
- for (int i = 0; i < BigHashSize; i++) {
- _rollingTable[i].updateLastRotate(_clock);
- }
-}
-
-void RecordAccessTracker::markAccessed(const void* record) {
- const size_t page = reinterpret_cast<size_t>(record) >> 12;
- const size_t region = page >> 6;
- const size_t offset = page & 0x3f;
-
- const bool seen =
- PointerTable::seen(PointerTable::getData(), reinterpret_cast<size_t>(record), _clock);
- if (!seen) {
- _rollingTable[bigHash(region)].access(region, offset, true, _clock);
- }
-}
-
-
-bool RecordAccessTracker::checkAccessedAndMark(const void* record) {
- const size_t page = reinterpret_cast<size_t>(record) >> 12;
- const size_t region = page >> 6;
- const size_t offset = page & 0x3f;
-
- // This is like the "L1 cache". If we're a miss then we fall through and check the
- // "L2 cache". If we're still a miss, then we defer to a system-specific system
- // call (or give up and return false if deferring to the system call is not enabled).
- if (PointerTable::seen(PointerTable::getData(), reinterpret_cast<size_t>(record), _clock)) {
- return true;
- }
-
- // We were a miss in the PointerTable. See if we can find 'record' in the Rolling table.
- if (_rollingTable[bigHash(region)].access(region, offset, false, _clock)) {
- return true;
- }
-
- if (!_blockSupported) {
- // This means we don't fall back to a system call. Instead we assume things aren't
- // in memory. This could mean that we yield too much, but this is much better
- // than the alternative of not yielding through a page fault.
- return false;
- }
-
- return ProcessInfo::blockInMemory(const_cast<void*>(record));
-}
-
-void RecordAccessTracker::disableSystemBlockInMemCheck() {
- _blockSupported = false;
-}
-
-} // namespace mongo
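As a worked example of the address arithmetic used throughout this file: a 4KB page is ptr >> 12, 64 consecutive pages form one region (one bit per page in Entry::value), and 16 contiguous pages form one superpage whose low 16 address bits leave room for a page-seen bitmap. A minimal sketch using the same masks and shifts as PointerTable:

#include <cassert>
#include <cstddef>

int main() {
    const size_t ptr = 0x12345678;      // some record address
    const size_t page = ptr >> 12;      // 4KB pages
    const size_t region = page >> 6;    // 64 pages per region
    const size_t offset = page & 0x3f;  // bit index in the region's 64-bit mask
    assert(region == 0x48d && offset == 0x05);

    // Superpage bookkeeping: 16 pages share everything above the low 16 bits,
    // and bits 12..15 select one of the 16 page-seen bits.
    size_t superpage = ptr & ~0xffffULL;                        // == 0x12340000
    const size_t pageBit = 1ULL << ((ptr & 0xf000ULL) >> 12);   // == 1 << 5
    superpage |= pageBit;  // mark this page seen
    assert(superpage & pageBit);
    return 0;
}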
diff --git a/src/mongo/db/storage/mmap_v1/record_access_tracker.h b/src/mongo/db/storage/mmap_v1/record_access_tracker.h
deleted file mode 100644
index c4ec579c720..00000000000
--- a/src/mongo/db/storage/mmap_v1/record_access_tracker.h
+++ /dev/null
@@ -1,165 +0,0 @@
-/**
- * Copyright (C) 2014 MongoDB Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#pragma once
-
-#include <memory>
-
-#include "mongo/util/concurrency/mutex.h"
-#include "mongo/util/time_support.h"
-
-namespace mongo {
-
-class ClockSource;
-class Date_t;
-class MmapV1RecordHeader;
-
-/**
- * Used to implement likelyInPhysicalMemory() for the MMAP v1 storage engine. Since
- * MMAP v1 holds exclusive collection-level locks, it should yield the locks during a
- * page fault. The RecordAccessTracker is used to guess at which records are in memory,
- * so that a yield can be requested unless we're sure that the record has been
- * recently accessed.
- */
-class RecordAccessTracker {
- MONGO_DISALLOW_COPYING(RecordAccessTracker);
-
-public:
- RecordAccessTracker(ClockSource* cs);
-
- enum Constants {
- SliceSize = 1024,
- MaxChain = 20, // intentionally very low
- NumSlices = 10,
- RotateTimeSecs = 90,
- BigHashSize = 128
- };
-
- /**
- * Informs this record access tracker that 'record' has been accessed.
- */
- void markAccessed(const void* record);
-
- /**
- * @return whether or not 'record' has been marked as accessed recently. A return value
- * of true means that 'record' is likely in physical memory.
- *
- * Also has the side effect of marking 'record' as accessed.
- */
- bool checkAccessedAndMark(const void* record);
-
- /**
- * Clears out any history of record accesses.
- */
- void reset();
-
- //
- // For testing.
- //
-
- /**
-     * The checkAccessedAndMark() implementation falls back to making a system call if it
-     * appears that the record is not in physical memory. Use this method to disable
-     * that fallback for testing.
- */
- void disableSystemBlockInMemCheck();
-
-private:
- enum State { In, Out, Unk };
-
- struct Entry {
- size_t region;
- unsigned long long value;
- };
-
- /**
-     * A simple hash map from region -> status, covering a single window of time.
-     * Lookups use open addressing, with probe chains capped at MaxChain.
- */
- class Slice {
- public:
- Slice();
-
- void reset();
-
- State get(int regionHash, size_t region, short offset);
-
- /**
- * @return true if added, false if full
- */
- bool put(int regionHash, size_t region, short offset);
-
- private:
- Entry* _get(int start, size_t region, bool add);
-
- Entry _data[SliceSize];
- };
-
- /**
-     * Holds several time slices of memory status. New status is recorded in the
-     * current slice; after a period of time the oldest slice rolls off, so the
-     * same page will eventually be checked again.
- */
- class Rolling {
- public:
- Rolling() = default;
-
- /**
- * After this call, we assume the page is in RAM.
- *
-         * @param doHalf if this is a known-good access, only search the first half
-         *               of the slices before recording it.
-         *
-         * @return whether we already knew the page was in RAM
- */
- bool access(size_t region, short offset, bool doHalf, ClockSource* cs);
-
- /**
- * Updates _lastRotate to the current time.
- */
- void updateLastRotate(ClockSource* cs);
-
- private:
- void _rotate(ClockSource* cs);
-
- int _curSlice = 0;
- Date_t _lastRotate;
- Slice _slices[NumSlices];
-
- SimpleMutex _lock;
- };
-
-    // Should this record tracker fall back to making a system call?
- bool _blockSupported;
- ClockSource* _clock;
-
- // An array of Rolling instances for tracking record accesses.
- std::unique_ptr<Rolling[]> _rollingTable;
-};
-
-} // namespace mongo
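The intended call pattern for this class is small; a usage sketch follows (ClockSourceMock is the mocked clock that the unit tests in the next file also use, and exampleUsage is an illustrative name):

#include "mongo/db/storage/mmap_v1/record_access_tracker.h"
#include "mongo/util/clock_source_mock.h"

void exampleUsage() {
    mongo::ClockSourceMock clock;
    mongo::RecordAccessTracker tracker(&clock);
    tracker.disableSystemBlockInMemCheck();  // skip the OS fallback, as in tests

    int dummy;
    const void* record = &dummy;  // stands in for a record address
    if (!tracker.checkAccessedAndMark(record)) {
        // First touch of this page in the rolling window: the record was
        // probably not in RAM, so the caller would yield its locks rather
        // than risk holding them across a page fault.
    }
}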
diff --git a/src/mongo/db/storage/mmap_v1/record_access_tracker_test.cpp b/src/mongo/db/storage/mmap_v1/record_access_tracker_test.cpp
deleted file mode 100644
index 7b5c13a1029..00000000000
--- a/src/mongo/db/storage/mmap_v1/record_access_tracker_test.cpp
+++ /dev/null
@@ -1,150 +0,0 @@
-/**
- * Copyright (C) 2014 MongoDB Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#include "mongo/db/storage/mmap_v1/record_access_tracker.h"
-
-#include "mongo/db/storage/mmap_v1/record.h"
-#include "mongo/stdx/memory.h"
-#include "mongo/unittest/unittest.h"
-#include "mongo/util/clock_source_mock.h"
-
-using namespace mongo;
-
-namespace {
-
-const std::unique_ptr<ClockSource> clock = stdx::make_unique<ClockSourceMock>();
-
-const void* pointerOf(int data) {
-#pragma warning(push)
-// C4312: 'reinterpret_cast': conversion from 'int' to 'const void *' of greater size
-#pragma warning(disable : 4312)
- return reinterpret_cast<const void*>(data);
-#pragma warning(pop)
-}
-
-TEST(RecordAccessTrackerTest, TouchRecordTwice) {
- RecordAccessTracker tracker(clock.get());
- tracker.disableSystemBlockInMemCheck();
-
- const void* record = pointerOf(0x10003);
-
- ASSERT_FALSE(tracker.checkAccessedAndMark(record));
- ASSERT_TRUE(tracker.checkAccessedAndMark(record));
-}
-
-TEST(RecordAccessTrackerTest, TouchPageTwice) {
- RecordAccessTracker tracker(clock.get());
- tracker.disableSystemBlockInMemCheck();
-
- const void* firstRecord = pointerOf(0x10003);
- const void* secondRecord = pointerOf(0x10004);
-
- ASSERT_FALSE(tracker.checkAccessedAndMark(firstRecord));
- ASSERT_TRUE(tracker.checkAccessedAndMark(secondRecord));
- ASSERT_TRUE(tracker.checkAccessedAndMark(firstRecord));
- ASSERT_TRUE(tracker.checkAccessedAndMark(secondRecord));
-}
-
-TEST(RecordAccessTrackerTest, TouchTwoPagesTwice) {
- RecordAccessTracker tracker(clock.get());
- tracker.disableSystemBlockInMemCheck();
-
- const void* firstRecordFirstPage = pointerOf(0x11000);
- const void* secondRecordFirstPage = pointerOf(0x11100);
-
- const void* firstRecordSecondPage = pointerOf(0x12000);
- const void* secondRecordSecondPage = pointerOf(0x12100);
-
- ASSERT_FALSE(tracker.checkAccessedAndMark(firstRecordFirstPage));
- ASSERT_FALSE(tracker.checkAccessedAndMark(firstRecordSecondPage));
- ASSERT_TRUE(tracker.checkAccessedAndMark(secondRecordFirstPage));
- ASSERT_TRUE(tracker.checkAccessedAndMark(secondRecordSecondPage));
-}
-
-// Tests RecordAccessTracker::reset().
-TEST(RecordAccessTrackerTest, TouchTwoPagesTwiceWithReset) {
- RecordAccessTracker tracker(clock.get());
- tracker.disableSystemBlockInMemCheck();
-
- const void* firstRecordFirstPage = pointerOf(0x11000);
- const void* secondRecordFirstPage = pointerOf(0x11100);
-
- const void* firstRecordSecondPage = pointerOf(0x12000);
- const void* secondRecordSecondPage = pointerOf(0x12100);
-
- ASSERT_FALSE(tracker.checkAccessedAndMark(firstRecordFirstPage));
- ASSERT_FALSE(tracker.checkAccessedAndMark(firstRecordSecondPage));
- ASSERT_TRUE(tracker.checkAccessedAndMark(secondRecordFirstPage));
- ASSERT_TRUE(tracker.checkAccessedAndMark(secondRecordSecondPage));
-
- // Now reset and make sure things look as though we have a fresh RecordAccessTracker.
- tracker.reset();
- ASSERT_FALSE(tracker.checkAccessedAndMark(firstRecordFirstPage));
- ASSERT_FALSE(tracker.checkAccessedAndMark(firstRecordSecondPage));
- ASSERT_TRUE(tracker.checkAccessedAndMark(secondRecordFirstPage));
- ASSERT_TRUE(tracker.checkAccessedAndMark(secondRecordSecondPage));
-}
-
-// Tests RecordAccessTracker::markAccessed().
-TEST(RecordAccessTrackerTest, AccessTest) {
- RecordAccessTracker tracker(clock.get());
- tracker.disableSystemBlockInMemCheck();
-
- // Mark the first page in superpage 3 as accessed.
- const void* record = pointerOf(0x30000);
- tracker.markAccessed(record);
-
- // Test that all remaining addresses in the page give true when asked whether they are
- // recently accessed.
- for (int i = 0x30001; i < 0x31000; i++) {
- const void* touchedPageRecord = pointerOf(i);
- ASSERT_TRUE(tracker.checkAccessedAndMark(touchedPageRecord));
- }
-}
-
-// Touch pages in 128 separate superpages, and make sure that they all are reported as
-// recently accessed.
-TEST(RecordAccessTrackerTest, Access128Superpages) {
- RecordAccessTracker tracker(clock.get());
- tracker.disableSystemBlockInMemCheck();
-
- // Touch the pages.
- for (int i = 0x00000; i < 0x800000; i += 0x10000) {
- const void* touchedPageRecord = pointerOf(i);
- tracker.markAccessed(touchedPageRecord);
- }
-
- // Ensure we know that the pages have all been touched.
- for (int i = 0x00000; i < 0x800000; i += 0x10000) {
- // It should be fine if there is an offset of, say, 0xA, into the page.
- const void* touchedPageRecord = pointerOf(i + 0xA);
- ASSERT_TRUE(tracker.checkAccessedAndMark(touchedPageRecord));
- }
-}
-
-} // namespace
diff --git a/src/mongo/db/storage/mmap_v1/record_store_v1_base.cpp b/src/mongo/db/storage/mmap_v1/record_store_v1_base.cpp
deleted file mode 100644
index 6bfcaefcdde..00000000000
--- a/src/mongo/db/storage/mmap_v1/record_store_v1_base.cpp
+++ /dev/null
@@ -1,962 +0,0 @@
-// record_store_v1_base.cpp
-
-/**
- * Copyright (C) 2013-2014 MongoDB Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kStorage
-
-#include "mongo/platform/basic.h"
-
-#include "mongo/db/storage/mmap_v1/record_store_v1_base.h"
-
-#include "mongo/base/static_assert.h"
-#include "mongo/db/catalog/collection.h"
-#include "mongo/db/client.h"
-#include "mongo/db/curop.h"
-#include "mongo/db/operation_context.h"
-#include "mongo/db/storage/mmap_v1/extent.h"
-#include "mongo/db/storage/mmap_v1/extent_manager.h"
-#include "mongo/db/storage/mmap_v1/record.h"
-#include "mongo/db/storage/mmap_v1/record_store_v1_repair_iterator.h"
-#include "mongo/db/storage/mmap_v1/touch_pages.h"
-#include "mongo/stdx/memory.h"
-#include "mongo/util/log.h"
-#include "mongo/util/progress_meter.h"
-#include "mongo/util/timer.h"
-
-namespace mongo {
-
-using std::unique_ptr;
-using std::set;
-using std::string;
-
-/* Deleted list buckets are used to quickly locate free space based on size. Each bucket
- contains records up to that size (meaning a record with a size exactly equal to
- bucketSizes[n] would go into bucket n+1).
-*/
-const int RecordStoreV1Base::bucketSizes[] = {
- 0x20,
- 0x40,
- 0x80,
- 0x100, // 32, 64, 128, 256
- 0x200,
- 0x400,
- 0x800,
- 0x1000, // 512, 1K, 2K, 4K
- 0x2000,
- 0x4000,
- 0x8000,
- 0x10000, // 8K, 16K, 32K, 64K
- 0x20000,
- 0x40000,
- 0x80000,
- 0x100000, // 128K, 256K, 512K, 1M
- 0x200000,
- 0x400000,
- 0x600000,
- 0x800000, // 2M, 4M, 6M, 8M
- 0xA00000,
- 0xC00000,
-    0xE00000, // 10M, 12M, 14M
- MaxAllowedAllocation, // 16.5M
- MaxAllowedAllocation + 1, // Only MaxAllowedAllocation sized records go here.
- INT_MAX, // "oversized" bucket for unused parts of extents.
-};
-
-// If this fails, it means that bucketSizes doesn't have the correct number of entries.
-MONGO_STATIC_ASSERT(sizeof(RecordStoreV1Base::bucketSizes) /
- sizeof(RecordStoreV1Base::bucketSizes[0]) ==
- RecordStoreV1Base::Buckets);
-
-SavedCursorRegistry::~SavedCursorRegistry() {
- for (SavedCursorSet::iterator it = _cursors.begin(); it != _cursors.end(); it++) {
- (*it)->_registry = NULL; // prevent SavedCursor destructor from accessing this
- }
-}
-
-void SavedCursorRegistry::registerCursor(SavedCursor* cursor) {
- invariant(!cursor->_registry);
- cursor->_registry = this;
- scoped_spinlock lock(_mutex);
- _cursors.insert(cursor);
-}
-
-bool SavedCursorRegistry::unregisterCursor(SavedCursor* cursor) {
- if (!cursor->_registry) {
- return false;
- }
- invariant(cursor->_registry == this);
- cursor->_registry = NULL;
- scoped_spinlock lock(_mutex);
- invariant(_cursors.erase(cursor));
- return true;
-}
-
-void SavedCursorRegistry::invalidateCursorsForBucket(DiskLoc bucket) {
- // While this is not strictly necessary as an exclusive collection lock will be held,
- // it's cleaner to just make the SavedCursorRegistry thread-safe. Spinlock is OK here.
- scoped_spinlock lock(_mutex);
- for (SavedCursorSet::iterator it = _cursors.begin(); it != _cursors.end();) {
- if ((*it)->bucket == bucket) {
- (*it)->_registry = NULL; // prevent ~SavedCursor from trying to unregister
- _cursors.erase(it++);
- } else {
- it++;
- }
- }
-}
-
-RecordStoreV1Base::RecordStoreV1Base(StringData ns,
- RecordStoreV1MetaData* details,
- ExtentManager* em,
- bool isSystemIndexes)
- : RecordStore(ns), _details(details), _extentManager(em), _isSystemIndexes(isSystemIndexes) {}
-
-RecordStoreV1Base::~RecordStoreV1Base() {}
-
-
-int64_t RecordStoreV1Base::storageSize(OperationContext* opCtx,
- BSONObjBuilder* extraInfo,
- int level) const {
- BSONArrayBuilder extentInfo;
-
- int64_t total = 0;
- int n = 0;
-
- DiskLoc cur = _details->firstExtent(opCtx);
-
- while (!cur.isNull()) {
- Extent* e = _extentManager->getExtent(cur);
-
- total += e->length;
- n++;
-
- if (extraInfo && level > 0) {
-            extentInfo.append(BSON("len" << e->length << "loc" << e->myLoc.toBSONObj()));
- }
- cur = e->xnext;
- }
-
- if (extraInfo) {
- extraInfo->append("numExtents", n);
- if (level > 0)
- extraInfo->append("extents", extentInfo.arr());
- }
-
- return total;
-}
-
-RecordData RecordStoreV1Base::dataFor(OperationContext* opCtx, const RecordId& loc) const {
- return recordFor(DiskLoc::fromRecordId(loc))->toRecordData();
-}
-
-bool RecordStoreV1Base::findRecord(OperationContext* opCtx,
- const RecordId& loc,
- RecordData* rd) const {
-    // This is a bit odd: the storage engine API implies the record _has_ to exist,
-    // and we have no way to actually check that here, so we assume the best.
- MmapV1RecordHeader* rec = recordFor(DiskLoc::fromRecordId(loc));
- if (!rec) {
- return false;
- }
- *rd = rec->toRecordData();
- return true;
-}
-
-MmapV1RecordHeader* RecordStoreV1Base::recordFor(const DiskLoc& loc) const {
- return _extentManager->recordForV1(loc);
-}
-
-const DeletedRecord* RecordStoreV1Base::deletedRecordFor(const DiskLoc& loc) const {
- invariant(loc.a() != -1);
- return reinterpret_cast<const DeletedRecord*>(recordFor(loc));
-}
-
-DeletedRecord* RecordStoreV1Base::drec(const DiskLoc& loc) const {
- invariant(loc.a() != -1);
- return reinterpret_cast<DeletedRecord*>(recordFor(loc));
-}
-
-Extent* RecordStoreV1Base::_getExtent(OperationContext* opCtx, const DiskLoc& loc) const {
- return _extentManager->getExtent(loc);
-}
-
-DiskLoc RecordStoreV1Base::_getExtentLocForRecord(OperationContext* opCtx,
- const DiskLoc& loc) const {
- return _extentManager->extentLocForV1(loc);
-}
-
-
-DiskLoc RecordStoreV1Base::getNextRecord(OperationContext* opCtx, const DiskLoc& loc) const {
- DiskLoc next = getNextRecordInExtent(opCtx, loc);
- if (!next.isNull()) {
- return next;
- }
-
- // now traverse extents
-
- Extent* e = _getExtent(opCtx, _getExtentLocForRecord(opCtx, loc));
- while (1) {
- if (e->xnext.isNull())
- return DiskLoc(); // end of collection
- e = _getExtent(opCtx, e->xnext);
- if (!e->firstRecord.isNull())
- break;
- // entire extent could be empty, keep looking
- }
- return e->firstRecord;
-}
-
-DiskLoc RecordStoreV1Base::getPrevRecord(OperationContext* opCtx, const DiskLoc& loc) const {
- DiskLoc prev = getPrevRecordInExtent(opCtx, loc);
- if (!prev.isNull()) {
- return prev;
- }
-
- // now traverse extents
-
- Extent* e = _getExtent(opCtx, _getExtentLocForRecord(opCtx, loc));
- while (1) {
- if (e->xprev.isNull())
- return DiskLoc(); // end of collection
- e = _getExtent(opCtx, e->xprev);
- if (!e->firstRecord.isNull())
- break;
- // entire extent could be empty, keep looking
- }
- return e->lastRecord;
-}
-
-DiskLoc RecordStoreV1Base::_findFirstSpot(OperationContext* opCtx,
- const DiskLoc& extDiskLoc,
- Extent* e) {
- DiskLoc emptyLoc = extDiskLoc;
- emptyLoc.inc(Extent::HeaderSize());
- int delRecLength = e->length - Extent::HeaderSize();
- if (delRecLength >= 32 * 1024 && NamespaceString::virtualized(_ns) && !isCapped()) {
-        // probably an index, so skip forward to keep its records page-aligned
- int& ofs = emptyLoc.GETOFS();
- int newOfs = (ofs + 0xfff) & ~0xfff;
- delRecLength -= (newOfs - ofs);
- dassert(delRecLength > 0);
- ofs = newOfs;
- }
-
- DeletedRecord* empty = opCtx->recoveryUnit()->writing(drec(emptyLoc));
- empty->lengthWithHeaders() = delRecLength;
- empty->extentOfs() = e->myLoc.getOfs();
- empty->nextDeleted().Null();
- return emptyLoc;
-}
-
-DiskLoc RecordStoreV1Base::getNextRecordInExtent(OperationContext* opCtx,
- const DiskLoc& loc) const {
- int nextOffset = recordFor(loc)->nextOfs();
-
- if (nextOffset == DiskLoc::NullOfs)
- return DiskLoc();
-
- fassert(17441, abs(nextOffset) >= 8); // defensive
- DiskLoc result(loc.a(), nextOffset);
- return result;
-}
-
-DiskLoc RecordStoreV1Base::getPrevRecordInExtent(OperationContext* opCtx,
- const DiskLoc& loc) const {
- int prevOffset = recordFor(loc)->prevOfs();
-
- if (prevOffset == DiskLoc::NullOfs)
- return DiskLoc();
-
- fassert(17442, abs(prevOffset) >= 8); // defensive
- DiskLoc result(loc.a(), prevOffset);
- return result;
-}
-
-Status RecordStoreV1Base::insertRecordsWithDocWriter(OperationContext* opCtx,
- const DocWriter* const* docs,
- const Timestamp*,
- size_t nDocs,
- RecordId* idsOut) {
- for (size_t i = 0; i < nDocs; i++) {
- int docSize = docs[i]->documentSize();
- if (docSize < 4) {
- return Status(ErrorCodes::InvalidLength, "record has to be >= 4 bytes");
- }
- const int lenWHdr = docSize + MmapV1RecordHeader::HeaderSize;
- if (lenWHdr > MaxAllowedAllocation) {
- return Status(ErrorCodes::InvalidLength, "record has to be <= 16.5MB");
- }
- const int lenToAlloc = (docs[i]->addPadding() && shouldPadInserts())
- ? quantizeAllocationSpace(lenWHdr)
- : lenWHdr;
-
- StatusWith<DiskLoc> loc = allocRecord(opCtx, lenToAlloc, /*enforceQuota=*/false);
- if (!loc.isOK())
- return loc.getStatus();
-
- MmapV1RecordHeader* r = recordFor(loc.getValue());
- fassert(17319, r->lengthWithHeaders() >= lenWHdr);
-
- r = reinterpret_cast<MmapV1RecordHeader*>(opCtx->recoveryUnit()->writingPtr(r, lenWHdr));
- docs[i]->writeDocument(r->data());
-
- _addRecordToRecListInExtent(opCtx, r, loc.getValue());
-
- _details->incrementStats(opCtx, r->netLength(), 1);
-
- if (idsOut)
- idsOut[i] = loc.getValue().toRecordId();
- }
-
-
- return Status::OK();
-}
-
-
-StatusWith<RecordId> RecordStoreV1Base::insertRecord(
- OperationContext* opCtx, const char* data, int len, Timestamp, bool enforceQuota) {
- if (len < 4) {
- return StatusWith<RecordId>(ErrorCodes::InvalidLength, "record has to be >= 4 bytes");
- }
-
- if (len + MmapV1RecordHeader::HeaderSize > MaxAllowedAllocation) {
- return StatusWith<RecordId>(ErrorCodes::InvalidLength, "record has to be <= 16.5MB");
- }
-
- return _insertRecord(opCtx, data, len, enforceQuota);
-}
-
-StatusWith<RecordId> RecordStoreV1Base::_insertRecord(OperationContext* opCtx,
- const char* data,
- int len,
- bool enforceQuota) {
- const int lenWHdr = len + MmapV1RecordHeader::HeaderSize;
- const int lenToAlloc = shouldPadInserts() ? quantizeAllocationSpace(lenWHdr) : lenWHdr;
- fassert(17208, lenToAlloc >= lenWHdr);
-
- StatusWith<DiskLoc> loc = allocRecord(opCtx, lenToAlloc, enforceQuota);
- if (!loc.isOK())
- return StatusWith<RecordId>(loc.getStatus());
-
- MmapV1RecordHeader* r = recordFor(loc.getValue());
- fassert(17210, r->lengthWithHeaders() >= lenWHdr);
-
- // copy the data
- r = reinterpret_cast<MmapV1RecordHeader*>(opCtx->recoveryUnit()->writingPtr(r, lenWHdr));
- memcpy(r->data(), data, len);
-
- _addRecordToRecListInExtent(opCtx, r, loc.getValue());
-
- _details->incrementStats(opCtx, r->netLength(), 1);
-
- return StatusWith<RecordId>(loc.getValue().toRecordId());
-}
-
-Status RecordStoreV1Base::updateRecord(OperationContext* opCtx,
- const RecordId& oldLocation,
- const char* data,
- int dataSize,
- bool enforceQuota,
- UpdateNotifier* notifier) {
- MmapV1RecordHeader* oldRecord = recordFor(DiskLoc::fromRecordId(oldLocation));
- if (oldRecord->netLength() >= dataSize) {
- // Make sure to notify other queries before we do an in-place update.
- if (notifier) {
- Status callbackStatus = notifier->recordStoreGoingToUpdateInPlace(opCtx, oldLocation);
- if (!callbackStatus.isOK())
- return callbackStatus;
- }
-
- // we fit
- memcpy(opCtx->recoveryUnit()->writingPtr(oldRecord->data(), dataSize), data, dataSize);
- return Status::OK();
- }
-
- // We enforce the restriction of unchanging capped doc sizes above the storage layer.
- invariant(!isCapped());
-
- return {ErrorCodes::NeedsDocumentMove, "Update requires document move"};
-}
-
-bool RecordStoreV1Base::updateWithDamagesSupported() const {
- return true;
-}
-
-StatusWith<RecordData> RecordStoreV1Base::updateWithDamages(
- OperationContext* opCtx,
- const RecordId& loc,
- const RecordData& oldRec,
- const char* damageSource,
- const mutablebson::DamageVector& damages) {
- MmapV1RecordHeader* rec = recordFor(DiskLoc::fromRecordId(loc));
- char* root = rec->data();
-
-    // All updates can be applied in place. Do so via the recovery unit's writing pointer for durability.
- mutablebson::DamageVector::const_iterator where = damages.begin();
- const mutablebson::DamageVector::const_iterator end = damages.end();
- for (; where != end; ++where) {
- const char* sourcePtr = damageSource + where->sourceOffset;
- void* targetPtr =
- opCtx->recoveryUnit()->writingPtr(root + where->targetOffset, where->size);
- std::memcpy(targetPtr, sourcePtr, where->size);
- }
-
- return rec->toRecordData();
-}
-
-void RecordStoreV1Base::deleteRecord(OperationContext* opCtx, const RecordId& rid) {
- const DiskLoc dl = DiskLoc::fromRecordId(rid);
-
- MmapV1RecordHeader* todelete = recordFor(dl);
- invariant(todelete->netLength() >= 4); // this is required for defensive code
-
- /* remove ourself from the record next/prev chain */
- {
- if (todelete->prevOfs() != DiskLoc::NullOfs) {
- DiskLoc prev = getPrevRecordInExtent(opCtx, dl);
- MmapV1RecordHeader* prevRecord = recordFor(prev);
- opCtx->recoveryUnit()->writingInt(prevRecord->nextOfs()) = todelete->nextOfs();
- }
-
- if (todelete->nextOfs() != DiskLoc::NullOfs) {
- DiskLoc next = getNextRecord(opCtx, dl);
- MmapV1RecordHeader* nextRecord = recordFor(next);
- opCtx->recoveryUnit()->writingInt(nextRecord->prevOfs()) = todelete->prevOfs();
- }
- }
-
- /* remove ourself from extent pointers */
- {
- DiskLoc extentLoc = todelete->myExtentLoc(dl);
- Extent* e = _getExtent(opCtx, extentLoc);
- if (e->firstRecord == dl) {
- opCtx->recoveryUnit()->writing(&e->firstRecord);
- if (todelete->nextOfs() == DiskLoc::NullOfs)
- e->firstRecord.Null();
- else
- e->firstRecord.set(dl.a(), todelete->nextOfs());
- }
- if (e->lastRecord == dl) {
- opCtx->recoveryUnit()->writing(&e->lastRecord);
- if (todelete->prevOfs() == DiskLoc::NullOfs)
- e->lastRecord.Null();
- else
- e->lastRecord.set(dl.a(), todelete->prevOfs());
- }
- }
-
- /* add to the free list */
- {
- _details->incrementStats(opCtx, -1 * todelete->netLength(), -1);
-
- if (_isSystemIndexes) {
-            /* temp: if in system.indexes, don't reuse, and zero out. We want to be
-               careful until this is validated more, as IndexDetails has pointers to
-               this disk location, so an incorrectly done remove would cause a lot
-               of problems.
- */
- memset(opCtx->recoveryUnit()->writingPtr(todelete, todelete->lengthWithHeaders()),
- 0,
- todelete->lengthWithHeaders());
- } else {
- // this is defensive so we can detect if we are still using a location
- // that was deleted
- memset(opCtx->recoveryUnit()->writingPtr(todelete->data(), 4), 0xee, 4);
- addDeletedRec(opCtx, dl);
- }
- }
-}
-
-std::unique_ptr<RecordCursor> RecordStoreV1Base::getCursorForRepair(OperationContext* opCtx) const {
- return stdx::make_unique<RecordStoreV1RepairCursor>(opCtx, this);
-}
-
-void RecordStoreV1Base::_addRecordToRecListInExtent(OperationContext* opCtx,
- MmapV1RecordHeader* r,
- DiskLoc loc) {
- dassert(recordFor(loc) == r);
- DiskLoc extentLoc = _getExtentLocForRecord(opCtx, loc);
- Extent* e = _getExtent(opCtx, extentLoc);
- if (e->lastRecord.isNull()) {
- *opCtx->recoveryUnit()->writing(&e->firstRecord) = loc;
- *opCtx->recoveryUnit()->writing(&e->lastRecord) = loc;
- r->prevOfs() = r->nextOfs() = DiskLoc::NullOfs;
- } else {
- MmapV1RecordHeader* oldlast = recordFor(e->lastRecord);
- r->prevOfs() = e->lastRecord.getOfs();
- r->nextOfs() = DiskLoc::NullOfs;
- opCtx->recoveryUnit()->writingInt(oldlast->nextOfs()) = loc.getOfs();
- *opCtx->recoveryUnit()->writing(&e->lastRecord) = loc;
- }
-}
-
-void RecordStoreV1Base::increaseStorageSize(OperationContext* opCtx, int size, bool enforceQuota) {
- DiskLoc eloc = _extentManager->allocateExtent(opCtx, isCapped(), size, enforceQuota);
- Extent* e = _extentManager->getExtent(eloc);
- invariant(e);
-
- *opCtx->recoveryUnit()->writing(&e->nsDiagnostic) = _ns;
-
- opCtx->recoveryUnit()->writing(&e->xnext)->Null();
- opCtx->recoveryUnit()->writing(&e->xprev)->Null();
- opCtx->recoveryUnit()->writing(&e->firstRecord)->Null();
- opCtx->recoveryUnit()->writing(&e->lastRecord)->Null();
-
- DiskLoc emptyLoc = _findFirstSpot(opCtx, eloc, e);
-
- if (_details->lastExtent(opCtx).isNull()) {
- invariant(_details->firstExtent(opCtx).isNull());
- _details->setFirstExtent(opCtx, eloc);
- _details->setLastExtent(opCtx, eloc);
- _details->setCapExtent(opCtx, eloc);
- invariant(e->xprev.isNull());
- invariant(e->xnext.isNull());
- } else {
- invariant(!_details->firstExtent(opCtx).isNull());
- *opCtx->recoveryUnit()->writing(&e->xprev) = _details->lastExtent(opCtx);
- *opCtx->recoveryUnit()->writing(
- &_extentManager->getExtent(_details->lastExtent(opCtx))->xnext) = eloc;
- _details->setLastExtent(opCtx, eloc);
- }
-
- _details->setLastExtentSize(opCtx, e->length);
-
- addDeletedRec(opCtx, emptyLoc);
-}
-
-Status RecordStoreV1Base::validate(OperationContext* opCtx,
- ValidateCmdLevel level,
- ValidateAdaptor* adaptor,
- ValidateResults* results,
- BSONObjBuilder* output) {
-    // 1) basic stats that require no iteration
- // 2) extent level info
- // 3) check extent start and end
- // 4) check each non-deleted record
- // 5) check deleted list
-
- // -------------
-
- // 1111111111111111111
- if (isCapped()) {
- output->appendBool("capped", true);
- output->appendNumber("max", _details->maxCappedDocs());
- }
-
- output->appendNumber("datasize", _details->dataSize());
- output->appendNumber("nrecords", _details->numRecords());
- output->appendNumber("lastExtentSize", _details->lastExtentSize(opCtx));
-
- if (_details->firstExtent(opCtx).isNull())
- output->append("firstExtent", "null");
- else
- output->append("firstExtent",
- str::stream() << _details->firstExtent(opCtx).toString() << " ns:"
- << _getExtent(opCtx, _details->firstExtent(opCtx))
- ->nsDiagnostic.toString());
- if (_details->lastExtent(opCtx).isNull())
- output->append("lastExtent", "null");
- else
- output->append("lastExtent",
- str::stream() << _details->lastExtent(opCtx).toString() << " ns:"
- << _getExtent(opCtx, _details->lastExtent(opCtx))
- ->nsDiagnostic.toString());
-
- // 22222222222222222222222222
- { // validate extent basics
- BSONArrayBuilder extentData;
- int extentCount = 0;
- DiskLoc extentDiskLoc;
- try {
- if (!_details->firstExtent(opCtx).isNull()) {
- _getExtent(opCtx, _details->firstExtent(opCtx))->assertOk();
- _getExtent(opCtx, _details->lastExtent(opCtx))->assertOk();
- }
-
- extentDiskLoc = _details->firstExtent(opCtx);
- while (!extentDiskLoc.isNull()) {
- Extent* thisExtent = _getExtent(opCtx, extentDiskLoc);
- if (level == kValidateFull) {
- extentData << thisExtent->dump();
- }
- if (!thisExtent->validates(extentDiskLoc, &results->errors)) {
- results->valid = false;
- }
- DiskLoc nextDiskLoc = thisExtent->xnext;
-
- if (extentCount > 0 && !nextDiskLoc.isNull() &&
- _getExtent(opCtx, nextDiskLoc)->xprev != extentDiskLoc) {
- StringBuilder sb;
- sb << "'xprev' pointer " << _getExtent(opCtx, nextDiskLoc)->xprev.toString()
- << " in extent " << nextDiskLoc.toString() << " does not point to extent "
- << extentDiskLoc.toString();
- results->errors.push_back(sb.str());
- results->valid = false;
- }
- if (nextDiskLoc.isNull() && extentDiskLoc != _details->lastExtent(opCtx)) {
- StringBuilder sb;
- sb << "'lastExtent' pointer " << _details->lastExtent(opCtx).toString()
- << " does not point to last extent in list " << extentDiskLoc.toString();
- results->errors.push_back(sb.str());
- results->valid = false;
- }
- extentDiskLoc = nextDiskLoc;
- extentCount++;
- opCtx->checkForInterrupt();
- }
- } catch (const DBException& e) {
- StringBuilder sb;
- sb << "exception validating extent " << extentCount << ": " << e.what();
- results->errors.push_back(sb.str());
- results->valid = false;
- return Status::OK();
- }
- output->append("extentCount", extentCount);
-
- if (level == kValidateFull)
- output->appendArray("extents", extentData.arr());
- }
-
- try {
- // 333333333333333333333333333
- bool testingLastExtent = false;
- try {
- DiskLoc firstExtentLoc = _details->firstExtent(opCtx);
- if (firstExtentLoc.isNull()) {
- // this is ok
- } else {
- output->append("firstExtentDetails", _getExtent(opCtx, firstExtentLoc)->dump());
- if (!_getExtent(opCtx, firstExtentLoc)->xprev.isNull()) {
- StringBuilder sb;
- sb << "'xprev' pointer in 'firstExtent' "
- << _details->firstExtent(opCtx).toString() << " is "
- << _getExtent(opCtx, firstExtentLoc)->xprev.toString() << ", should be null";
- results->errors.push_back(sb.str());
- results->valid = false;
- }
- }
- testingLastExtent = true;
- DiskLoc lastExtentLoc = _details->lastExtent(opCtx);
- if (lastExtentLoc.isNull()) {
- // this is ok
- } else {
- if (firstExtentLoc != lastExtentLoc) {
- output->append("lastExtentDetails", _getExtent(opCtx, lastExtentLoc)->dump());
- if (!_getExtent(opCtx, lastExtentLoc)->xnext.isNull()) {
- StringBuilder sb;
- sb << "'xnext' pointer in 'lastExtent' " << lastExtentLoc.toString()
- << " is " << _getExtent(opCtx, lastExtentLoc)->xnext.toString()
- << ", should be null";
- results->errors.push_back(sb.str());
- results->valid = false;
- }
- }
- }
- } catch (const DBException& e) {
- StringBuilder sb;
- sb << "exception processing '" << (testingLastExtent ? "lastExtent" : "firstExtent")
- << "': " << e.what();
- results->errors.push_back(sb.str());
- results->valid = false;
- }
-
- // 4444444444444444444444444
-
- set<DiskLoc> recs;
- int n = 0;
- int nInvalid = 0;
- long long nQuantizedSize = 0;
- long long len = 0;
- long long nlen = 0;
- long long bsonLen = 0;
- int outOfOrder = 0;
- DiskLoc dl_last;
-
- auto cursor = getCursor(opCtx);
- while (auto record = cursor->next()) {
- const auto dl = DiskLoc::fromRecordId(record->id);
- n++;
-
- if (n < 1000000 && level == kValidateFull)
- recs.insert(dl);
- if (isCapped()) {
- if (dl < dl_last)
- outOfOrder++;
- dl_last = dl;
- }
-
- MmapV1RecordHeader* r = recordFor(dl);
- len += r->lengthWithHeaders();
- nlen += r->netLength();
-
- if (isQuantized(r->lengthWithHeaders())) {
- // Count the number of records having a size consistent with
- // the quantizeAllocationSpace quantization implementation.
- ++nQuantizedSize;
- }
-
- size_t dataSize = 0;
- const Status status = adaptor->validate(record->id, r->toRecordData(), &dataSize);
- if (!status.isOK()) {
- results->valid = false;
-            if (nInvalid == 0) // only log once
- results->errors.push_back("invalid object detected (see logs)");
-
- nInvalid++;
- log() << "Invalid object detected in " << _ns << ": " << redact(status);
- } else {
- bsonLen += dataSize;
- }
- }
-
- if (isCapped() && !_details->capLooped()) {
- output->append("cappedOutOfOrder", outOfOrder);
- if (outOfOrder > 1) {
- results->valid = false;
- results->errors.push_back("too many out of order records");
- }
- }
- output->append("objectsFound", n);
- output->append("invalidObjects", nInvalid);
- output->appendNumber("nQuantizedSize", nQuantizedSize);
- output->appendNumber("bytesWithHeaders", len);
- output->appendNumber("bytesWithoutHeaders", nlen);
-
- if (level == kValidateFull) {
- output->appendNumber("bytesBson", bsonLen);
- } // end scanData
-
- // 55555555555555555555555555
-
- if (level == kValidateFull) {
- BSONArrayBuilder deletedListArray;
- for (int i = 0; i < Buckets; i++) {
- deletedListArray << _details->deletedListEntry(i).isNull();
- }
-
- int ndel = 0;
- long long delSize = 0;
- BSONArrayBuilder delBucketSizes;
- int incorrect = 0;
- for (int i = 0; i < Buckets; i++) {
- DiskLoc loc = _details->deletedListEntry(i);
- try {
- int k = 0;
- while (!loc.isNull()) {
- if (recs.count(loc))
- incorrect++;
- ndel++;
-
- if (loc.questionable()) {
- if (isCapped() && !loc.isValid() && i == 1) {
- /* the constructor for NamespaceDetails intentionally sets
-                             * deletedList[1] to invalid; see the comments in namespace.h
- */
- break;
- }
-
- string err(str::stream() << "bad pointer in deleted record list: "
- << loc.toString()
- << " bucket: "
- << i
- << " k: "
- << k);
- results->errors.push_back(err);
- results->valid = false;
- break;
- }
-
- const DeletedRecord* d = deletedRecordFor(loc);
- delSize += d->lengthWithHeaders();
- loc = d->nextDeleted();
- k++;
- opCtx->checkForInterrupt();
- }
- delBucketSizes << k;
- } catch (...) {
- results->errors.push_back((string) "exception in deleted chain for bucket " +
- BSONObjBuilder::numStr(i));
- results->valid = false;
- }
- }
-
- output->appendNumber("deletedCount", ndel);
- output->appendNumber("deletedSize", delSize);
- output->append("delBucketSizes", delBucketSizes.arr());
-
- if (incorrect) {
- results->errors.push_back(BSONObjBuilder::numStr(incorrect) +
- " records from datafile are in deleted list");
- results->valid = false;
- }
- }
-
- } catch (const AssertionException& e) {
- StringBuilder sb;
- sb << "exception during validate: " << e.what();
- results->errors.push_back(sb.str());
- results->valid = false;
- }
-
- return Status::OK();
-}
-
-void RecordStoreV1Base::appendCustomStats(OperationContext* opCtx,
- BSONObjBuilder* result,
- double scale) const {
- result->append("lastExtentSize", _details->lastExtentSize(opCtx) / scale);
- result->append("paddingFactor", 1.0); // hard coded
- result->append("paddingFactorNote",
- "paddingFactor is unused and unmaintained in 3.0. It "
- "remains hard coded to 1.0 for compatibility only.");
- result->append("userFlags", _details->userFlags());
- result->appendBool("capped", isCapped());
- if (isCapped()) {
- result->appendNumber("max", _details->maxCappedDocs());
- result->appendNumber("maxSize",
- static_cast<long long>(storageSize(opCtx, NULL, 0) / scale));
- }
-}
-
-
-namespace {
-struct touch_location {
- const char* root;
- size_t length;
-};
-}
-
-Status RecordStoreV1Base::touch(OperationContext* opCtx, BSONObjBuilder* output) const {
- Timer t;
-
- std::vector<touch_location> ranges;
- {
- DiskLoc nextLoc = _details->firstExtent(opCtx);
- Extent* ext = nextLoc.isNull() ? NULL : _getExtent(opCtx, nextLoc);
- while (ext) {
- touch_location tl;
- tl.root = reinterpret_cast<const char*>(ext);
- tl.length = ext->length;
- ranges.push_back(tl);
-
- nextLoc = ext->xnext;
- if (nextLoc.isNull())
- ext = NULL;
- else
- ext = _getExtent(opCtx, nextLoc);
- }
- }
-
- std::string progress_msg = "touch " + ns() + " extents";
- stdx::unique_lock<Client> lk(*opCtx->getClient());
- ProgressMeterHolder pm(CurOp::get(opCtx)->setMessage_inlock(
- progress_msg.c_str(), "Touch Progress", ranges.size()));
- lk.unlock();
-
- for (std::vector<touch_location>::iterator it = ranges.begin(); it != ranges.end(); ++it) {
- touch_pages(it->root, it->length);
- pm.hit();
- opCtx->checkForInterrupt();
- }
- pm.finished();
-
- if (output) {
- output->append("numRanges", static_cast<int>(ranges.size()));
- output->append("millis", t.millis());
- }
-
- return Status::OK();
-}
-
-boost::optional<Record> RecordStoreV1Base::IntraExtentIterator::next() {
- if (_curr.isNull())
- return {};
- auto out = _curr.toRecordId();
- advance();
- return {{out, _rs->dataFor(_opCtx, out)}};
-}
-
-void RecordStoreV1Base::IntraExtentIterator::advance() {
- if (_curr.isNull())
- return;
-
- const MmapV1RecordHeader* rec = recordFor(_curr);
- const int nextOfs = _forward ? rec->nextOfs() : rec->prevOfs();
- _curr = (nextOfs == DiskLoc::NullOfs ? DiskLoc() : DiskLoc(_curr.a(), nextOfs));
-}
-
-void RecordStoreV1Base::IntraExtentIterator::invalidate(OperationContext* opCtx,
- const RecordId& rid) {
- if (rid == _curr.toRecordId()) {
- const DiskLoc origLoc = _curr;
-
- // Undo the advance on rollback, as the deletion that forced it "never happened".
- opCtx->recoveryUnit()->onRollback([this, origLoc]() { this->_curr = origLoc; });
- advance();
- }
-}
-
-std::unique_ptr<RecordFetcher> RecordStoreV1Base::IntraExtentIterator::fetcherForNext() const {
- return _rs->_extentManager->recordNeedsFetch(_curr);
-}
-
-int RecordStoreV1Base::quantizeAllocationSpace(int allocSize) {
- invariant(allocSize <= MaxAllowedAllocation);
- for (int i = 0; i < Buckets - 2; i++) { // last two bucketSizes are invalid
- if (bucketSizes[i] >= allocSize) {
- // Return the size of the first bucket sized >= the requested size.
- return bucketSizes[i];
- }
- }
- MONGO_UNREACHABLE; // prior invariant means we should find something.
-}
-
-bool RecordStoreV1Base::isQuantized(int recordSize) {
- if (recordSize > MaxAllowedAllocation)
- return false;
-
- return recordSize == quantizeAllocationSpace(recordSize);
-}
-
-int RecordStoreV1Base::bucket(int size) {
- for (int i = 0; i < Buckets; i++) {
- if (bucketSizes[i] > size) {
- // Return the first bucket sized _larger_ than the requested size. This is important
- // since we want all records in a bucket to be >= the quantized size, therefore the
- // quantized size must be the smallest allowed record per bucket.
- return i;
- }
- }
- // Technically, this is reachable if size == INT_MAX, but it would be an error to pass that
- // in anyway since it would be impossible to have a record that large given the file and
- // extent headers.
- MONGO_UNREACHABLE;
-}
-}
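To make the free-list arithmetic above concrete: quantizeAllocationSpace() rounds a requested allocation up to the first bucketSizes entry that can hold it, and bucket() returns the index of the first entry strictly larger than a record's size, so every record chained in a bucket is at least as large as the previous boundary. A few representative values, assuming these helpers are callable as statics (their definitions above suggest they are):

#include <cassert>

#include "mongo/db/storage/mmap_v1/record_store_v1_base.h"

void quantizationExamples() {
    using mongo::RecordStoreV1Base;
    // bucketSizes begins {0x20, 0x40, 0x80, 0x100, 0x200, 0x400, ...}.
    assert(RecordStoreV1Base::quantizeAllocationSpace(300) == 0x200);  // rounds up to 512
    assert(RecordStoreV1Base::quantizeAllocationSpace(512) == 0x200);  // a boundary stays put
    assert(RecordStoreV1Base::isQuantized(512));
    assert(!RecordStoreV1Base::isQuantized(300));
    assert(RecordStoreV1Base::bucket(512) == 5);  // first entry > 512 is 0x400
}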
diff --git a/src/mongo/db/storage/mmap_v1/record_store_v1_base.h b/src/mongo/db/storage/mmap_v1/record_store_v1_base.h
deleted file mode 100644
index 7e21228fbb9..00000000000
--- a/src/mongo/db/storage/mmap_v1/record_store_v1_base.h
+++ /dev/null
@@ -1,364 +0,0 @@
-/**
-* Copyright (C) 2013-2014 MongoDB Inc.
-*
-* This program is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Affero General Public License, version 3,
-* as published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU Affero General Public License for more details.
-*
-* You should have received a copy of the GNU Affero General Public License
-* along with this program. If not, see <http://www.gnu.org/licenses/>.
-*
-* As a special exception, the copyright holders give permission to link the
-* code of portions of this program with the OpenSSL library under certain
-* conditions as described in each individual source file and distribute
-* linked combinations including the program with the OpenSSL library. You
-* must comply with the GNU Affero General Public License in all respects for
-* all of the code used other than as permitted herein. If you modify file(s)
-* with this exception, you may extend this exception to your version of the
-* file(s), but you are not obligated to do so. If you do not wish to do so,
-* delete this exception statement from your version. If you delete this
-* exception statement from all source files in the program, then also delete
-* it in the license file.
-*/
-
-#pragma once
-
-#include "mongo/stdx/unordered_set.h"
-#include "mongo/util/concurrency/spin_lock.h"
-
-#include "mongo/db/storage/mmap_v1/diskloc.h"
-#include "mongo/db/storage/record_store.h"
-
-namespace mongo {
-
-class DeletedRecord;
-class ExtentManager;
-class MmapV1RecordHeader;
-class OperationContext;
-
-struct Extent;
-
-class RecordStoreV1MetaData {
-public:
- virtual ~RecordStoreV1MetaData() {}
-
- virtual const DiskLoc& capExtent() const = 0;
- virtual void setCapExtent(OperationContext* opCtx, const DiskLoc& loc) = 0;
-
- virtual const DiskLoc& capFirstNewRecord() const = 0;
- virtual void setCapFirstNewRecord(OperationContext* opCtx, const DiskLoc& loc) = 0;
-
- bool capLooped() const {
- return capFirstNewRecord().isValid();
- }
-
- virtual long long dataSize() const = 0;
- virtual long long numRecords() const = 0;
-
- virtual void incrementStats(OperationContext* opCtx,
- long long dataSizeIncrement,
- long long numRecordsIncrement) = 0;
-
- virtual void setStats(OperationContext* opCtx, long long dataSize, long long numRecords) = 0;
-
- virtual DiskLoc deletedListEntry(int bucket) const = 0;
- virtual void setDeletedListEntry(OperationContext* opCtx, int bucket, const DiskLoc& loc) = 0;
-
- virtual DiskLoc deletedListLegacyGrabBag() const = 0;
- virtual void setDeletedListLegacyGrabBag(OperationContext* opCtx, const DiskLoc& loc) = 0;
-
- virtual void orphanDeletedList(OperationContext* opCtx) = 0;
-
- virtual const DiskLoc& firstExtent(OperationContext* opCtx) const = 0;
- virtual void setFirstExtent(OperationContext* opCtx, const DiskLoc& loc) = 0;
-
- virtual const DiskLoc& lastExtent(OperationContext* opCtx) const = 0;
- virtual void setLastExtent(OperationContext* opCtx, const DiskLoc& loc) = 0;
-
- virtual bool isCapped() const = 0;
-
- virtual bool isUserFlagSet(int flag) const = 0;
- virtual int userFlags() const = 0;
- virtual bool setUserFlag(OperationContext* opCtx, int flag) = 0;
- virtual bool clearUserFlag(OperationContext* opCtx, int flag) = 0;
- virtual bool replaceUserFlags(OperationContext* opCtx, int flags) = 0;
-
- virtual int lastExtentSize(OperationContext* opCtx) const = 0;
- virtual void setLastExtentSize(OperationContext* opCtx, int newMax) = 0;
-
- virtual long long maxCappedDocs() const = 0;
-};
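-
-// Note on capLooped() above: capFirstNewRecord() is deliberately kept *invalid*
-// (DiskLoc().setInvalid()) while a capped collection is still on its first pass
-// through its extents, and becomes a valid location -- flipping capLooped() to
-// true -- once allocation wraps around and starts reusing space.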
-
-/**
- * Class that stores active cursors that have been saved (as part of yielding) so
- * that they can be invalidated if the btree bucket they point into goes away. The
- * registry is thread-safe, as readers may concurrently register and remove their
- * cursors. Contention is expected to be very low, as yielding is infrequent. This
- * logically belongs to the RecordStore, but is not contained in it to facilitate
- * unit testing.
- */
-class SavedCursorRegistry {
-public:
- /**
- * The destructor ensures the cursor is unregistered when an exception is thrown.
- * Note that the SavedCursor may outlive the registry it was saved in.
- */
- struct SavedCursor {
- SavedCursor() : _registry(NULL) {}
- virtual ~SavedCursor() {
- if (_registry)
- _registry->unregisterCursor(this);
- }
- DiskLoc bucket;
- BSONObj key;
- DiskLoc loc;
-
- private:
- friend class SavedCursorRegistry;
- // Non-null iff registered. Accessed by owner or writer with MODE_X collection lock
- SavedCursorRegistry* _registry;
- };
-
- ~SavedCursorRegistry();
-
- /**
- * Adds given saved cursor to SavedCursorRegistry. Doesn't take ownership.
- */
- void registerCursor(SavedCursor* cursor);
-
- /**
- * Removes given saved cursor. Returns true if the cursor was still present, and false
- * if it had already been removed due to invalidation. Doesn't take ownership.
- */
- bool unregisterCursor(SavedCursor* cursor);
-
- /**
- * When a btree-bucket disappears due to merge/split or similar, this invalidates all
- * cursors that point at the same bucket by removing them from the registry.
- */
- void invalidateCursorsForBucket(DiskLoc bucket);
-
-private:
- SpinLock _mutex;
- typedef stdx::unordered_set<SavedCursor*>
- SavedCursorSet; // SavedCursor pointers not owned here
- SavedCursorSet _cursors;
-};
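-
-// Illustrative usage (the SavedCursor subclass below is hypothetical, for
-// exposition only):
-//
-//     struct BtreeSavedCursor : SavedCursorRegistry::SavedCursor {};
-//
-//     SavedCursorRegistry registry;
-//     auto* saved = new BtreeSavedCursor();  // capture bucket/key/loc on yield
-//     registry.registerCursor(saved);        // registry does not take ownership
-//     // ... yield ...
-//     if (!registry.unregisterCursor(saved)) {
-//         // The bucket vanished (merge/split) while yielded; re-seek from key.
-//     }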
-
-class RecordStoreV1Base : public RecordStore {
-public:
- static const int Buckets = 26;
- static const int MaxAllowedAllocation = 16 * 1024 * 1024 + 512 * 1024;
-
- static const int bucketSizes[];
-
- // ------------
-
- class IntraExtentIterator;
-
- /**
- * @param details - takes ownership
- * @param em - does NOT take ownership
- */
- RecordStoreV1Base(StringData ns,
- RecordStoreV1MetaData* details,
- ExtentManager* em,
- bool isSystemIndexes);
-
- virtual ~RecordStoreV1Base();
-
- const std::string& getIdent() const override {
- MONGO_UNREACHABLE;
- }
-
- virtual long long dataSize(OperationContext* opCtx) const {
- return _details->dataSize();
- }
- virtual long long numRecords(OperationContext* opCtx) const {
- return _details->numRecords();
- }
-
- virtual int64_t storageSize(OperationContext* opCtx,
- BSONObjBuilder* extraInfo = NULL,
- int level = 0) const;
-
- virtual RecordData dataFor(OperationContext* opCtx, const RecordId& loc) const;
-
- virtual bool findRecord(OperationContext* opCtx, const RecordId& loc, RecordData* rd) const;
-
- void deleteRecord(OperationContext* opCtx, const RecordId& dl);
-
- StatusWith<RecordId> insertRecord(
- OperationContext* opCtx, const char* data, int len, Timestamp, bool enforceQuota);
-
- Status insertRecordsWithDocWriter(OperationContext* opCtx,
- const DocWriter* const* docs,
- const Timestamp*,
- size_t nDocs,
- RecordId* idsOut) final;
-
- virtual Status updateRecord(OperationContext* opCtx,
- const RecordId& oldLocation,
- const char* data,
- int len,
- bool enforceQuota,
- UpdateNotifier* notifier);
-
- virtual bool updateWithDamagesSupported() const;
-
- virtual StatusWith<RecordData> updateWithDamages(OperationContext* opCtx,
- const RecordId& loc,
- const RecordData& oldRec,
- const char* damageSource,
- const mutablebson::DamageVector& damages);
-
- virtual std::unique_ptr<RecordCursor> getCursorForRepair(OperationContext* opCtx) const;
-
- void increaseStorageSize(OperationContext* opCtx, int size, bool enforceQuota);
-
- virtual Status validate(OperationContext* opCtx,
- ValidateCmdLevel level,
- ValidateAdaptor* adaptor,
- ValidateResults* results,
- BSONObjBuilder* output);
-
- virtual void appendCustomStats(OperationContext* opCtx,
- BSONObjBuilder* result,
- double scale) const;
-
- virtual Status touch(OperationContext* opCtx, BSONObjBuilder* output) const;
-
- const RecordStoreV1MetaData* details() const {
- return _details.get();
- }
-
- // This keeps track of cursors saved during yielding, for invalidation purposes.
- SavedCursorRegistry savedCursors;
-
- DiskLoc getExtentLocForRecord(OperationContext* opCtx, const DiskLoc& loc) const;
-
- DiskLoc getNextRecord(OperationContext* opCtx, const DiskLoc& loc) const;
- DiskLoc getPrevRecord(OperationContext* opCtx, const DiskLoc& loc) const;
-
- DiskLoc getNextRecordInExtent(OperationContext* opCtx, const DiskLoc& loc) const;
- DiskLoc getPrevRecordInExtent(OperationContext* opCtx, const DiskLoc& loc) const;
-
- /**
- * Quantize 'minSize' to the nearest allocation size.
- */
- static int quantizeAllocationSpace(int minSize);
-
- static bool isQuantized(int recordSize);
-
- /* Returns which "deleted bucket" an object of this size falls into. */
- static int bucket(int size);
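-
- // Illustrative use of the three helpers above (the example bucket value is an
- // assumption; the real values live in bucketSizes[]):
- //
- //     int allocSize = quantizeAllocationSpace(300);  // e.g. rounds up to 512
- //     invariant(isQuantized(allocSize));
- //     int b = bucket(allocSize);  // index of the deleted list to search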
-
- void waitForAllEarlierOplogWritesToBeVisible(OperationContext* opCtx) const override {}
-
- virtual void updateStatsAfterRepair(OperationContext* opCtx,
- long long numRecords,
- long long dataSize) {
- MONGO_UNREACHABLE; // MMAPv1 has its own repair which doesn't call this.
- }
-
-protected:
- virtual MmapV1RecordHeader* recordFor(const DiskLoc& loc) const;
-
- const DeletedRecord* deletedRecordFor(const DiskLoc& loc) const;
-
- virtual bool isCapped() const = 0;
-
- virtual bool shouldPadInserts() const = 0;
-
- virtual StatusWith<DiskLoc> allocRecord(OperationContext* opCtx,
- int lengthWithHeaders,
- bool enforceQuota) = 0;
-
- // TODO: document, remove, what have you
- virtual void addDeletedRec(OperationContext* opCtx, const DiskLoc& dloc) = 0;
-
- // TODO: another sad one
- virtual DeletedRecord* drec(const DiskLoc& loc) const;
-
- // just a wrapper for _extentManager->getExtent( loc );
- Extent* _getExtent(OperationContext* opCtx, const DiskLoc& loc) const;
-
- DiskLoc _getExtentLocForRecord(OperationContext* opCtx, const DiskLoc& loc) const;
-
- DiskLoc _getNextRecord(OperationContext* opCtx, const DiskLoc& loc) const;
- DiskLoc _getPrevRecord(OperationContext* opCtx, const DiskLoc& loc) const;
-
- DiskLoc _getNextRecordInExtent(OperationContext* opCtx, const DiskLoc& loc) const;
- DiskLoc _getPrevRecordInExtent(OperationContext* opCtx, const DiskLoc& loc) const;
-
- /**
- * Finds the first suitable DiskLoc for data;
- * returns the DiskLoc of a newly created DeletedRecord.
- */
- DiskLoc _findFirstSpot(OperationContext* opCtx, const DiskLoc& extDiskLoc, Extent* e);
-
- /** Adds a record to the end of the linked-list chain within this extent.
- Requires: write intent must already have been declared for the record header.
- */
- void _addRecordToRecListInExtent(OperationContext* opCtx, MmapV1RecordHeader* r, DiskLoc loc);
-
- /**
- * internal
- * doesn't check inputs or change padding
- */
- StatusWith<RecordId> _insertRecord(OperationContext* opCtx,
- const char* data,
- int len,
- bool enforceQuota);
-
- std::unique_ptr<RecordStoreV1MetaData> _details;
- ExtentManager* _extentManager;
- bool _isSystemIndexes;
-
- friend class RecordStoreV1RepairCursor;
-};
-
-/**
- * Iterates over all records within a single extent.
- *
- * EOF at end of extent, even if there are more extents.
- */
-class RecordStoreV1Base::IntraExtentIterator final : public RecordCursor {
-public:
- IntraExtentIterator(OperationContext* opCtx,
- DiskLoc start,
- const RecordStoreV1Base* rs,
- bool forward = true)
- : _opCtx(opCtx), _curr(start), _rs(rs), _forward(forward) {}
-
- boost::optional<Record> next() final;
- void invalidate(OperationContext* opCtx, const RecordId& dl) final;
- void save() final {}
- bool restore() final {
- return true;
- }
- void detachFromOperationContext() final {
- _opCtx = nullptr;
- }
- void reattachToOperationContext(OperationContext* opCtx) final {
- _opCtx = opCtx;
- }
- std::unique_ptr<RecordFetcher> fetcherForNext() const final;
-
-private:
- virtual const MmapV1RecordHeader* recordFor(const DiskLoc& loc) const {
- return _rs->recordFor(loc);
- }
-
- void advance();
-
- OperationContext* _opCtx;
- DiskLoc _curr;
- const RecordStoreV1Base* _rs;
- bool _forward;
-};
-} // namespace mongo
diff --git a/src/mongo/db/storage/mmap_v1/record_store_v1_capped.cpp b/src/mongo/db/storage/mmap_v1/record_store_v1_capped.cpp
deleted file mode 100644
index 6a3c02b562f..00000000000
--- a/src/mongo/db/storage/mmap_v1/record_store_v1_capped.cpp
+++ /dev/null
@@ -1,696 +0,0 @@
-// record_store_v1_capped.cpp
-
-/**
- * Copyright (C) 2013 10gen Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kStorage
-
-#include "mongo/db/storage/mmap_v1/record_store_v1_capped.h"
-
-#include "mongo/db/client.h"
-#include "mongo/db/operation_context.h"
-#include "mongo/db/storage/mmap_v1/extent.h"
-#include "mongo/db/storage/mmap_v1/extent_manager.h"
-#include "mongo/db/storage/mmap_v1/mmap.h"
-#include "mongo/db/storage/mmap_v1/record.h"
-#include "mongo/db/storage/mmap_v1/record_store_v1_capped_iterator.h"
-#include "mongo/stdx/memory.h"
-#include "mongo/util/log.h"
-#include "mongo/util/mongoutils/str.h"
-
-/*
- capped collection layout
-
- d's below won't exist if things align perfectly:
-
- extent1 -> extent2 -> extent3
- ------------------- ----------------------- ---------------------
- d r r r r r r r r d d r r r r d r r r r r d d r r r r r r r r r d
- ^ ^
- oldest newest
-
- ^cappedFirstDeletedInCurExtent()
- ^cappedLastDelRecLastExtent()
- ^cappedListOfAllDeletedRecords()
-*/
-
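-/* Reading the diagram (illustrative): inserts go at the "newest" end of the
-   current capExtent; when it fills, allocation advances to the next extent
-   (wrapping from extent3 back to extent1) and deletes the oldest records in
-   its path. cappedListOfAllDeletedRecords() chains every DeletedRecord ('d')
-   across all extents, and cappedLastDelRecLastExtent() marks where that chain
-   leaves the extent just before capExtent. */
-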
-#define DDD(x)
-
-namespace mongo {
-
-using std::dec;
-using std::endl;
-using std::hex;
-using std::vector;
-
-CappedRecordStoreV1::CappedRecordStoreV1(OperationContext* opCtx,
- CappedCallback* collection,
- StringData ns,
- RecordStoreV1MetaData* details,
- ExtentManager* em,
- bool isSystemIndexes)
- : RecordStoreV1Base(ns, details, em, isSystemIndexes), _cappedCallback(collection) {
- DiskLoc extentLoc = details->firstExtent(opCtx);
- while (!extentLoc.isNull()) {
- _extentAdvice.push_back(_extentManager->cacheHint(extentLoc, ExtentManager::Sequential));
- Extent* extent = em->getExtent(extentLoc);
- extentLoc = extent->xnext;
- }
-
- // this is for VERY VERY old versions of capped collections
- cappedCheckMigrate(opCtx);
-}
-
-CappedRecordStoreV1::~CappedRecordStoreV1() {}
-
-StatusWith<DiskLoc> CappedRecordStoreV1::allocRecord(OperationContext* opCtx,
- int lenToAlloc,
- bool enforceQuota) {
- {
- // Round the requested length up to a 4-byte boundary.
- lenToAlloc = (lenToAlloc + 3) & 0xfffffffc;
- }
-
- if (lenToAlloc > theCapExtent()->length) {
- // Checking against the cap extent first is a fast path: storageSize()
- // has to iterate every extent (for now), so only compute it when the
- // record doesn't even fit in the cap extent.
- if (lenToAlloc > storageSize(opCtx)) {
- return StatusWith<DiskLoc>(
- ErrorCodes::DocTooLargeForCapped,
- mongoutils::str::stream() << "document is larger than capped size " << lenToAlloc
- << " > "
- << storageSize(opCtx));
- }
- }
- DiskLoc loc;
- { // do allocation
-
- // signal done allocating new extents.
- if (!cappedLastDelRecLastExtent().isValid())
- setLastDelRecLastExtent(opCtx, DiskLoc());
-
- invariant(lenToAlloc < 400000000);
- int passes = 0;
-
- // Delete records until we have room and are back under the max-object limit.
-
- /* This assertion fails after a rename -- that is OK, but it must stay commented out. */
- // invariant( theCapExtent()->ns == ns );
-
- theCapExtent()->assertOk();
- DiskLoc firstEmptyExtent; // This prevents us from infinite looping.
- while (1) {
- if (_details->numRecords() < _details->maxCappedDocs()) {
- loc = __capAlloc(opCtx, lenToAlloc);
- if (!loc.isNull())
- break;
- }
-
- // If on first iteration through extents, don't delete anything.
- if (!_details->capFirstNewRecord().isValid()) {
- advanceCapExtent(opCtx, _ns);
-
- if (_details->capExtent() != _details->firstExtent(opCtx))
- _details->setCapFirstNewRecord(opCtx, DiskLoc().setInvalid());
- // else signal done with first iteration through extents.
- continue;
- }
-
- if (!_details->capFirstNewRecord().isNull() &&
- theCapExtent()->firstRecord == _details->capFirstNewRecord()) {
- // We've deleted all records that were allocated on the previous
- // iteration through this extent.
- advanceCapExtent(opCtx, _ns);
- continue;
- }
-
- if (theCapExtent()->firstRecord.isNull()) {
- if (firstEmptyExtent.isNull())
- firstEmptyExtent = _details->capExtent();
- advanceCapExtent(opCtx, _ns);
- if (firstEmptyExtent == _details->capExtent()) {
- // All records have been deleted but there is still no room for this record.
- // Nothing we can do but fail.
- _maybeComplain(opCtx, lenToAlloc);
- return StatusWith<DiskLoc>(ErrorCodes::DocTooLargeForCapped,
- str::stream()
- << "document doesn't fit in capped collection."
- << " size: "
- << lenToAlloc
- << " storageSize:"
- << storageSize(opCtx));
- }
- continue;
- }
-
- const RecordId fr = theCapExtent()->firstRecord.toRecordId();
- Status status = _cappedCallback->aboutToDeleteCapped(opCtx, fr, dataFor(opCtx, fr));
- if (!status.isOK())
- return StatusWith<DiskLoc>(status);
- deleteRecord(opCtx, fr);
-
- _compact(opCtx);
- if ((++passes % 5000) == 0) {
- StringBuilder sb;
- log() << "passes = " << passes << " in CappedRecordStoreV1::allocRecord:"
- << " ns: " << _ns << ", lenToAlloc: " << lenToAlloc
- << ", maxCappedDocs: " << _details->maxCappedDocs()
- << ", nrecords: " << _details->numRecords()
- << ", datasize: " << _details->dataSize()
- << ". Continuing to delete old records to make room.";
- }
- }
-
- // Remember first record allocated on this iteration through capExtent.
- if (_details->capFirstNewRecord().isValid() && _details->capFirstNewRecord().isNull())
- _details->setCapFirstNewRecord(opCtx, loc);
- }
-
- invariant(!loc.isNull());
-
- // possibly slice up if we've allocated too much space
-
- DeletedRecord* r = drec(loc);
-
- /* note we want to grab from the front so our next pointers on disk tend
- to go in a forward direction which is important for performance. */
- int regionlen = r->lengthWithHeaders();
- invariant(r->extentOfs() < loc.getOfs());
-
- int left = regionlen - lenToAlloc;
-
- /* split off some for further use. */
- opCtx->recoveryUnit()->writingInt(r->lengthWithHeaders()) = lenToAlloc;
- DiskLoc newDelLoc = loc;
- newDelLoc.inc(lenToAlloc);
- DeletedRecord* newDel = drec(newDelLoc);
- DeletedRecord* newDelW = opCtx->recoveryUnit()->writing(newDel);
- newDelW->extentOfs() = r->extentOfs();
- newDelW->lengthWithHeaders() = left;
- newDelW->nextDeleted().Null();
-
- addDeletedRec(opCtx, newDelLoc);
-
- return StatusWith<DiskLoc>(loc);
-}
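-
-/* Worked example of the split above (illustrative numbers): if __capAlloc()
-   returned a 180-byte DeletedRecord and lenToAlloc is 100, the record is
-   shrunk to lengthWithHeaders() == 100, a new DeletedRecord is carved at
-   loc + 100 with lengthWithHeaders() == 80, and that remainder is re-linked
-   via addDeletedRec(). */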
-
-Status CappedRecordStoreV1::truncate(OperationContext* opCtx) {
- setLastDelRecLastExtent(opCtx, DiskLoc());
- setListOfAllDeletedRecords(opCtx, DiskLoc());
-
- // preserve firstExtent/lastExtent
- _details->setCapExtent(opCtx, _details->firstExtent(opCtx));
- _details->setStats(opCtx, 0, 0);
- // preserve lastExtentSize
- // nIndexes preserve 0
- // capped preserve true
- // max preserve
- // paddingFactor is unused
- _details->setCapFirstNewRecord(opCtx, DiskLoc().setInvalid());
- setLastDelRecLastExtent(opCtx, DiskLoc().setInvalid());
- // dataFileVersion preserve
- // indexFileVersion preserve
-
- // Reset all existing extents and recreate the deleted list.
- Extent* ext;
- for (DiskLoc extLoc = _details->firstExtent(opCtx); !extLoc.isNull(); extLoc = ext->xnext) {
- ext = _extentManager->getExtent(extLoc);
-
- opCtx->recoveryUnit()->writing(&ext->firstRecord)->Null();
- opCtx->recoveryUnit()->writing(&ext->lastRecord)->Null();
-
- addDeletedRec(opCtx, _findFirstSpot(opCtx, extLoc, ext));
- }
-
- return Status::OK();
-}
-
-void CappedRecordStoreV1::cappedTruncateAfter(OperationContext* opCtx,
- RecordId end,
- bool inclusive) {
- cappedTruncateAfter(opCtx, _ns.c_str(), DiskLoc::fromRecordId(end), inclusive);
-}
-
-/* combine adjacent deleted records *for the current extent* of the capped collection
-
- this is O(n^2) but we call it for capped tables where typically n==1 or 2!
- (or 3...there will be a little unused sliver at the end of the extent.)
-*/
-void CappedRecordStoreV1::_compact(OperationContext* opCtx) {
- DDD("CappedRecordStoreV1::compact enter");
-
- vector<DiskLoc> drecs;
-
- // Pull out capExtent's DRs from deletedList
- DiskLoc i = cappedFirstDeletedInCurExtent();
- for (; !i.isNull() && inCapExtent(i); i = deletedRecordFor(i)->nextDeleted()) {
- DDD("\t" << i);
- drecs.push_back(i);
- }
-
- setFirstDeletedInCurExtent(opCtx, i);
-
- std::sort(drecs.begin(), drecs.end());
- DDD("\t drecs.size(): " << drecs.size());
-
- vector<DiskLoc>::const_iterator j = drecs.begin();
- invariant(j != drecs.end());
- DiskLoc a = *j;
- while (1) {
- j++;
- if (j == drecs.end()) {
- DDD("\t compact adddelrec");
- addDeletedRec(opCtx, a);
- break;
- }
- DiskLoc b = *j;
- while (a.a() == b.a() && a.getOfs() + drec(a)->lengthWithHeaders() == b.getOfs()) {
- // a & b are adjacent. merge.
- opCtx->recoveryUnit()->writingInt(drec(a)->lengthWithHeaders()) +=
- drec(b)->lengthWithHeaders();
- j++;
- if (j == drecs.end()) {
- DDD("\t compact adddelrec2");
- addDeletedRec(opCtx, a);
- return;
- }
- b = *j;
- }
- DDD("\t compact adddelrec3");
- addDeletedRec(opCtx, a);
- a = b;
- }
-}
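-
-/* Worked example (illustrative): if the cap extent's deleted list holds
-   DeletedRecords at (a:0, ofs:1000, len:100) and (a:0, ofs:1100, len:50),
-   then 1000 + 100 == 1100, so the merge loop above folds them into a single
-   DeletedRecord at ofs 1000 with lengthWithHeaders() == 150 before re-adding
-   it with addDeletedRec(). */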
-
-DiskLoc CappedRecordStoreV1::cappedFirstDeletedInCurExtent() const {
- if (cappedLastDelRecLastExtent().isNull())
- return cappedListOfAllDeletedRecords();
- else
- return drec(cappedLastDelRecLastExtent())->nextDeleted();
-}
-
-void CappedRecordStoreV1::setFirstDeletedInCurExtent(OperationContext* opCtx, const DiskLoc& loc) {
- if (cappedLastDelRecLastExtent().isNull())
- setListOfAllDeletedRecords(opCtx, loc);
- else
- *opCtx->recoveryUnit()->writing(&drec(cappedLastDelRecLastExtent())->nextDeleted()) = loc;
-}
-
-void CappedRecordStoreV1::cappedCheckMigrate(OperationContext* opCtx) {
- // migrate old RecordStoreV1MetaData format
- if (_details->capExtent().a() == 0 && _details->capExtent().getOfs() == 0) {
- WriteUnitOfWork wunit(opCtx);
- _details->setCapFirstNewRecord(opCtx, DiskLoc().setInvalid());
- // put all the DeletedRecords in cappedListOfAllDeletedRecords()
- for (int i = 1; i < Buckets; ++i) {
- DiskLoc first = _details->deletedListEntry(i);
- if (first.isNull())
- continue;
- DiskLoc last = first;
- for (; !drec(last)->nextDeleted().isNull(); last = drec(last)->nextDeleted())
- ;
- *opCtx->recoveryUnit()->writing(&drec(last)->nextDeleted()) =
- cappedListOfAllDeletedRecords();
- setListOfAllDeletedRecords(opCtx, first);
- _details->setDeletedListEntry(opCtx, i, DiskLoc());
- }
- // NOTE: cappedLastDelRecLastExtent() was set to DiskLoc() above.
-
- // Do this step last, in case we are killed before getting here.
- _details->setCapExtent(opCtx, _details->firstExtent(opCtx));
- wunit.commit();
- }
-}
-
-bool CappedRecordStoreV1::inCapExtent(const DiskLoc& dl) const {
- invariant(!dl.isNull());
-
- if (dl.a() != _details->capExtent().a())
- return false;
-
- if (dl.getOfs() < _details->capExtent().getOfs())
- return false;
-
- const Extent* e = theCapExtent();
- int end = _details->capExtent().getOfs() + e->length;
- return dl.getOfs() <= end;
-}
-
-bool CappedRecordStoreV1::nextIsInCapExtent(const DiskLoc& dl) const {
- invariant(!dl.isNull());
- DiskLoc next = drec(dl)->nextDeleted();
- if (next.isNull())
- return false;
- return inCapExtent(next);
-}
-
-void CappedRecordStoreV1::advanceCapExtent(OperationContext* opCtx, StringData ns) {
- // We want cappedLastDelRecLastExtent() to be the last DeletedRecord of the prev cap extent
- // (or DiskLoc() if new capExtent == firstExtent)
- if (_details->capExtent() == _details->lastExtent(opCtx))
- setLastDelRecLastExtent(opCtx, DiskLoc());
- else {
- DiskLoc i = cappedFirstDeletedInCurExtent();
- for (; !i.isNull() && nextIsInCapExtent(i); i = drec(i)->nextDeleted())
- ;
- setLastDelRecLastExtent(opCtx, i);
- }
-
- _details->setCapExtent(opCtx,
- theCapExtent()->xnext.isNull() ? _details->firstExtent(opCtx)
- : theCapExtent()->xnext);
-
- /* This doesn't hold if the collection has been renamed -- that is OK; it was only used for diagnostics. */
- // dassert( theCapExtent()->ns == ns );
-
- theCapExtent()->assertOk();
- _details->setCapFirstNewRecord(opCtx, DiskLoc());
-}
-
-DiskLoc CappedRecordStoreV1::__capAlloc(OperationContext* opCtx, int len) {
- DiskLoc prev = cappedLastDelRecLastExtent();
- DiskLoc i = cappedFirstDeletedInCurExtent();
- DiskLoc ret;
- for (; !i.isNull() && inCapExtent(i); prev = i, i = drec(i)->nextDeleted()) {
- // We need to keep at least one DR per extent in cappedListOfAllDeletedRecords(),
- // so make sure there's space to create a DR at the end.
- if (drec(i)->lengthWithHeaders() >= len + 24) {
- ret = i;
- break;
- }
- }
-
- /* unlink ourself from the deleted list */
- if (!ret.isNull()) {
- if (prev.isNull())
- setListOfAllDeletedRecords(opCtx, drec(ret)->nextDeleted());
- else
- *opCtx->recoveryUnit()->writing(&drec(prev)->nextDeleted()) = drec(ret)->nextDeleted();
- *opCtx->recoveryUnit()->writing(&drec(ret)->nextDeleted()) =
- DiskLoc().setInvalid(); // defensive.
- invariant(drec(ret)->extentOfs() < ret.getOfs());
- }
-
- return ret;
-}
-
-void CappedRecordStoreV1::cappedTruncateLastDelUpdate(OperationContext* opCtx) {
- if (_details->capExtent() == _details->firstExtent(opCtx)) {
- // Only one extent of the collection is in use, so there
- // is no deleted record in a previous extent, so nullify
- // cappedLastDelRecLastExtent().
- setLastDelRecLastExtent(opCtx, DiskLoc());
- } else {
- // Scan through all deleted records in the collection
- // until the last deleted record for the extent prior
- // to the new capExtent is found. Then set
- // cappedLastDelRecLastExtent() to that deleted record.
- DiskLoc i = cappedListOfAllDeletedRecords();
- for (; !drec(i)->nextDeleted().isNull() && !inCapExtent(drec(i)->nextDeleted());
- i = drec(i)->nextDeleted())
- ;
- // In our capped storage model, every extent must have at least one
- // deleted record. Here we check that 'i' is not the last deleted
- // record. (We expect that there will be deleted records in the new
- // capExtent as well.)
- invariant(!drec(i)->nextDeleted().isNull());
- setLastDelRecLastExtent(opCtx, i);
- }
-}
-
-void CappedRecordStoreV1::cappedTruncateAfter(OperationContext* opCtx,
- const char* ns,
- DiskLoc end,
- bool inclusive) {
- invariant(cappedLastDelRecLastExtent().isValid());
-
- // We iteratively remove the newest document until the newest document
- // is 'end', then we remove 'end' if requested.
- bool foundLast = false;
- while (1) {
- if (foundLast) {
- // 'end' has been found and removed, so break.
- break;
- }
- // 'curr' will point to the newest document in the collection.
- const DiskLoc curr = theCapExtent()->lastRecord;
- const RecordId currId = curr.toRecordId();
- invariant(!curr.isNull());
- if (curr == end) {
- if (inclusive) {
- // 'end' has been found, so break next iteration.
- foundLast = true;
- } else {
- // 'end' has been found, so break.
- break;
- }
- }
-
- // TODO The algorithm used in this function cannot generate an
- // empty collection, but we could call emptyCappedCollection() in
- // this case instead of asserting.
- uassert(13415, "emptying the collection is not allowed", _details->numRecords() > 1);
-
- WriteUnitOfWork wunit(opCtx);
- // Delete the newest record, and coalesce the new deleted
- // record with existing deleted records.
- Status status = _cappedCallback->aboutToDeleteCapped(opCtx, currId, dataFor(opCtx, currId));
- uassertStatusOK(status);
- deleteRecord(opCtx, currId);
- _compact(opCtx);
-
- // This is the case where we have not yet had to remove any
- // documents to make room for other documents, and we are allocating
- // documents from free space in fresh extents instead of reusing
- // space from familiar extents.
- if (!_details->capLooped()) {
- // We just removed the last record from the 'capExtent', and
- // the 'capExtent' can't be empty, so we set 'capExtent' to
- // capExtent's prev extent.
- if (theCapExtent()->lastRecord.isNull()) {
- invariant(!theCapExtent()->xprev.isNull());
- // NOTE Because we didn't delete the last document, and
- // capLooped() is false, capExtent is not the first extent
- // so xprev will be nonnull.
- _details->setCapExtent(opCtx, theCapExtent()->xprev);
- theCapExtent()->assertOk();
-
- // update cappedLastDelRecLastExtent()
- cappedTruncateLastDelUpdate(opCtx);
- }
- wunit.commit();
- continue;
- }
-
- // This is the case where capLooped() is true, and we just deleted
- // from capExtent, and we just deleted capFirstNewRecord, which was
- // the last record on the fresh side of capExtent.
- // NOTE In this comparison, curr and potentially capFirstNewRecord
- // may point to invalid data, but we can still compare the
- // references themselves.
- if (curr == _details->capFirstNewRecord()) {
- // Set 'capExtent' to the first nonempty extent prior to the
- // initial capExtent. There must be such an extent because we
- // have not deleted the last document in the collection. It is
- // possible that all extents other than the capExtent are empty.
- // In this case we will keep the initial capExtent and specify
- // that all records contained within are on the fresh rather than
- // stale side of the extent.
- DiskLoc newCapExtent = _details->capExtent();
- do {
- // Find the previous extent, looping if necessary.
- newCapExtent = (newCapExtent == _details->firstExtent(opCtx))
- ? _details->lastExtent(opCtx)
- : _extentManager->getExtent(newCapExtent)->xprev;
- _extentManager->getExtent(newCapExtent)->assertOk();
- } while (_extentManager->getExtent(newCapExtent)->firstRecord.isNull());
- _details->setCapExtent(opCtx, newCapExtent);
-
- // Place all documents in the new capExtent on the fresh side
- // of the capExtent by setting capFirstNewRecord to the first
- // document in the new capExtent.
- _details->setCapFirstNewRecord(opCtx, theCapExtent()->firstRecord);
-
- // update cappedLastDelRecLastExtent()
- cappedTruncateLastDelUpdate(opCtx);
- }
-
- wunit.commit();
- }
-}
-
-DiskLoc CappedRecordStoreV1::cappedListOfAllDeletedRecords() const {
- return _details->deletedListEntry(0);
-}
-
-void CappedRecordStoreV1::setListOfAllDeletedRecords(OperationContext* opCtx, const DiskLoc& loc) {
- return _details->setDeletedListEntry(opCtx, 0, loc);
-}
-
-DiskLoc CappedRecordStoreV1::cappedLastDelRecLastExtent() const {
- return _details->deletedListEntry(1);
-}
-
-void CappedRecordStoreV1::setLastDelRecLastExtent(OperationContext* opCtx, const DiskLoc& loc) {
- return _details->setDeletedListEntry(opCtx, 1, loc);
-}
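-
-// As the four accessors above show, the capped store repurposes deleted-list
-// buckets 0 and 1 of RecordStoreV1MetaData: bucket 0 anchors the single list
-// of all DeletedRecords, and bucket 1 remembers the last DeletedRecord of the
-// extent preceding capExtent.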
-
-Extent* CappedRecordStoreV1::theCapExtent() const {
- return _extentManager->getExtent(_details->capExtent());
-}
-
-void CappedRecordStoreV1::addDeletedRec(OperationContext* opCtx, const DiskLoc& dloc) {
- DeletedRecord* d = opCtx->recoveryUnit()->writing(drec(dloc));
-
- if (!cappedLastDelRecLastExtent().isValid()) {
- // Initial extent allocation. Insert at end.
- d->nextDeleted() = DiskLoc();
- if (cappedListOfAllDeletedRecords().isNull())
- setListOfAllDeletedRecords(opCtx, dloc);
- else {
- DiskLoc i = cappedListOfAllDeletedRecords();
- for (; !drec(i)->nextDeleted().isNull(); i = drec(i)->nextDeleted())
- ;
- *opCtx->recoveryUnit()->writing(&drec(i)->nextDeleted()) = dloc;
- }
- } else {
- d->nextDeleted() = cappedFirstDeletedInCurExtent();
- setFirstDeletedInCurExtent(opCtx, dloc);
- // always _compact() after this so order doesn't matter
- }
-}
-
-std::unique_ptr<SeekableRecordCursor> CappedRecordStoreV1::getCursor(OperationContext* opCtx,
- bool forward) const {
- return stdx::make_unique<CappedRecordStoreV1Iterator>(opCtx, this, forward);
-}
-
-vector<std::unique_ptr<RecordCursor>> CappedRecordStoreV1::getManyCursors(
- OperationContext* opCtx) const {
- vector<std::unique_ptr<RecordCursor>> cursors;
-
- if (!_details->capLooped()) {
- // if we haven't looped yet, just spit out all extents (same as non-capped impl)
- const Extent* ext;
- for (DiskLoc extLoc = details()->firstExtent(opCtx); !extLoc.isNull();
- extLoc = ext->xnext) {
- ext = _getExtent(opCtx, extLoc);
- if (ext->firstRecord.isNull())
- continue;
-
- cursors.push_back(stdx::make_unique<RecordStoreV1Base::IntraExtentIterator>(
- opCtx, ext->firstRecord, this));
- }
- } else {
- // if we've looped we need to iterate the extents, starting and ending with the
- // capExtent
- const DiskLoc capExtent = details()->capExtent();
- invariant(!capExtent.isNull());
- invariant(capExtent.isValid());
-
- // First do the "old" portion of capExtent if there is any
- DiskLoc extLoc = capExtent;
- {
- const Extent* ext = _getExtent(opCtx, extLoc);
- if (ext->firstRecord != details()->capFirstNewRecord()) {
- // this means there is old data in capExtent
- cursors.push_back(stdx::make_unique<RecordStoreV1Base::IntraExtentIterator>(
- opCtx, ext->firstRecord, this));
- }
-
- extLoc = ext->xnext.isNull() ? details()->firstExtent(opCtx) : ext->xnext;
- }
-
- // Next handle all the other extents
- while (extLoc != capExtent) {
- const Extent* ext = _getExtent(opCtx, extLoc);
- cursors.push_back(stdx::make_unique<RecordStoreV1Base::IntraExtentIterator>(
- opCtx, ext->firstRecord, this));
-
- extLoc = ext->xnext.isNull() ? details()->firstExtent(opCtx) : ext->xnext;
- }
-
- // Finally handle the "new" data in the capExtent
- cursors.push_back(stdx::make_unique<RecordStoreV1Base::IntraExtentIterator>(
- opCtx, details()->capFirstNewRecord(), this));
- }
-
- return cursors;
-}
-
-void CappedRecordStoreV1::_maybeComplain(OperationContext* opCtx, int len) const {
- RARELY {
- std::stringstream buf;
- buf << "couldn't make room for record len: " << len << " in capped ns " << _ns << '\n';
- buf << "numRecords: " << numRecords(opCtx) << '\n';
- int i = 0;
- for (DiskLoc e = _details->firstExtent(opCtx); !e.isNull();
- e = _extentManager->getExtent(e)->xnext, ++i) {
- buf << " Extent " << i;
- if (e == _details->capExtent())
- buf << " (capExtent)";
- buf << ' ' << e;
- buf << '\n';
-
- buf << " magic: " << hex << _extentManager->getExtent(e)->magic << dec
- << " extent->ns: " << _extentManager->getExtent(e)->nsDiagnostic.toString() << '\n';
- buf << " fr: " << _extentManager->getExtent(e)->firstRecord.toString()
- << " lr: " << _extentManager->getExtent(e)->lastRecord.toString()
- << " extent->len: " << _extentManager->getExtent(e)->length << '\n';
- }
-
- warning() << buf.str();
-
- // assume it is unusually large record; if not, something is broken
- fassert(17438, len * 5 > _details->lastExtentSize(opCtx));
- }
-}
-
-DiskLoc CappedRecordStoreV1::firstRecord(OperationContext* opCtx,
- const DiskLoc& startExtent) const {
- for (DiskLoc i = startExtent.isNull() ? _details->firstExtent(opCtx) : startExtent; !i.isNull();
- i = _extentManager->getExtent(i)->xnext) {
- Extent* e = _extentManager->getExtent(i);
-
- if (!e->firstRecord.isNull())
- return e->firstRecord;
- }
- return DiskLoc();
-}
-
-DiskLoc CappedRecordStoreV1::lastRecord(OperationContext* opCtx, const DiskLoc& startExtent) const {
- for (DiskLoc i = startExtent.isNull() ? _details->lastExtent(opCtx) : startExtent; !i.isNull();
- i = _extentManager->getExtent(i)->xprev) {
- Extent* e = _extentManager->getExtent(i);
- if (!e->lastRecord.isNull())
- return e->lastRecord;
- }
- return DiskLoc();
-}
-}  // namespace mongo
diff --git a/src/mongo/db/storage/mmap_v1/record_store_v1_capped.h b/src/mongo/db/storage/mmap_v1/record_store_v1_capped.h
deleted file mode 100644
index d74fc7c65ea..00000000000
--- a/src/mongo/db/storage/mmap_v1/record_store_v1_capped.h
+++ /dev/null
@@ -1,129 +0,0 @@
-// record_store_v1_capped.h
-
-/**
-* Copyright (C) 2013 10gen Inc.
-*
-* This program is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Affero General Public License, version 3,
-* as published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU Affero General Public License for more details.
-*
-* You should have received a copy of the GNU Affero General Public License
-* along with this program. If not, see <http://www.gnu.org/licenses/>.
-*
-* As a special exception, the copyright holders give permission to link the
-* code of portions of this program with the OpenSSL library under certain
-* conditions as described in each individual source file and distribute
-* linked combinations including the program with the OpenSSL library. You
-* must comply with the GNU Affero General Public License in all respects for
-* all of the code used other than as permitted herein. If you modify file(s)
-* with this exception, you may extend this exception to your version of the
-* file(s), but you are not obligated to do so. If you do not wish to do so,
-* delete this exception statement from your version. If you delete this
-* exception statement from all source files in the program, then also delete
-* it in the license file.
-*/
-
-#pragma once
-
-#include "mongo/base/owned_pointer_vector.h"
-#include "mongo/db/storage/capped_callback.h"
-#include "mongo/db/storage/mmap_v1/diskloc.h"
-#include "mongo/db/storage/mmap_v1/extent_manager.h"
-#include "mongo/db/storage/mmap_v1/record_store_v1_base.h"
-
-namespace mongo {
-
-class CappedRecordStoreV1 final : public RecordStoreV1Base {
-public:
- CappedRecordStoreV1(OperationContext* opCtx,
- CappedCallback* collection,
- StringData ns,
- RecordStoreV1MetaData* details,
- ExtentManager* em,
- bool isSystemIndexes);
-
- ~CappedRecordStoreV1() final;
-
- const char* name() const final {
- return "CappedRecordStoreV1";
- }
-
- Status truncate(OperationContext* opCtx) final;
-
- /**
- * Truncate documents newer than the document at 'end' from the capped
- * collection. The collection cannot be completely emptied using this
- * function. An assertion will be thrown if that is attempted.
- * @param inclusive - Truncate 'end' as well iff true
- */
- void cappedTruncateAfter(OperationContext* opCtx, RecordId end, bool inclusive) final;
-
- std::unique_ptr<SeekableRecordCursor> getCursor(OperationContext* opCtx,
- bool forward) const final;
-
- std::vector<std::unique_ptr<RecordCursor>> getManyCursors(OperationContext* opCtx) const final;
-
- // Start from firstExtent by default.
- DiskLoc firstRecord(OperationContext* opCtx, const DiskLoc& startExtent = DiskLoc()) const;
- // Start from lastExtent by default.
- DiskLoc lastRecord(OperationContext* opCtx, const DiskLoc& startExtent = DiskLoc()) const;
-
-protected:
- bool isCapped() const final {
- return true;
- }
- bool shouldPadInserts() const final {
- return false;
- }
-
- void setCappedCallback(CappedCallback* cb) final {
- _cappedCallback = cb;
- }
-
- StatusWith<DiskLoc> allocRecord(OperationContext* opCtx,
- int lengthWithHeaders,
- bool enforceQuota) final;
-
- void addDeletedRec(OperationContext* opCtx, const DiskLoc& dloc) final;
-
-private:
- // -- start copy from cap.cpp --
- void _compact(OperationContext* opCtx);
- DiskLoc cappedFirstDeletedInCurExtent() const;
- void setFirstDeletedInCurExtent(OperationContext* opCtx, const DiskLoc& loc);
- void cappedCheckMigrate(OperationContext* opCtx);
- DiskLoc __capAlloc(OperationContext* opCtx, int len);
- bool inCapExtent(const DiskLoc& dl) const;
- DiskLoc cappedListOfAllDeletedRecords() const;
- DiskLoc cappedLastDelRecLastExtent() const;
- void setListOfAllDeletedRecords(OperationContext* opCtx, const DiskLoc& loc);
- void setLastDelRecLastExtent(OperationContext* opCtx, const DiskLoc& loc);
- Extent* theCapExtent() const;
- bool nextIsInCapExtent(const DiskLoc& dl) const;
- void advanceCapExtent(OperationContext* opCtx, StringData ns);
- void cappedTruncateLastDelUpdate(OperationContext* opCtx);
-
- /**
- * Truncate documents newer than the document at 'end' from the capped
- * collection. The collection cannot be completely emptied using this
- * function. An assertion will be thrown if that is attempted.
- * @param inclusive - Truncate 'end' as well iff true
- */
- void cappedTruncateAfter(OperationContext* opCtx, const char* ns, DiskLoc end, bool inclusive);
-
- void _maybeComplain(OperationContext* opCtx, int len) const;
-
- // -- end copy from cap.cpp --
-
- CappedCallback* _cappedCallback;
-
- OwnedPointerVector<ExtentManager::CacheHint> _extentAdvice;
-
- friend class CappedRecordStoreV1Iterator;
-};
-}  // namespace mongo
diff --git a/src/mongo/db/storage/mmap_v1/record_store_v1_capped_iterator.cpp b/src/mongo/db/storage/mmap_v1/record_store_v1_capped_iterator.cpp
deleted file mode 100644
index 20324ffe5ee..00000000000
--- a/src/mongo/db/storage/mmap_v1/record_store_v1_capped_iterator.cpp
+++ /dev/null
@@ -1,215 +0,0 @@
-/**
- * Copyright (C) 2013 10gen Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#include "mongo/db/storage/mmap_v1/record_store_v1_capped_iterator.h"
-
-#include "mongo/db/catalog/collection.h"
-#include "mongo/db/storage/mmap_v1/extent.h"
-#include "mongo/db/storage/mmap_v1/extent_manager.h"
-#include "mongo/db/storage/mmap_v1/record_store_v1_capped.h"
-
-namespace mongo {
-
-
-//
-// Capped collection traversal
-//
-CappedRecordStoreV1Iterator::CappedRecordStoreV1Iterator(OperationContext* opCtx,
- const CappedRecordStoreV1* collection,
- bool forward)
- : _opCtx(opCtx), _recordStore(collection), _forward(forward) {
- const RecordStoreV1MetaData* nsd = _recordStore->details();
-
- // If a start position isn't specified, we fill one out from the start of the
- // collection.
- if (_forward) {
- // Going forwards.
- if (!nsd->capLooped()) {
- // If our capped collection doesn't loop around, the first record is easy.
- _curr = collection->firstRecord(_opCtx);
- } else {
- // Our capped collection has "looped" around.
- // Copied verbatim from ForwardCappedCursor::init.
- // TODO ELABORATE
- _curr = _getExtent(nsd->capExtent())->firstRecord;
- if (!_curr.isNull() && _curr == nsd->capFirstNewRecord()) {
- _curr = _getExtent(nsd->capExtent())->lastRecord;
- _curr = nextLoop(_curr);
- }
- }
- } else {
- // Going backwards
- if (!nsd->capLooped()) {
- // Start at the end.
- _curr = collection->lastRecord(_opCtx);
- } else {
- _curr = _getExtent(nsd->capExtent())->lastRecord;
- }
- }
-}
-
-boost::optional<Record> CappedRecordStoreV1Iterator::next() {
- if (isEOF())
- return {};
- auto toReturn = _curr.toRecordId();
- _curr = getNextCapped(_curr);
- return {{toReturn, _recordStore->RecordStore::dataFor(_opCtx, toReturn)}};
-}
-
-boost::optional<Record> CappedRecordStoreV1Iterator::seekExact(const RecordId& id) {
- _curr = getNextCapped(DiskLoc::fromRecordId(id));
- return {{id, _recordStore->RecordStore::dataFor(_opCtx, id)}};
-}
-
-void CappedRecordStoreV1Iterator::invalidate(OperationContext* opCtx, const RecordId& id) {
- const DiskLoc dl = DiskLoc::fromRecordId(id);
- if (dl == _curr) {
- // We *could* move to the next thing, since there is actually a next
- // thing, but according to clientcursor.cpp:
- // "note we cannot advance here. if this condition occurs, writes to the oplog
- // have "caught" the reader. skipping ahead, the reader would miss potentially
- // important data."
- // We don't really need to worry about rollback here, as the very next write would
- // invalidate the cursor anyway.
- _curr = DiskLoc();
- _killedByInvalidate = true;
- }
-}
-
-void CappedRecordStoreV1Iterator::save() {}
-
-bool CappedRecordStoreV1Iterator::restore() {
- return !_killedByInvalidate;
-}
-
-DiskLoc CappedRecordStoreV1Iterator::getNextCapped(const DiskLoc& dl) {
- invariant(!dl.isNull());
- const RecordStoreV1MetaData* details = _recordStore->details();
-
- if (_forward) {
- // If it's not looped, it's easy.
- if (!_recordStore->details()->capLooped()) {
- return _getNextRecord(dl);
- }
-
- // TODO ELABORATE
- // EOF.
- if (dl == _getExtent(details->capExtent())->lastRecord) {
- return DiskLoc();
- }
-
- DiskLoc ret = nextLoop(dl);
-
- // If we landed on capFirstNewRecord within the same extent, advance to the next extent.
- if (ret == details->capFirstNewRecord() &&
- ret != _getExtent(details->capExtent())->firstRecord) {
- ret = nextLoop(_getExtent(details->capExtent())->lastRecord);
- }
-
- // If we have just reached the beginning of capExtent, skip to capFirstNewRecord.
- if (ret == _getExtent(details->capExtent())->firstRecord) {
- ret = details->capFirstNewRecord();
- }
-
- return ret;
- } else {
- if (!details->capLooped()) {
- return _getPrevRecord(dl);
- }
-
- // TODO ELABORATE
- // Last record
- if (details->capFirstNewRecord() == _getExtent(details->capExtent())->firstRecord) {
- if (dl == nextLoop(_getExtent(details->capExtent())->lastRecord)) {
- return DiskLoc();
- }
- } else {
- if (dl == _getExtent(details->capExtent())->firstRecord) {
- return DiskLoc();
- }
- }
-
- DiskLoc ret;
- // If we are capFirstNewRecord, advance to prev extent, otherwise just get prev.
- if (dl == details->capFirstNewRecord()) {
- ret = prevLoop(_getExtent(details->capExtent())->firstRecord);
- } else {
- ret = prevLoop(dl);
- }
-
- // If we just became the last record in the cap extent, advance past
- // capFirstNewRecord. (We know ext(capExtent)->firstRecord !=
- // capFirstNewRecord, since we would have returned DiskLoc() earlier
- // otherwise.)
- if (ret == _getExtent(details->capExtent())->lastRecord) {
- ret = _getPrevRecord(details->capFirstNewRecord());
- }
-
- return ret;
- }
-}
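-
-// Illustrative forward trace of the looped case above: start at
-// capExtent->firstRecord (the oldest surviving data), walk until reaching
-// capFirstNewRecord(), hop past capExtent via nextLoop(), visit the remaining
-// extents, and on wrapping back to capExtent->firstRecord jump to
-// capFirstNewRecord(), finishing at capExtent->lastRecord (EOF).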
-
-DiskLoc CappedRecordStoreV1Iterator::nextLoop(const DiskLoc& prev) {
- // TODO ELABORATE
- DiskLoc next = _getNextRecord(prev);
- if (!next.isNull()) {
- return next;
- }
- return _recordStore->firstRecord(_opCtx);
-}
-
-DiskLoc CappedRecordStoreV1Iterator::prevLoop(const DiskLoc& curr) {
- // TODO ELABORATE
- DiskLoc prev = _getPrevRecord(curr);
- if (!prev.isNull()) {
- return prev;
- }
- return _recordStore->lastRecord(_opCtx);
-}
-
-
-Extent* CappedRecordStoreV1Iterator::_getExtent(const DiskLoc& loc) {
- return _recordStore->_extentManager->getExtent(loc);
-}
-
-DiskLoc CappedRecordStoreV1Iterator::_getNextRecord(const DiskLoc& loc) {
- return _recordStore->getNextRecord(_opCtx, loc);
-}
-
-DiskLoc CappedRecordStoreV1Iterator::_getPrevRecord(const DiskLoc& loc) {
- return _recordStore->getPrevRecord(_opCtx, loc);
-}
-
-std::unique_ptr<RecordFetcher> CappedRecordStoreV1Iterator::fetcherForNext() const {
- return _recordStore->_extentManager->recordNeedsFetch(_curr);
-}
-
-std::unique_ptr<RecordFetcher> CappedRecordStoreV1Iterator::fetcherForId(const RecordId& id) const {
- return _recordStore->_extentManager->recordNeedsFetch(DiskLoc::fromRecordId(id));
-}
-
-} // namespace mongo
diff --git a/src/mongo/db/storage/mmap_v1/record_store_v1_capped_iterator.h b/src/mongo/db/storage/mmap_v1/record_store_v1_capped_iterator.h
deleted file mode 100644
index 08065109c3f..00000000000
--- a/src/mongo/db/storage/mmap_v1/record_store_v1_capped_iterator.h
+++ /dev/null
@@ -1,98 +0,0 @@
-/**
- * Copyright (C) 2013 10gen Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#pragma once
-
-#include "mongo/db/storage/mmap_v1/diskloc.h"
-#include "mongo/db/storage/record_store.h"
-
-namespace mongo {
-
-class CappedRecordStoreV1;
-
-struct Extent;
-
-/**
- * This class iterates over a capped collection. The collection must exist
- * when the constructor is called.
- */
-class CappedRecordStoreV1Iterator final : public SeekableRecordCursor {
-public:
- CappedRecordStoreV1Iterator(OperationContext* opCtx,
- const CappedRecordStoreV1* collection,
- bool forward);
-
- boost::optional<Record> next() final;
- boost::optional<Record> seekExact(const RecordId& id) final;
- void save() final;
- bool restore() final;
- void detachFromOperationContext() final {
- _opCtx = nullptr;
- }
- void reattachToOperationContext(OperationContext* opCtx) final {
- _opCtx = opCtx;
- }
- void invalidate(OperationContext* opCtx, const RecordId& dl) final;
- std::unique_ptr<RecordFetcher> fetcherForNext() const final;
- std::unique_ptr<RecordFetcher> fetcherForId(const RecordId& id) const final;
-
-private:
- void advance();
- bool isEOF() {
- return _curr.isNull();
- }
-
- /**
- * Internal collection navigation helper methods.
- */
- DiskLoc getNextCapped(const DiskLoc& dl);
- DiskLoc prevLoop(const DiskLoc& curr);
- DiskLoc nextLoop(const DiskLoc& prev);
-
- // Some helpers -- these should probably move to RecordStore.
- Extent* _getExtent(const DiskLoc& loc);
- DiskLoc _getNextRecord(const DiskLoc& loc);
- DiskLoc _getPrevRecord(const DiskLoc& loc);
-
- // transactional context for read locks. Not owned by us
- OperationContext* _opCtx;
-
- // The collection we're iterating over.
- const CappedRecordStoreV1* const _recordStore;
-
- // The result returned on the next call to next().
- DiskLoc _curr;
-
- const bool _forward;
-
- // If invalidate kills the DiskLoc we need to move forward, we kill the iterator. See the
- // comment in the body of invalidate(...).
- bool _killedByInvalidate = false;
-};
-
-} // namespace mongo
diff --git a/src/mongo/db/storage/mmap_v1/record_store_v1_capped_test.cpp b/src/mongo/db/storage/mmap_v1/record_store_v1_capped_test.cpp
deleted file mode 100644
index 280ad6ccee0..00000000000
--- a/src/mongo/db/storage/mmap_v1/record_store_v1_capped_test.cpp
+++ /dev/null
@@ -1,797 +0,0 @@
-// record_store_v1_capped_test.cpp
-
-/**
- * Copyright (C) 2014 MongoDB Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#include "mongo/db/storage/mmap_v1/record_store_v1_capped.h"
-#include "mongo/db/storage/mmap_v1/record_store_v1_capped_iterator.h"
-
-#include "mongo/db/operation_context_noop.h"
-#include "mongo/db/storage/mmap_v1/extent.h"
-#include "mongo/db/storage/mmap_v1/record.h"
-#include "mongo/db/storage/mmap_v1/record_store_v1_test_help.h"
-
-#include "mongo/unittest/unittest.h"
-
-using namespace mongo;
-
-namespace {
-
-using std::string;
-using std::vector;
-
-// Provides data to be inserted. Must be large enough for the largest possible record.
-// Should live in BSS so untouched pages consume no physical memory.
-char zeros[20 * 1024 * 1024] = {};
-
-class DummyCappedCallback : public CappedCallback {
-public:
- Status aboutToDeleteCapped(OperationContext* opCtx, const RecordId& loc, RecordData data) {
- deleted.push_back(DiskLoc::fromRecordId(loc));
- return Status::OK();
- }
-
- bool haveCappedWaiters() {
- return false;
- }
- void notifyCappedWaitersIfNeeded() {}
-
- vector<DiskLoc> deleted;
-};
-
-void simpleInsertTest(const char* buf, int size) {
- OperationContextNoop opCtx;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(true, 0);
- DummyCappedCallback cb;
-
- string myns = "test.simple1";
- CappedRecordStoreV1 rs(&opCtx, &cb, myns, md, &em, false);
-
- rs.increaseStorageSize(&opCtx, 1024, false);
-
- ASSERT_NOT_OK(rs.insertRecord(&opCtx, buf, 3, Timestamp(), true).getStatus());
-
- ASSERT_OK(rs.insertRecord(&opCtx, buf, size, Timestamp(), true).getStatus());
-
- {
- BSONObjBuilder b;
- int64_t storageSize = rs.storageSize(&opCtx, &b);
- BSONObj obj = b.obj();
- ASSERT_EQUALS(1, obj["numExtents"].numberInt());
- ASSERT_EQUALS(storageSize, em.quantizeExtentSize(1024));
- }
-
- for (int i = 0; i < 1000; i++) {
- ASSERT_OK(rs.insertRecord(&opCtx, buf, size, Timestamp(), true).getStatus());
- }
-
- long long start = md->numRecords();
- for (int i = 0; i < 1000; i++) {
- ASSERT_OK(rs.insertRecord(&opCtx, buf, size, Timestamp(), true).getStatus());
- }
- ASSERT_EQUALS(start, md->numRecords());
- ASSERT_GREATER_THAN(start, 100);
- ASSERT_LESS_THAN(start, 1000);
-}
-
-TEST(CappedRecordStoreV1, SimpleInsertSize4) {
- simpleInsertTest("abcd", 4);
-}
-TEST(CappedRecordStoreV1, SimpleInsertSize8) {
- simpleInsertTest("abcdefgh", 8);
-}
-
-TEST(CappedRecordStoreV1, EmptySingleExtent) {
- OperationContextNoop opCtx;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(true, 0);
- DummyCappedCallback cb;
- CappedRecordStoreV1 rs(&opCtx, &cb, "test.foo", md, &em, false);
-
- {
- LocAndSize records[] = {{}};
- LocAndSize drecs[] = {{DiskLoc(0, 1000), 1000}, {}};
- md->setCapExtent(&opCtx, DiskLoc(0, 0));
- md->setCapFirstNewRecord(&opCtx, DiskLoc().setInvalid());
- initializeV1RS(&opCtx, records, drecs, NULL, &em, md);
- }
-
- ASSERT_OK(
- rs.insertRecord(&opCtx, zeros, 100 - MmapV1RecordHeader::HeaderSize, Timestamp(), false)
- .getStatus());
-
-
- {
- LocAndSize recs[] = {{DiskLoc(0, 1000), 100}, {}};
- LocAndSize drecs[] = {{DiskLoc(0, 1100), 900}, {}};
- assertStateV1RS(&opCtx, recs, drecs, NULL, &em, md);
- ASSERT_EQUALS(md->capExtent(), DiskLoc(0, 0));
- ASSERT_EQUALS(md->capFirstNewRecord(), DiskLoc().setInvalid()); // unlooped
- }
-}
-
-TEST(CappedRecordStoreV1, FirstLoopWithSingleExtentExactSize) {
- OperationContextNoop opCtx;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(true, 0);
- DummyCappedCallback cb;
- CappedRecordStoreV1 rs(&opCtx, &cb, "test.foo", md, &em, false);
-
- {
- LocAndSize records[] = {{DiskLoc(0, 1000), 100},
- {DiskLoc(0, 1100), 100},
- {DiskLoc(0, 1200), 100},
- {DiskLoc(0, 1300), 100},
- {DiskLoc(0, 1400), 100},
- {}};
- LocAndSize drecs[] = {{DiskLoc(0, 1500), 50}, {}};
- md->setCapExtent(&opCtx, DiskLoc(0, 0));
- md->setCapFirstNewRecord(&opCtx, DiskLoc().setInvalid()); // unlooped
- initializeV1RS(&opCtx, records, drecs, NULL, &em, md);
- }
-
- ASSERT_OK(
- rs.insertRecord(&opCtx, zeros, 100 - MmapV1RecordHeader::HeaderSize, Timestamp(), false)
- .getStatus());
-
- {
- LocAndSize recs[] = {{DiskLoc(0, 1200), 100}, // first old record
- {DiskLoc(0, 1300), 100},
- {DiskLoc(0, 1400), 100}, // last old record
- {DiskLoc(0, 1000), 100}, // first new record
- {}};
- LocAndSize drecs[] = {
- {DiskLoc(0, 1100), 100}, // gap after newest record XXX this is probably a bug
- {DiskLoc(0, 1500), 50}, // gap at end of extent
- {}};
- assertStateV1RS(&opCtx, recs, drecs, NULL, &em, md);
- ASSERT_EQUALS(md->capExtent(), DiskLoc(0, 0));
- ASSERT_EQUALS(md->capFirstNewRecord(), DiskLoc(0, 1000));
- }
-}
-
-TEST(CappedRecordStoreV1, NonFirstLoopWithSingleExtentExactSize) {
- OperationContextNoop opCtx;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(true, 0);
- DummyCappedCallback cb;
- CappedRecordStoreV1 rs(&opCtx, &cb, "test.foo", md, &em, false);
-
- {
- LocAndSize records[] = {{DiskLoc(0, 1000), 100},
- {DiskLoc(0, 1100), 100},
- {DiskLoc(0, 1200), 100},
- {DiskLoc(0, 1300), 100},
- {DiskLoc(0, 1400), 100},
- {}};
- LocAndSize drecs[] = {{DiskLoc(0, 1500), 50}, {}};
- md->setCapExtent(&opCtx, DiskLoc(0, 0));
- md->setCapFirstNewRecord(&opCtx, DiskLoc(0, 1000));
- initializeV1RS(&opCtx, records, drecs, NULL, &em, md);
- }
-
- ASSERT_OK(
- rs.insertRecord(&opCtx, zeros, 100 - MmapV1RecordHeader::HeaderSize, Timestamp(), false)
- .getStatus());
-
- {
- LocAndSize recs[] = {{DiskLoc(0, 1200), 100}, // first old record
- {DiskLoc(0, 1300), 100},
- {DiskLoc(0, 1400), 100}, // last old record
- {DiskLoc(0, 1000), 100}, // first new record
- {}};
- LocAndSize drecs[] = {
- {DiskLoc(0, 1100), 100}, // gap after newest record XXX this is probably a bug
- {DiskLoc(0, 1500), 50}, // gap at end of extent
- {}};
- assertStateV1RS(&opCtx, recs, drecs, NULL, &em, md);
- ASSERT_EQUALS(md->capExtent(), DiskLoc(0, 0));
- ASSERT_EQUALS(md->capFirstNewRecord(), DiskLoc(0, 1000));
- }
-}
-
-/**
- * Current code always tries to leave 24 bytes to create a DeletedRecord.
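- * For example, a 100-byte insert into a 123-byte hole would leave only 23
- * bytes, so the store wraps instead (WillLoopWithout24SpareBytes), while a
- * 124-byte hole leaves exactly 24 and is used in place
- * (WontLoopWith24SpareBytes).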
- */
-TEST(CappedRecordStoreV1, WillLoopWithout24SpareBytes) {
- OperationContextNoop opCtx;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(true, 0);
- DummyCappedCallback cb;
- CappedRecordStoreV1 rs(&opCtx, &cb, "test.foo", md, &em, false);
-
- {
- LocAndSize records[] = {{DiskLoc(0, 1000), 100},
- {DiskLoc(0, 1100), 100},
- {DiskLoc(0, 1200), 100},
- {DiskLoc(0, 1300), 100},
- {DiskLoc(0, 1400), 100},
- {}};
- LocAndSize drecs[] = {{DiskLoc(0, 1500), 123}, {}};
- md->setCapExtent(&opCtx, DiskLoc(0, 0));
- md->setCapFirstNewRecord(&opCtx, DiskLoc(0, 1000));
- initializeV1RS(&opCtx, records, drecs, NULL, &em, md);
- }
-
- ASSERT_OK(
- rs.insertRecord(&opCtx, zeros, 100 - MmapV1RecordHeader::HeaderSize, Timestamp(), false)
- .getStatus());
-
- {
- LocAndSize recs[] = {{DiskLoc(0, 1200), 100}, // first old record
- {DiskLoc(0, 1300), 100},
- {DiskLoc(0, 1400), 100}, // last old record
- {DiskLoc(0, 1000), 100}, // first new record
- {}};
- LocAndSize drecs[] = {{DiskLoc(0, 1100), 100}, // gap after newest record
- {DiskLoc(0, 1500), 123}, // gap at end of extent
- {}};
- assertStateV1RS(&opCtx, recs, drecs, NULL, &em, md);
- ASSERT_EQUALS(md->capExtent(), DiskLoc(0, 0));
- ASSERT_EQUALS(md->capFirstNewRecord(), DiskLoc(0, 1000));
- }
-}
-
-TEST(CappedRecordStoreV1, WontLoopWith24SpareBytes) {
- OperationContextNoop opCtx;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(true, 0);
- DummyCappedCallback cb;
- CappedRecordStoreV1 rs(&opCtx, &cb, "test.foo", md, &em, false);
-
- {
- LocAndSize records[] = {{DiskLoc(0, 1000), 100},
- {DiskLoc(0, 1100), 100},
- {DiskLoc(0, 1200), 100},
- {DiskLoc(0, 1300), 100},
- {DiskLoc(0, 1400), 100},
- {}};
- LocAndSize drecs[] = {{DiskLoc(0, 1500), 124}, {}};
- md->setCapExtent(&opCtx, DiskLoc(0, 0));
- md->setCapFirstNewRecord(&opCtx, DiskLoc(0, 1000));
- initializeV1RS(&opCtx, records, drecs, NULL, &em, md);
- }
-
- ASSERT_OK(
- rs.insertRecord(&opCtx, zeros, 100 - MmapV1RecordHeader::HeaderSize, Timestamp(), false)
- .getStatus());
-
- {
- LocAndSize recs[] = {{DiskLoc(0, 1000), 100},
- {DiskLoc(0, 1100), 100},
- {DiskLoc(0, 1200), 100},
- {DiskLoc(0, 1300), 100},
- {DiskLoc(0, 1400), 100},
- {DiskLoc(0, 1500), 100},
- {}};
- LocAndSize drecs[] = {{DiskLoc(0, 1600), 24}, // gap at end of extent
- {}};
- assertStateV1RS(&opCtx, recs, drecs, NULL, &em, md);
- ASSERT_EQUALS(md->capExtent(), DiskLoc(0, 0));
- ASSERT_EQUALS(md->capFirstNewRecord(), DiskLoc(0, 1000));
- }
-}
-
-TEST(CappedRecordStoreV1, MoveToSecondExtentUnLooped) {
- OperationContextNoop opCtx;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(true, 0);
- DummyCappedCallback cb;
- CappedRecordStoreV1 rs(&opCtx, &cb, "test.foo", md, &em, false);
-
- {
- // Two extents, each with 1000 bytes.
- LocAndSize records[] = {
- {DiskLoc(0, 1000), 500}, {DiskLoc(0, 1500), 300}, {DiskLoc(0, 1800), 100}, {}};
- LocAndSize drecs[] = {{DiskLoc(0, 1900), 100}, {DiskLoc(1, 1000), 1000}, {}};
- md->setCapExtent(&opCtx, DiskLoc(0, 0));
- md->setCapFirstNewRecord(&opCtx, DiskLoc().setInvalid());
- initializeV1RS(&opCtx, records, drecs, NULL, &em, md);
- }
-
- ASSERT_OK(
- rs.insertRecord(&opCtx, zeros, 100 - MmapV1RecordHeader::HeaderSize, Timestamp(), false)
- .getStatus());
-
- {
- LocAndSize recs[] = {{DiskLoc(0, 1000), 500},
- {DiskLoc(0, 1500), 300},
- {DiskLoc(0, 1800), 100},
-
- {DiskLoc(1, 1000), 100},
- {}};
- LocAndSize drecs[] = {{DiskLoc(0, 1900), 100}, {DiskLoc(1, 1100), 900}, {}};
- assertStateV1RS(&opCtx, recs, drecs, NULL, &em, md);
- ASSERT_EQUALS(md->capExtent(), DiskLoc(1, 0));
- ASSERT_EQUALS(md->capFirstNewRecord(), DiskLoc().setInvalid()); // unlooped
- }
-}
-
-TEST(CappedRecordStoreV1, MoveToSecondExtentLooped) {
- OperationContextNoop opCtx;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(true, 0);
- DummyCappedCallback cb;
- CappedRecordStoreV1 rs(&opCtx, &cb, "test.foo", md, &em, false);
-
- {
- // Two extents, each with 1000 bytes.
- LocAndSize records[] = {{DiskLoc(0, 1800), 100}, // old
- {DiskLoc(0, 1000), 500}, // first new
- {DiskLoc(0, 1500), 400},
-
- {DiskLoc(1, 1000), 300},
- {DiskLoc(1, 1300), 600},
- {}};
- LocAndSize drecs[] = {{DiskLoc(0, 1900), 100}, {DiskLoc(1, 1900), 100}, {}};
- md->setCapExtent(&opCtx, DiskLoc(0, 0));
- md->setCapFirstNewRecord(&opCtx, DiskLoc(0, 1000));
- initializeV1RS(&opCtx, records, drecs, NULL, &em, md);
- }
-
- ASSERT_OK(
- rs.insertRecord(&opCtx, zeros, 200 - MmapV1RecordHeader::HeaderSize, Timestamp(), false)
- .getStatus());
-
- {
- LocAndSize recs[] = {{DiskLoc(0, 1000), 500},
- {DiskLoc(0, 1500), 400},
-
- {DiskLoc(1, 1300), 600}, // old
- {DiskLoc(1, 1000), 200}, // first new
- {}};
- LocAndSize drecs[] = {
- {DiskLoc(0, 1800), 200}, {DiskLoc(1, 1200), 100}, {DiskLoc(1, 1900), 100}, {}};
- assertStateV1RS(&opCtx, recs, drecs, NULL, &em, md);
- ASSERT_EQUALS(md->capExtent(), DiskLoc(1, 0));
- ASSERT_EQUALS(md->capFirstNewRecord(), DiskLoc(1, 1000));
- }
-}
-
-// Larger than storageSize (fails early)
-TEST(CappedRecordStoreV1, OversizedRecordHuge) {
- OperationContextNoop opCtx;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(true, 0);
- DummyCappedCallback cb;
- CappedRecordStoreV1 rs(&opCtx, &cb, "test.foo", md, &em, false);
-
- {
- LocAndSize records[] = {{}};
- LocAndSize drecs[] = {{DiskLoc(0, 1000), 1000}, {}};
- md->setCapExtent(&opCtx, DiskLoc(0, 0));
- md->setCapFirstNewRecord(&opCtx, DiskLoc().setInvalid());
- initializeV1RS(&opCtx, records, drecs, NULL, &em, md);
- }
-
- StatusWith<RecordId> status = rs.insertRecord(&opCtx, zeros, 16000, Timestamp(), false);
- ASSERT_EQUALS(status.getStatus(), ErrorCodes::DocTooLargeForCapped);
- ASSERT_STRING_CONTAINS(status.getStatus().reason(), "larger than capped size");
-}
-
-// Smaller than storageSize, but larger than usable space (fails late)
-TEST(CappedRecordStoreV1, OversizedRecordMedium) {
- OperationContextNoop opCtx;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(true, 0);
- DummyCappedCallback cb;
- CappedRecordStoreV1 rs(&opCtx, &cb, "test.foo", md, &em, false);
-
- {
- LocAndSize records[] = {{}};
- LocAndSize drecs[] = {{DiskLoc(0, 1000), 1000}, {}};
- md->setCapExtent(&opCtx, DiskLoc(0, 0));
- md->setCapFirstNewRecord(&opCtx, DiskLoc().setInvalid());
- initializeV1RS(&opCtx, records, drecs, NULL, &em, md);
- }
-
- StatusWith<RecordId> status =
- rs.insertRecord(&opCtx, zeros, 1004 - MmapV1RecordHeader::HeaderSize, Timestamp(), false);
- ASSERT_EQUALS(status.getStatus(), ErrorCodes::DocTooLargeForCapped);
- ASSERT_STRING_CONTAINS(status.getStatus().reason(), "doesn't fit");
-}
-
-//
-// XXX The CappedRecordStoreV1Scrambler suite of tests describes existing behavior that is less
-// than ideal. Any improved implementation will need to be able to handle a collection that has
-// been scrambled like this.
-//
-
-/**
- * This is a minimal example that shows the current allocator laying out records out-of-order.
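- * After the 500- and 300-byte inserts fill offsets 1000-1800 of the
- * 1000-byte extent, only 200 bytes remain at the end, so the 400-byte
- * insert wraps to the front and lands at a lower offset than the records
- * inserted before it.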
- */
-TEST(CappedRecordStoreV1Scrambler, Minimal) {
- OperationContextNoop opCtx;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(true, 0);
- DummyCappedCallback cb;
- CappedRecordStoreV1 rs(&opCtx, &cb, "test.foo", md, &em, false);
-
- {
- // Starting with a single empty 1000 byte extent.
- LocAndSize records[] = {{}};
- LocAndSize drecs[] = {{DiskLoc(0, 1000), 1000}, {}};
- md->setCapExtent(&opCtx, DiskLoc(0, 0));
- md->setCapFirstNewRecord(&opCtx, DiskLoc().setInvalid()); // unlooped
- initializeV1RS(&opCtx, records, drecs, NULL, &em, md);
- }
-
- ASSERT_OK(
- rs.insertRecord(&opCtx, zeros, 500 - MmapV1RecordHeader::HeaderSize, Timestamp(), false)
- .getStatus());
- ASSERT_OK(
- rs.insertRecord(&opCtx, zeros, 300 - MmapV1RecordHeader::HeaderSize, Timestamp(), false)
- .getStatus());
- ASSERT_OK(
- rs.insertRecord(&opCtx, zeros, 400 - MmapV1RecordHeader::HeaderSize, Timestamp(), false)
- .getStatus()); // won't fit at end so wraps
- ASSERT_OK(
- rs.insertRecord(&opCtx, zeros, 120 - MmapV1RecordHeader::HeaderSize, Timestamp(), false)
- .getStatus()); // fits at end
- ASSERT_OK(
- rs.insertRecord(&opCtx, zeros, 60 - MmapV1RecordHeader::HeaderSize, Timestamp(), false)
- .getStatus()); // fits in earlier hole
-
- {
- LocAndSize recs[] = {{DiskLoc(0, 1500), 300}, // 2nd insert
- {DiskLoc(0, 1000), 400}, // 3rd (1st new)
- {DiskLoc(0, 1800), 120}, // 4th
- {DiskLoc(0, 1400), 60}, // 5th
- {}};
- LocAndSize drecs[] = {{DiskLoc(0, 1460), 40}, {DiskLoc(0, 1920), 80}, {}};
- assertStateV1RS(&opCtx, recs, drecs, NULL, &em, md);
- ASSERT_EQUALS(md->capExtent(), DiskLoc(0, 0));
- ASSERT_EQUALS(md->capFirstNewRecord(), DiskLoc(0, 1000));
- }
-}
-
-/**
- * This tests a specially crafted set of inserts that scrambles a capped collection in a way
- * that leaves 4 deleted records in a single extent.
- */
-TEST(CappedRecordStoreV1Scrambler, FourDeletedRecordsInSingleExtent) {
- OperationContextNoop opCtx;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(true, 0);
- DummyCappedCallback cb;
- CappedRecordStoreV1 rs(&opCtx, &cb, "test.foo", md, &em, false);
-
- {
- // Starting with a single empty 1000 byte extent.
- LocAndSize records[] = {{}};
- LocAndSize drecs[] = {{DiskLoc(0, 1000), 1000}, {}};
- md->setCapExtent(&opCtx, DiskLoc(0, 0));
- md->setCapFirstNewRecord(&opCtx, DiskLoc().setInvalid()); // unlooped
- initializeV1RS(&opCtx, records, drecs, NULL, &em, md);
- }
-
- // This list of sizes was empirically generated to achieve this outcome. Don't think too
- // much about them.
- ASSERT_OK(
- rs.insertRecord(&opCtx, zeros, 500 - MmapV1RecordHeader::HeaderSize, Timestamp(), false)
- .getStatus());
- ASSERT_OK(
- rs.insertRecord(&opCtx, zeros, 300 - MmapV1RecordHeader::HeaderSize, Timestamp(), false)
- .getStatus());
- ASSERT_OK(
- rs.insertRecord(&opCtx, zeros, 304 - MmapV1RecordHeader::HeaderSize, Timestamp(), false)
- .getStatus());
- ASSERT_OK(
- rs.insertRecord(&opCtx, zeros, 76 - MmapV1RecordHeader::HeaderSize, Timestamp(), false)
- .getStatus());
- ASSERT_OK(
- rs.insertRecord(&opCtx, zeros, 100 - MmapV1RecordHeader::HeaderSize, Timestamp(), false)
- .getStatus());
- ASSERT_OK(
- rs.insertRecord(&opCtx, zeros, 96 - MmapV1RecordHeader::HeaderSize, Timestamp(), false)
- .getStatus());
- ASSERT_OK(
- rs.insertRecord(&opCtx, zeros, 76 - MmapV1RecordHeader::HeaderSize, Timestamp(), false)
- .getStatus());
- ASSERT_OK(
- rs.insertRecord(&opCtx, zeros, 200 - MmapV1RecordHeader::HeaderSize, Timestamp(), false)
- .getStatus());
- ASSERT_OK(
- rs.insertRecord(&opCtx, zeros, 100 - MmapV1RecordHeader::HeaderSize, Timestamp(), false)
- .getStatus());
- ASSERT_OK(
- rs.insertRecord(&opCtx, zeros, 100 - MmapV1RecordHeader::HeaderSize, Timestamp(), false)
- .getStatus());
- ASSERT_OK(
- rs.insertRecord(&opCtx, zeros, 200 - MmapV1RecordHeader::HeaderSize, Timestamp(), false)
- .getStatus());
- ASSERT_OK(
- rs.insertRecord(&opCtx, zeros, 56 - MmapV1RecordHeader::HeaderSize, Timestamp(), false)
- .getStatus());
- ASSERT_OK(
- rs.insertRecord(&opCtx, zeros, 100 - MmapV1RecordHeader::HeaderSize, Timestamp(), false)
- .getStatus());
- ASSERT_OK(
- rs.insertRecord(&opCtx, zeros, 96 - MmapV1RecordHeader::HeaderSize, Timestamp(), false)
- .getStatus());
- ASSERT_OK(
- rs.insertRecord(&opCtx, zeros, 104 - MmapV1RecordHeader::HeaderSize, Timestamp(), false)
- .getStatus());
- ASSERT_OK(
- rs.insertRecord(&opCtx, zeros, 96 - MmapV1RecordHeader::HeaderSize, Timestamp(), false)
- .getStatus());
- ASSERT_OK(
- rs.insertRecord(&opCtx, zeros, 60 - MmapV1RecordHeader::HeaderSize, Timestamp(), false)
- .getStatus());
- ASSERT_OK(
- rs.insertRecord(&opCtx, zeros, 60 - MmapV1RecordHeader::HeaderSize, Timestamp(), false)
- .getStatus());
- ASSERT_OK(
- rs.insertRecord(&opCtx, zeros, 146 - MmapV1RecordHeader::HeaderSize, Timestamp(), false)
- .getStatus());
- ASSERT_OK(
- rs.insertRecord(&opCtx, zeros, 146 - MmapV1RecordHeader::HeaderSize, Timestamp(), false)
- .getStatus());
- ASSERT_OK(
- rs.insertRecord(&opCtx, zeros, 40 - MmapV1RecordHeader::HeaderSize, Timestamp(), false)
- .getStatus());
- ASSERT_OK(
- rs.insertRecord(&opCtx, zeros, 40 - MmapV1RecordHeader::HeaderSize, Timestamp(), false)
- .getStatus());
- ASSERT_OK(
- rs.insertRecord(&opCtx, zeros, 36 - MmapV1RecordHeader::HeaderSize, Timestamp(), false)
- .getStatus());
- ASSERT_OK(
- rs.insertRecord(&opCtx, zeros, 100 - MmapV1RecordHeader::HeaderSize, Timestamp(), false)
- .getStatus());
- ASSERT_OK(
- rs.insertRecord(&opCtx, zeros, 96 - MmapV1RecordHeader::HeaderSize, Timestamp(), false)
- .getStatus());
- ASSERT_OK(
- rs.insertRecord(&opCtx, zeros, 200 - MmapV1RecordHeader::HeaderSize, Timestamp(), false)
- .getStatus());
- ASSERT_OK(
- rs.insertRecord(&opCtx, zeros, 60 - MmapV1RecordHeader::HeaderSize, Timestamp(), false)
- .getStatus());
- ASSERT_OK(
- rs.insertRecord(&opCtx, zeros, 64 - MmapV1RecordHeader::HeaderSize, Timestamp(), false)
- .getStatus());
-
- {
- LocAndSize recs[] = {{DiskLoc(0, 1148), 148},
- {DiskLoc(0, 1936), 40},
- {DiskLoc(0, 1712), 40},
- {DiskLoc(0, 1296), 36},
- {DiskLoc(0, 1752), 100},
- {DiskLoc(0, 1332), 96},
- {DiskLoc(0, 1428), 200},
- {DiskLoc(0, 1852), 60},
- {DiskLoc(0, 1000), 64}, // (1st new)
- {}};
- LocAndSize drecs[] = {{DiskLoc(0, 1064), 84},
- {DiskLoc(0, 1976), 24},
- {DiskLoc(0, 1912), 24},
- {DiskLoc(0, 1628), 84},
- {}};
- assertStateV1RS(&opCtx, recs, drecs, NULL, &em, md);
- ASSERT_EQUALS(md->capExtent(), DiskLoc(0, 0));
- ASSERT_EQUALS(md->capFirstNewRecord(), DiskLoc(0, 1000));
- }
-}
-
-//
-// The CappedRecordStoreV1QueryStage tests some nitty-gritty capped
-// collection details. Ported and polished from pdfiletests.cpp.
-//
-
-class CollscanHelper {
-public:
- CollscanHelper(int nExtents)
- : md(new DummyRecordStoreV1MetaData(true, 0)), rs(&opCtx, &cb, ns(), md, &em, false) {
- LocAndSize recs[] = {{}};
- LocAndSize drecs[8];
- ASSERT_LESS_THAN(nExtents, 8);
- for (int j = 0; j < nExtents; ++j) {
- drecs[j].loc = DiskLoc(j, 1000);
- drecs[j].size = 1000;
- }
- drecs[nExtents].loc = DiskLoc();
- drecs[nExtents].size = 0;
-
- md->setCapExtent(&opCtx, DiskLoc(0, 0));
- md->setCapFirstNewRecord(&opCtx, DiskLoc().setInvalid()); // unlooped
- initializeV1RS(&opCtx, recs, drecs, NULL, &em, md);
- }
-
- // Insert bypasses standard alloc/insert routines to use the extent we want.
- // TODO: Directly declare resulting record store state instead of procedurally creating it
- DiskLoc insert(const DiskLoc& ext, int i) {
- // Copied verbatim.
- BSONObjBuilder b;
- b.append("a", i);
- BSONObj o = b.done();
- int len = o.objsize();
- Extent* e = em.getExtent(ext);
- e = opCtx.recoveryUnit()->writing(e);
- int ofs;
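-        // Append after the extent's current last record, or at the start of
-        // the extent's data area if the extent is empty.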
- if (e->lastRecord.isNull()) {
- ofs = ext.getOfs() + (e->_extentData - (char*)e);
- } else {
- ofs = e->lastRecord.getOfs() + em.recordForV1(e->lastRecord)->lengthWithHeaders();
- }
- DiskLoc dl(ext.a(), ofs);
- MmapV1RecordHeader* r = em.recordForV1(dl);
- r = (MmapV1RecordHeader*)opCtx.recoveryUnit()->writingPtr(
- r, MmapV1RecordHeader::HeaderSize + len);
- r->lengthWithHeaders() = MmapV1RecordHeader::HeaderSize + len;
- r->extentOfs() = e->myLoc.getOfs();
- r->nextOfs() = DiskLoc::NullOfs;
- r->prevOfs() = e->lastRecord.isNull() ? DiskLoc::NullOfs : e->lastRecord.getOfs();
- memcpy(r->data(), o.objdata(), len);
- if (e->firstRecord.isNull())
- e->firstRecord = dl;
- else
- opCtx.recoveryUnit()->writingInt(em.recordForV1(e->lastRecord)->nextOfs()) = ofs;
- e->lastRecord = dl;
- return dl;
- }
-
- // TODO: Directly assert the desired record store state instead of just walking it
- void walkAndCount(int expectedCount) {
- // Walk the collection going forward.
- {
- CappedRecordStoreV1Iterator cursor(&opCtx, &rs, /*forward=*/true);
- int resultCount = 0;
- while (auto record = cursor.next()) {
- ++resultCount;
- }
-
- ASSERT_EQUALS(resultCount, expectedCount);
- }
-
- // Walk the collection going backwards.
- {
- CappedRecordStoreV1Iterator cursor(&opCtx, &rs, /*forward=*/false);
- int resultCount = expectedCount;
- while (auto record = cursor.next()) {
- --resultCount;
- }
-
- ASSERT_EQUALS(resultCount, 0);
- }
- }
-
- static const char* ns() {
- return "unittests.QueryStageCollectionScanCapped";
- }
-
- OperationContextNoop opCtx;
- DummyRecordStoreV1MetaData* md;
- DummyExtentManager em;
-
-private:
- DummyCappedCallback cb;
- CappedRecordStoreV1 rs;
-};
-
-
-TEST(CappedRecordStoreV1QueryStage, CollscanCappedBase) {
- CollscanHelper h(1);
- h.walkAndCount(0);
-}
-
-TEST(CappedRecordStoreV1QueryStage, CollscanEmptyLooped) {
- CollscanHelper h(1);
- h.md->setCapFirstNewRecord(&h.opCtx, DiskLoc());
- h.walkAndCount(0);
-}
-
-TEST(CappedRecordStoreV1QueryStage, CollscanEmptyMultiExtentLooped) {
- CollscanHelper h(3);
- h.md->setCapFirstNewRecord(&h.opCtx, DiskLoc());
- h.walkAndCount(0);
-}
-
-TEST(CappedRecordStoreV1QueryStage, CollscanSingle) {
- CollscanHelper h(1);
-
- h.md->setCapFirstNewRecord(&h.opCtx, h.insert(h.md->capExtent(), 0));
- h.walkAndCount(1);
-}
-
-TEST(CappedRecordStoreV1QueryStage, CollscanNewCapFirst) {
- CollscanHelper h(1);
- DiskLoc x = h.insert(h.md->capExtent(), 0);
- h.md->setCapFirstNewRecord(&h.opCtx, x);
- h.insert(h.md->capExtent(), 1);
- h.walkAndCount(2);
-}
-
-TEST(CappedRecordStoreV1QueryStage, CollscanNewCapMiddle) {
- CollscanHelper h(1);
- h.insert(h.md->capExtent(), 0);
- h.md->setCapFirstNewRecord(&h.opCtx, h.insert(h.md->capExtent(), 1));
- h.insert(h.md->capExtent(), 2);
- h.walkAndCount(3);
-}
-
-TEST(CappedRecordStoreV1QueryStage, CollscanFirstExtent) {
- CollscanHelper h(2);
- h.insert(h.md->capExtent(), 0);
- h.insert(h.md->lastExtent(&h.opCtx), 1);
- h.md->setCapFirstNewRecord(&h.opCtx, h.insert(h.md->capExtent(), 2));
- h.insert(h.md->capExtent(), 3);
- h.walkAndCount(4);
-}
-
-TEST(CappedRecordStoreV1QueryStage, CollscanLastExtent) {
- CollscanHelper h(2);
- h.md->setCapExtent(&h.opCtx, h.md->lastExtent(&h.opCtx));
- h.insert(h.md->capExtent(), 0);
- h.insert(h.md->firstExtent(&h.opCtx), 1);
- h.md->setCapFirstNewRecord(&h.opCtx, h.insert(h.md->capExtent(), 2));
- h.insert(h.md->capExtent(), 3);
- h.walkAndCount(4);
-}
-
-TEST(CappedRecordStoreV1QueryStage, CollscanMidExtent) {
- CollscanHelper h(3);
- h.md->setCapExtent(&h.opCtx, h.em.getExtent(h.md->firstExtent(&h.opCtx))->xnext);
- h.insert(h.md->capExtent(), 0);
- h.insert(h.md->lastExtent(&h.opCtx), 1);
- h.insert(h.md->firstExtent(&h.opCtx), 2);
- h.md->setCapFirstNewRecord(&h.opCtx, h.insert(h.md->capExtent(), 3));
- h.insert(h.md->capExtent(), 4);
- h.walkAndCount(5);
-}
-
-TEST(CappedRecordStoreV1QueryStage, CollscanAloneInExtent) {
- CollscanHelper h(3);
- h.md->setCapExtent(&h.opCtx, h.em.getExtent(h.md->firstExtent(&h.opCtx))->xnext);
- h.insert(h.md->lastExtent(&h.opCtx), 0);
- h.insert(h.md->firstExtent(&h.opCtx), 1);
- h.md->setCapFirstNewRecord(&h.opCtx, h.insert(h.md->capExtent(), 2));
- h.walkAndCount(3);
-}
-
-TEST(CappedRecordStoreV1QueryStage, CollscanFirstInExtent) {
- CollscanHelper h(3);
- h.md->setCapExtent(&h.opCtx, h.em.getExtent(h.md->firstExtent(&h.opCtx))->xnext);
- h.insert(h.md->lastExtent(&h.opCtx), 0);
- h.insert(h.md->firstExtent(&h.opCtx), 1);
- h.md->setCapFirstNewRecord(&h.opCtx, h.insert(h.md->capExtent(), 2));
- h.insert(h.md->capExtent(), 3);
- h.walkAndCount(4);
-}
-
-TEST(CappedRecordStoreV1QueryStage, CollscanLastInExtent) {
- CollscanHelper h(3);
- h.md->setCapExtent(&h.opCtx, h.em.getExtent(h.md->firstExtent(&h.opCtx))->xnext);
- h.insert(h.md->capExtent(), 0);
- h.insert(h.md->lastExtent(&h.opCtx), 1);
- h.insert(h.md->firstExtent(&h.opCtx), 2);
- h.md->setCapFirstNewRecord(&h.opCtx, h.insert(h.md->capExtent(), 3));
- h.walkAndCount(4);
-}
-}
diff --git a/src/mongo/db/storage/mmap_v1/record_store_v1_repair_iterator.cpp b/src/mongo/db/storage/mmap_v1/record_store_v1_repair_iterator.cpp
deleted file mode 100644
index 872c29e112b..00000000000
--- a/src/mongo/db/storage/mmap_v1/record_store_v1_repair_iterator.cpp
+++ /dev/null
@@ -1,210 +0,0 @@
-/**
- * Copyright (C) 2014 10gen Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kStorage
-
-#include "mongo/db/storage/mmap_v1/record_store_v1_repair_iterator.h"
-
-#include "mongo/db/catalog/collection.h"
-#include "mongo/db/storage/mmap_v1/extent.h"
-#include "mongo/db/storage/mmap_v1/extent_manager.h"
-#include "mongo/db/storage/mmap_v1/record_store_v1_simple.h"
-#include "mongo/util/log.h"
-
-namespace mongo {
-
-using std::endl;
-
-RecordStoreV1RepairCursor::RecordStoreV1RepairCursor(OperationContext* opCtx,
- const RecordStoreV1Base* recordStore)
- : _opCtx(opCtx), _recordStore(recordStore), _stage(FORWARD_SCAN) {
- // Position the iterator at the first record
- //
- advance();
-}
-
-boost::optional<Record> RecordStoreV1RepairCursor::next() {
- if (_currRecord.isNull())
- return {};
- auto out = _currRecord.toRecordId();
- advance();
- return {{out, _recordStore->dataFor(_opCtx, out)}};
-}
-
-void RecordStoreV1RepairCursor::advance() {
- const ExtentManager* em = _recordStore->_extentManager;
-
- while (true) {
- if (_currRecord.isNull()) {
- if (!_advanceToNextValidExtent()) {
- return;
- }
-
- _seenInCurrentExtent.clear();
-
- // Otherwise _advanceToNextValidExtent would have returned false
- //
- invariant(!_currExtent.isNull());
-
- const Extent* e = em->getExtent(_currExtent, false);
- _currRecord = (FORWARD_SCAN == _stage ? e->firstRecord : e->lastRecord);
- } else {
- switch (_stage) {
- case FORWARD_SCAN:
- _currRecord = _recordStore->getNextRecordInExtent(_opCtx, _currRecord);
- break;
- case BACKWARD_SCAN:
- _currRecord = _recordStore->getPrevRecordInExtent(_opCtx, _currRecord);
- break;
- default:
- invariant(!"This should never be reached.");
- break;
- }
- }
-
- if (_currRecord.isNull()) {
- continue;
- }
-
- // Validate the contents of the record's disk location and deduplicate
- //
- if (!_seenInCurrentExtent.insert(_currRecord).second) {
- error() << "infinite loop in extent, seen: " << _currRecord << " before" << endl;
- _currRecord = DiskLoc();
- continue;
- }
-
- if (_currRecord.getOfs() <= 0) {
- error() << "offset is 0 for record which should be impossible" << endl;
- _currRecord = DiskLoc();
- continue;
- }
-
- return;
- }
-}
-
-bool RecordStoreV1RepairCursor::_advanceToNextValidExtent() {
- const ExtentManager* em = _recordStore->_extentManager;
-
- while (true) {
- if (_currExtent.isNull()) {
- switch (_stage) {
- case FORWARD_SCAN:
- _currExtent = _recordStore->details()->firstExtent(_opCtx);
- break;
- case BACKWARD_SCAN:
- _currExtent = _recordStore->details()->lastExtent(_opCtx);
- break;
- default:
- invariant(DONE == _stage);
- return false;
- }
- } else {
- // If _currExtent is not NULL, then it must point to a valid extent, so no extra
- // checks here.
- //
- const Extent* e = em->getExtent(_currExtent, false);
- _currExtent = (FORWARD_SCAN == _stage ? e->xnext : e->xprev);
- }
-
- bool hasNextExtent = !_currExtent.isNull();
-
- // Sanity checks for the extent's disk location
- //
- if (hasNextExtent && (!_currExtent.isValid() || (_currExtent.getOfs() < 0))) {
- error() << "Invalid extent location: " << _currExtent << endl;
-
- // Switch the direction of scan
- //
- hasNextExtent = false;
- }
-
- if (hasNextExtent) {
- break;
- }
-
- // Swap the direction of scan and loop again
- //
- switch (_stage) {
- case FORWARD_SCAN:
- _stage = BACKWARD_SCAN;
- break;
- case BACKWARD_SCAN:
- _stage = DONE;
- break;
- default:
- invariant(!"This should never be reached.");
- break;
- }
-
- _currExtent = DiskLoc();
- }
-
-
-    // Check _currExtent's contents for validity, but do not count it as a failure if they
- // don't check out.
- //
- const Extent* e = em->getExtent(_currExtent, false);
- if (!e->isOk()) {
- warning() << "Extent not ok magic: " << e->magic << " going to try to continue" << endl;
- }
-
- log() << (FORWARD_SCAN == _stage ? "FORWARD" : "BACKWARD") << " Extent loc: " << _currExtent
- << ", length: " << e->length << endl;
-
- return true;
-}
-
-void RecordStoreV1RepairCursor::invalidate(OperationContext* opCtx, const RecordId& id) {
- // If we see this record again it probably means it was reinserted rather than an infinite
- // loop. If we do loop, we should quickly hit another seen record that hasn't been
- // invalidated.
- DiskLoc dl = DiskLoc::fromRecordId(id);
- _seenInCurrentExtent.erase(dl);
-
- if (_currRecord == dl) {
- // The DiskLoc being invalidated is also the one pointed at by this iterator. We
- // advance the iterator so it's not pointing at invalid data.
- // We don't worry about undoing invalidations on rollback here, as we shouldn't have
- // concurrent writes that can rollback to a database we're trying to recover.
- advance();
-
- if (_currRecord == dl) {
- // Even after advancing the iterator, we're still pointing at the DiskLoc being
- // invalidated. This is expected when 'dl' is the last DiskLoc in the FORWARD scan,
-            // and the initial call to next() moves the iterator to the first loc in the
- // BACKWARDS scan.
- advance();
- }
-
- invariant(_currRecord != dl);
- }
-}
-
-} // namespace mongo
diff --git a/src/mongo/db/storage/mmap_v1/record_store_v1_repair_iterator.h b/src/mongo/db/storage/mmap_v1/record_store_v1_repair_iterator.h
deleted file mode 100644
index d95683a7c42..00000000000
--- a/src/mongo/db/storage/mmap_v1/record_store_v1_repair_iterator.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/**
- * Copyright (C) 2014 10gen Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#pragma once
-
-#include <set>
-
-#include "mongo/db/storage/mmap_v1/record_store_v1_base.h"
-#include "mongo/db/storage/record_store.h"
-
-namespace mongo {
-
-/**
- * This iterator will go over the collection twice - once going forward (first extent -> last
- * extent) and once backwards in an attempt to salvage potentially corrupted or unreachable
- * records. It is used by the mongodump --repair option.
- */
-class RecordStoreV1RepairCursor final : public RecordCursor {
-public:
- RecordStoreV1RepairCursor(OperationContext* opCtx, const RecordStoreV1Base* recordStore);
-
- boost::optional<Record> next() final;
- void invalidate(OperationContext* opCtx, const RecordId& dl);
- void save() final {}
- bool restore() final {
- return true;
- }
- void detachFromOperationContext() final {
- _opCtx = nullptr;
- }
- void reattachToOperationContext(OperationContext* opCtx) final {
- _opCtx = opCtx;
- }
-
- // Explicitly not supporting fetcherForNext(). The expected use case for this class is a
- // special offline operation where there are no concurrent operations, so it would be better
- // to take the pagefault inline with the operation.
-
-private:
- void advance();
-
- /**
- * Based on the direction of scan, finds the next valid (un-corrupted) extent in the chain
- * and sets _currExtent to point to that.
- *
- * @return true if valid extent was found (_currExtent will not be null)
- * false otherwise and _currExtent will be null
- */
- bool _advanceToNextValidExtent();
-
- // transactional context for read locks. Not owned by us
- OperationContext* _opCtx;
-
- // Reference to the owning RecordStore. The store must not be deleted while there are
- // active iterators on it.
- //
- const RecordStoreV1Base* _recordStore;
-
- DiskLoc _currExtent;
- DiskLoc _currRecord;
-
- enum Stage { FORWARD_SCAN = 0, BACKWARD_SCAN = 1, DONE = 2 };
-
- Stage _stage;
-
- // Used to find cycles within an extent. Cleared after each extent has been processed.
- //
- std::set<DiskLoc> _seenInCurrentExtent;
-};
-
-} // namespace mongo
diff --git a/src/mongo/db/storage/mmap_v1/record_store_v1_simple.cpp b/src/mongo/db/storage/mmap_v1/record_store_v1_simple.cpp
deleted file mode 100644
index fc30532ed31..00000000000
--- a/src/mongo/db/storage/mmap_v1/record_store_v1_simple.cpp
+++ /dev/null
@@ -1,486 +0,0 @@
-// record_store_v1_simple.cpp
-
-/**
- * Copyright (C) 2013-2014 MongoDB Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kStorage
-
-#include "mongo/platform/basic.h"
-
-#include "mongo/db/storage/mmap_v1/record_store_v1_simple.h"
-
-#include "mongo/base/counter.h"
-#include "mongo/db/catalog/collection.h"
-#include "mongo/db/client.h"
-#include "mongo/db/commands/server_status_metric.h"
-#include "mongo/db/curop.h"
-#include "mongo/db/operation_context.h"
-#include "mongo/db/storage/mmap_v1/extent.h"
-#include "mongo/db/storage/mmap_v1/extent_manager.h"
-#include "mongo/db/storage/mmap_v1/record.h"
-#include "mongo/db/storage/mmap_v1/record_store_v1_simple_iterator.h"
-#include "mongo/db/storage/mmap_v1/touch_pages.h"
-#include "mongo/stdx/memory.h"
-#include "mongo/util/log.h"
-#include "mongo/util/mongoutils/str.h"
-#include "mongo/util/progress_meter.h"
-#include "mongo/util/timer.h"
-
-namespace mongo {
-
-using std::endl;
-using std::vector;
-
-static Counter64 freelistAllocs;
-static Counter64 freelistBucketExhausted;
-static Counter64 freelistIterations;
-
-// TODO figure out what to do about these.
-static ServerStatusMetricField<Counter64> dFreelist1("storage.freelist.search.requests",
- &freelistAllocs);
-
-static ServerStatusMetricField<Counter64> dFreelist2("storage.freelist.search.bucketExhausted",
- &freelistBucketExhausted);
-
-static ServerStatusMetricField<Counter64> dFreelist3("storage.freelist.search.scanned",
- &freelistIterations);
-
-SimpleRecordStoreV1::SimpleRecordStoreV1(OperationContext* opCtx,
- StringData ns,
- RecordStoreV1MetaData* details,
- ExtentManager* em,
- bool isSystemIndexes)
- : RecordStoreV1Base(ns, details, em, isSystemIndexes) {
- invariant(!details->isCapped());
- _normalCollection = NamespaceString::normal(ns);
-}
-
-SimpleRecordStoreV1::~SimpleRecordStoreV1() {}
-
-DiskLoc SimpleRecordStoreV1::_allocFromExistingExtents(OperationContext* opCtx, int lenToAllocRaw) {
- // Slowly drain the deletedListLegacyGrabBag by popping one record off and putting it in the
- // correct deleted list each time we try to allocate a new record. This ensures we won't
- // orphan any data when upgrading from old versions, without needing a long upgrade phase.
- // This is done before we try to allocate the new record so we can take advantage of the new
- // space immediately.
- {
- const DiskLoc head = _details->deletedListLegacyGrabBag();
- if (!head.isNull()) {
- _details->setDeletedListLegacyGrabBag(opCtx, drec(head)->nextDeleted());
- addDeletedRec(opCtx, head);
- }
- }
-
- // align size up to a multiple of 4
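-    // (add 3, then clear the low two bits: e.g. 13 -> 16, 16 -> 16)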
- const int lenToAlloc = (lenToAllocRaw + (4 - 1)) & ~(4 - 1);
-
- freelistAllocs.increment();
- DiskLoc loc;
- DeletedRecord* dr = NULL;
- {
- int myBucket;
- for (myBucket = bucket(lenToAlloc); myBucket < Buckets; myBucket++) {
- // Only look at the first entry in each bucket. This works because we are either
- // quantizing or allocating fixed-size blocks.
- const DiskLoc head = _details->deletedListEntry(myBucket);
- if (head.isNull())
- continue;
- DeletedRecord* const candidate = drec(head);
- if (candidate->lengthWithHeaders() >= lenToAlloc) {
- loc = head;
- dr = candidate;
- break;
- }
- }
-
- if (!dr)
- return DiskLoc(); // no space
-
- // Unlink ourself from the deleted list
- _details->setDeletedListEntry(opCtx, myBucket, dr->nextDeleted());
- *opCtx->recoveryUnit()->writing(&dr->nextDeleted()) = DiskLoc().setInvalid(); // defensive
- }
-
- invariant(dr->extentOfs() < loc.getOfs());
-
- // Split the deleted record if it has at least as much left over space as our smallest
- // allocation size. Otherwise, just take the whole DeletedRecord.
- const int remainingLength = dr->lengthWithHeaders() - lenToAlloc;
- if (remainingLength >= bucketSizes[0]) {
- opCtx->recoveryUnit()->writingInt(dr->lengthWithHeaders()) = lenToAlloc;
- const DiskLoc newDelLoc = DiskLoc(loc.a(), loc.getOfs() + lenToAlloc);
- DeletedRecord* newDel = opCtx->recoveryUnit()->writing(drec(newDelLoc));
- newDel->extentOfs() = dr->extentOfs();
- newDel->lengthWithHeaders() = remainingLength;
- newDel->nextDeleted().Null();
-
- addDeletedRec(opCtx, newDelLoc);
- }
-
- return loc;
-}
-
-StatusWith<DiskLoc> SimpleRecordStoreV1::allocRecord(OperationContext* opCtx,
- int lengthWithHeaders,
- bool enforceQuota) {
- if (lengthWithHeaders > MaxAllowedAllocation) {
- return StatusWith<DiskLoc>(
- ErrorCodes::InvalidLength,
- str::stream() << "Attempting to allocate a record larger than maximum size: "
- << lengthWithHeaders
- << " > 16.5MB");
- }
-
- DiskLoc loc = _allocFromExistingExtents(opCtx, lengthWithHeaders);
- if (!loc.isNull())
- return StatusWith<DiskLoc>(loc);
-
- LOG(1) << "allocating new extent";
-
- increaseStorageSize(
- opCtx,
- _extentManager->followupSize(lengthWithHeaders, _details->lastExtentSize(opCtx)),
- enforceQuota);
-
- loc = _allocFromExistingExtents(opCtx, lengthWithHeaders);
- if (!loc.isNull()) {
- // got on first try
- return StatusWith<DiskLoc>(loc);
- }
-
- log() << "warning: alloc() failed after allocating new extent. "
- << "lengthWithHeaders: " << lengthWithHeaders
- << " last extent size:" << _details->lastExtentSize(opCtx) << "; trying again";
-
- for (int z = 0; z < 10 && lengthWithHeaders > _details->lastExtentSize(opCtx); z++) {
- log() << "try #" << z << endl;
-
- increaseStorageSize(
- opCtx,
- _extentManager->followupSize(lengthWithHeaders, _details->lastExtentSize(opCtx)),
- enforceQuota);
-
- loc = _allocFromExistingExtents(opCtx, lengthWithHeaders);
- if (!loc.isNull())
- return StatusWith<DiskLoc>(loc);
- }
-
- return StatusWith<DiskLoc>(ErrorCodes::InternalError, "cannot allocate space");
-}
-
-Status SimpleRecordStoreV1::truncate(OperationContext* opCtx) {
- const DiskLoc firstExtLoc = _details->firstExtent(opCtx);
- if (firstExtLoc.isNull() || !firstExtLoc.isValid()) {
- // Already empty
- return Status::OK();
- }
-
- // Free all extents except the first.
- Extent* firstExt = _extentManager->getExtent(firstExtLoc);
- if (!firstExt->xnext.isNull()) {
- const DiskLoc extNextLoc = firstExt->xnext;
- const DiskLoc oldLastExtLoc = _details->lastExtent(opCtx);
- Extent* const nextExt = _extentManager->getExtent(extNextLoc);
-
-        // Unlink other extents.
- *opCtx->recoveryUnit()->writing(&nextExt->xprev) = DiskLoc();
- *opCtx->recoveryUnit()->writing(&firstExt->xnext) = DiskLoc();
- _details->setLastExtent(opCtx, firstExtLoc);
- _details->setLastExtentSize(opCtx, firstExt->length);
-
- _extentManager->freeExtents(opCtx, extNextLoc, oldLastExtLoc);
- }
-
- // Make the first (now only) extent a single large deleted record.
- *opCtx->recoveryUnit()->writing(&firstExt->firstRecord) = DiskLoc();
- *opCtx->recoveryUnit()->writing(&firstExt->lastRecord) = DiskLoc();
- _details->orphanDeletedList(opCtx);
- addDeletedRec(opCtx, _findFirstSpot(opCtx, firstExtLoc, firstExt));
-
- // Make stats reflect that there are now no documents in this record store.
- _details->setStats(opCtx, 0, 0);
-
- return Status::OK();
-}
-
-void SimpleRecordStoreV1::addDeletedRec(OperationContext* opCtx, const DiskLoc& dloc) {
- DeletedRecord* d = drec(dloc);
-
- int b = bucket(d->lengthWithHeaders());
- *opCtx->recoveryUnit()->writing(&d->nextDeleted()) = _details->deletedListEntry(b);
- _details->setDeletedListEntry(opCtx, b, dloc);
-}
-
-std::unique_ptr<SeekableRecordCursor> SimpleRecordStoreV1::getCursor(OperationContext* opCtx,
- bool forward) const {
- return stdx::make_unique<SimpleRecordStoreV1Iterator>(opCtx, this, forward);
-}
-
-vector<std::unique_ptr<RecordCursor>> SimpleRecordStoreV1::getManyCursors(
- OperationContext* opCtx) const {
- vector<std::unique_ptr<RecordCursor>> cursors;
- const Extent* ext;
- for (DiskLoc extLoc = details()->firstExtent(opCtx); !extLoc.isNull(); extLoc = ext->xnext) {
- ext = _getExtent(opCtx, extLoc);
- if (ext->firstRecord.isNull())
- continue;
- cursors.push_back(stdx::make_unique<RecordStoreV1Base::IntraExtentIterator>(
- opCtx, ext->firstRecord, this));
- }
-
- return cursors;
-}
-
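-/**
- * DocWriter that copies an existing record's data verbatim into a freshly
- * allocated record of the requested size. Used by _compactExtent() to move
- * records into new extents.
- */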
-class CompactDocWriter final : public DocWriter {
-public:
- /**
-     * @param allocationSize - allocation size WITH header
- */
- CompactDocWriter(const MmapV1RecordHeader* rec, unsigned dataSize, size_t allocationSize)
- : _rec(rec), _dataSize(dataSize), _allocationSize(allocationSize) {}
-
- virtual ~CompactDocWriter() {}
-
- virtual void writeDocument(char* buf) const {
- memcpy(buf, _rec->data(), _dataSize);
- }
-
- virtual size_t documentSize() const {
- return _allocationSize - MmapV1RecordHeader::HeaderSize;
- }
-
- virtual bool addPadding() const {
- return false;
- }
-
-private:
- const MmapV1RecordHeader* _rec;
- size_t _dataSize;
- size_t _allocationSize;
-};
-
-void SimpleRecordStoreV1::_compactExtent(OperationContext* opCtx,
- const DiskLoc extentLoc,
- int extentNumber,
- RecordStoreCompactAdaptor* adaptor,
- const CompactOptions* compactOptions,
- CompactStats* stats) {
- log() << "compact begin extent #" << extentNumber << " for namespace " << _ns << " "
- << extentLoc;
-
- unsigned oldObjSize = 0; // we'll report what the old padding was
- unsigned oldObjSizeWithPadding = 0;
-
- Extent* const sourceExtent = _extentManager->getExtent(extentLoc);
- sourceExtent->assertOk();
- fassert(17437, sourceExtent->validates(extentLoc));
-
- {
- // The next/prev MmapV1RecordHeader pointers within the Extent might not be in order so we
- // first page in the whole Extent sequentially.
- // TODO benchmark on slow storage to verify this is measurably faster.
- log() << "compact paging in len=" << sourceExtent->length / 1000000.0 << "MB" << endl;
- Timer t;
- size_t length = sourceExtent->length;
-
- touch_pages(reinterpret_cast<const char*>(sourceExtent), length);
- int ms = t.millis();
- if (ms > 1000)
- log() << "compact end paging in " << ms << "ms "
- << sourceExtent->length / 1000000.0 / t.seconds() << "MB/sec" << endl;
- }
-
- {
- // Move each MmapV1RecordHeader out of this extent and insert it in to the "new" extents.
- log() << "compact copying records" << endl;
- long long totalNetSize = 0;
- long long nrecords = 0;
- DiskLoc nextSourceLoc = sourceExtent->firstRecord;
- while (!nextSourceLoc.isNull()) {
- opCtx->checkForInterrupt();
-
- WriteUnitOfWork wunit(opCtx);
- MmapV1RecordHeader* recOld = recordFor(nextSourceLoc);
- RecordData oldData = recOld->toRecordData();
- nextSourceLoc = getNextRecordInExtent(opCtx, nextSourceLoc);
-
- if (compactOptions->validateDocuments && !adaptor->isDataValid(oldData)) {
- // object is corrupt!
- log() << "compact removing corrupt document!";
- stats->corruptDocuments++;
- } else {
- // How much data is in the record. Excludes padding and MmapV1RecordHeader headers.
- const unsigned rawDataSize = adaptor->dataSize(oldData);
-
- nrecords++;
- oldObjSize += rawDataSize;
- oldObjSizeWithPadding += recOld->netLength();
-
- // Allocation sizes include the headers and possibly some padding.
- const unsigned minAllocationSize = rawDataSize + MmapV1RecordHeader::HeaderSize;
- unsigned allocationSize = minAllocationSize;
- switch (compactOptions->paddingMode) {
- case CompactOptions::NONE: // default padding
- if (shouldPadInserts()) {
- allocationSize = quantizeAllocationSpace(minAllocationSize);
- }
- break;
-
- case CompactOptions::PRESERVE: // keep original padding
- allocationSize = recOld->lengthWithHeaders();
- break;
-
- case CompactOptions::MANUAL: // user specified how much padding to use
- allocationSize = compactOptions->computeRecordSize(minAllocationSize);
- if (allocationSize < minAllocationSize ||
- allocationSize > BSONObjMaxUserSize / 2) {
- allocationSize = minAllocationSize;
- }
- break;
- }
- invariant(allocationSize >= minAllocationSize);
-
- // Copy the data to a new record. Because we orphaned the record freelist at the
- // start of the compact, this insert will allocate a record in a new extent.
- // See the comment in compact() for more details.
- CompactDocWriter writer(recOld, rawDataSize, allocationSize);
- StatusWith<RecordId> status =
- insertRecordWithDocWriter(opCtx, &writer, Timestamp());
- uassertStatusOK(status.getStatus());
- const MmapV1RecordHeader* newRec =
- recordFor(DiskLoc::fromRecordId(status.getValue()));
- invariant(unsigned(newRec->netLength()) >= rawDataSize);
- totalNetSize += newRec->netLength();
-
- // Tells the caller that the record has been moved, so it can do things such as
- // add it to indexes.
- adaptor->inserted(newRec->toRecordData(), status.getValue());
- }
-
-            // Remove the old record from the linked list of records within the sourceExtent.
- // The old record is not added to the freelist as we will be freeing the whole
- // extent at the end.
- *opCtx->recoveryUnit()->writing(&sourceExtent->firstRecord) = nextSourceLoc;
- if (nextSourceLoc.isNull()) {
- // Just moved the last record out of the extent. Mark extent as empty.
- *opCtx->recoveryUnit()->writing(&sourceExtent->lastRecord) = DiskLoc();
- } else {
- MmapV1RecordHeader* newFirstRecord = recordFor(nextSourceLoc);
- opCtx->recoveryUnit()->writingInt(newFirstRecord->prevOfs()) = DiskLoc::NullOfs;
- }
-
- // Adjust the stats to reflect the removal of the old record. The insert above
- // handled adjusting the stats for the new record.
- _details->incrementStats(opCtx, -(recOld->netLength()), -1);
-
- wunit.commit();
- }
-
- // The extent must now be empty.
- invariant(sourceExtent->firstRecord.isNull());
- invariant(sourceExtent->lastRecord.isNull());
-
- // We are still the first extent, but we must not be the only extent.
- invariant(_details->firstExtent(opCtx) == extentLoc);
- invariant(_details->lastExtent(opCtx) != extentLoc);
-
- // Remove the newly emptied sourceExtent from the extent linked list and return it to
- // the extent manager.
- WriteUnitOfWork wunit(opCtx);
- const DiskLoc newFirst = sourceExtent->xnext;
- _details->setFirstExtent(opCtx, newFirst);
- *opCtx->recoveryUnit()->writing(&_extentManager->getExtent(newFirst)->xprev) = DiskLoc();
- _extentManager->freeExtent(opCtx, extentLoc);
- wunit.commit();
-
- {
- const double oldPadding = oldObjSize ? double(oldObjSizeWithPadding) / oldObjSize
- : 1.0; // defining 0/0 as 1 for this.
-
- log() << "compact finished extent #" << extentNumber << " containing " << nrecords
- << " documents (" << totalNetSize / (1024 * 1024.0) << "MB)"
- << " oldPadding: " << oldPadding;
- }
- }
-}
-
-Status SimpleRecordStoreV1::compact(OperationContext* opCtx,
- RecordStoreCompactAdaptor* adaptor,
- const CompactOptions* options,
- CompactStats* stats) {
- std::vector<DiskLoc> extents;
- for (DiskLoc extLocation = _details->firstExtent(opCtx); !extLocation.isNull();
- extLocation = _extentManager->getExtent(extLocation)->xnext) {
- extents.push_back(extLocation);
- }
- log() << "compact " << extents.size() << " extents";
-
- {
- WriteUnitOfWork wunit(opCtx);
- // Orphaning the deleted lists ensures that all inserts go to new extents rather than
- // the ones that existed before starting the compact. If we abort the operation before
- // completion, any free space in the old extents will be leaked and never reused unless
- // the collection is compacted again or dropped. This is considered an acceptable
- // failure mode as no data will be lost.
- log() << "compact orphan deleted lists" << endl;
- _details->orphanDeletedList(opCtx);
-
- // Start over from scratch with our extent sizing and growth
- _details->setLastExtentSize(opCtx, 0);
-
- // create a new extent so new records go there
- const bool enforceQuota = false;
- increaseStorageSize(opCtx, _details->lastExtentSize(opCtx), enforceQuota);
- wunit.commit();
- }
-
- stdx::unique_lock<Client> lk(*opCtx->getClient());
- ProgressMeterHolder pm(CurOp::get(opCtx)->setMessage_inlock(
- "compact extent", "Extent Compacting Progress", extents.size()));
- lk.unlock();
-
- // Go through all old extents and move each record to a new set of extents.
- int extentNumber = 0;
- for (std::vector<DiskLoc>::iterator it = extents.begin(); it != extents.end(); it++) {
- opCtx->checkForInterrupt();
- invariant(_details->firstExtent(opCtx) == *it);
- // empties and removes the first extent
- _compactExtent(opCtx, *it, extentNumber++, adaptor, options, stats);
- invariant(_details->firstExtent(opCtx) != *it);
- pm.hit();
- }
-
- invariant(_extentManager->getExtent(_details->firstExtent(opCtx))->xprev.isNull());
- invariant(_extentManager->getExtent(_details->lastExtent(opCtx))->xnext.isNull());
-
- // indexes will do their own progress meter
- pm.finished();
-
- return Status::OK();
-}
-}
diff --git a/src/mongo/db/storage/mmap_v1/record_store_v1_simple.h b/src/mongo/db/storage/mmap_v1/record_store_v1_simple.h
deleted file mode 100644
index 61c04bbf420..00000000000
--- a/src/mongo/db/storage/mmap_v1/record_store_v1_simple.h
+++ /dev/null
@@ -1,106 +0,0 @@
-// record_store_v1_simple.h
-
-/**
-* Copyright (C) 2013-2014 MongoDB Inc.
-*
-* This program is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Affero General Public License, version 3,
-* as published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU Affero General Public License for more details.
-*
-* You should have received a copy of the GNU Affero General Public License
-* along with this program. If not, see <http://www.gnu.org/licenses/>.
-*
-* As a special exception, the copyright holders give permission to link the
-* code of portions of this program with the OpenSSL library under certain
-* conditions as described in each individual source file and distribute
-* linked combinations including the program with the OpenSSL library. You
-* must comply with the GNU Affero General Public License in all respects for
-* all of the code used other than as permitted herein. If you modify file(s)
-* with this exception, you may extend this exception to your version of the
-* file(s), but you are not obligated to do so. If you do not wish to do so,
-* delete this exception statement from your version. If you delete this
-* exception statement from all source files in the program, then also delete
-* it in the license file.
-*/
-
-#pragma once
-
-#include "mongo/db/catalog/collection_options.h"
-#include "mongo/db/storage/mmap_v1/diskloc.h"
-#include "mongo/db/storage/mmap_v1/record_store_v1_base.h"
-
-namespace mongo {
-
-class SimpleRecordStoreV1Cursor;
-
-// used by index and original collections
-class SimpleRecordStoreV1 : public RecordStoreV1Base {
-public:
- SimpleRecordStoreV1(OperationContext* opCtx,
- StringData ns,
- RecordStoreV1MetaData* details,
- ExtentManager* em,
- bool isSystemIndexes);
-
- virtual ~SimpleRecordStoreV1();
-
- const char* name() const {
- return "SimpleRecordStoreV1";
- }
-
- std::unique_ptr<SeekableRecordCursor> getCursor(OperationContext* opCtx,
- bool forward) const final;
-
- std::vector<std::unique_ptr<RecordCursor>> getManyCursors(OperationContext* opCtx) const final;
-
- virtual Status truncate(OperationContext* opCtx);
-
- virtual void cappedTruncateAfter(OperationContext* opCtx, RecordId end, bool inclusive) {
- invariant(!"cappedTruncateAfter not supported");
- }
-
- virtual bool compactSupported() const {
- return true;
- }
- virtual bool compactsInPlace() const {
- return false;
- }
- virtual Status compact(OperationContext* opCtx,
- RecordStoreCompactAdaptor* adaptor,
- const CompactOptions* options,
- CompactStats* stats);
-
-protected:
- virtual bool isCapped() const {
- return false;
- }
- virtual bool shouldPadInserts() const {
- return !_details->isUserFlagSet(CollectionOptions::Flag_NoPadding);
- }
-
- virtual StatusWith<DiskLoc> allocRecord(OperationContext* opCtx,
- int lengthWithHeaders,
- bool enforceQuota);
-
- virtual void addDeletedRec(OperationContext* opCtx, const DiskLoc& dloc);
-
-private:
- DiskLoc _allocFromExistingExtents(OperationContext* opCtx, int lengthWithHeaders);
-
- void _compactExtent(OperationContext* opCtx,
- const DiskLoc diskloc,
- int extentNumber,
- RecordStoreCompactAdaptor* adaptor,
- const CompactOptions* compactOptions,
- CompactStats* stats);
-
- bool _normalCollection;
-
- friend class SimpleRecordStoreV1Iterator;
-};
-}
diff --git a/src/mongo/db/storage/mmap_v1/record_store_v1_simple_iterator.cpp b/src/mongo/db/storage/mmap_v1/record_store_v1_simple_iterator.cpp
deleted file mode 100644
index 414e1016a6b..00000000000
--- a/src/mongo/db/storage/mmap_v1/record_store_v1_simple_iterator.cpp
+++ /dev/null
@@ -1,129 +0,0 @@
-/**
- * Copyright (C) 2013 10gen Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#include "mongo/db/storage/mmap_v1/record_store_v1_simple_iterator.h"
-
-#include "mongo/db/catalog/collection.h"
-#include "mongo/db/storage/mmap_v1/extent.h"
-#include "mongo/db/storage/mmap_v1/extent_manager.h"
-#include "mongo/db/storage/mmap_v1/record_store_v1_simple.h"
-
-namespace mongo {
-
-//
-// Regular / non-capped collection traversal
-//
-
-SimpleRecordStoreV1Iterator::SimpleRecordStoreV1Iterator(OperationContext* opCtx,
- const SimpleRecordStoreV1* collection,
- bool forward)
- : _opCtx(opCtx), _recordStore(collection), _forward(forward) {
- // Eagerly seek to first Record on creation since it is cheap.
- const ExtentManager* em = _recordStore->_extentManager;
- if (_recordStore->details()->firstExtent(opCtx).isNull()) {
- // nothing in the collection
- verify(_recordStore->details()->lastExtent(opCtx).isNull());
- } else if (_forward) {
- // Find a non-empty extent and start with the first record in it.
- Extent* e = em->getExtent(_recordStore->details()->firstExtent(opCtx));
-
- while (e->firstRecord.isNull() && !e->xnext.isNull()) {
- e = em->getExtent(e->xnext);
- }
-
-        // _curr may be set to DiskLoc() here if e->firstRecord isNull but there is no
-        // valid e->xnext
- _curr = e->firstRecord;
- } else {
- // Walk backwards, skipping empty extents, and use the last record in the first
- // non-empty extent we see.
- Extent* e = em->getExtent(_recordStore->details()->lastExtent(opCtx));
-
- // TODO ELABORATE
-        // Does one of e->lastRecord.isNull(), e->firstRecord.isNull() imply the other?
- while (e->lastRecord.isNull() && !e->xprev.isNull()) {
- e = em->getExtent(e->xprev);
- }
-
- // _curr may be set to DiskLoc() here if e->lastRecord isNull but there is no
- // valid e->xprev
- _curr = e->lastRecord;
- }
-}
-
-boost::optional<Record> SimpleRecordStoreV1Iterator::next() {
- if (isEOF())
- return {};
- auto toReturn = _curr.toRecordId();
- advance();
- return {{toReturn, _recordStore->RecordStore::dataFor(_opCtx, toReturn)}};
-}
-
-boost::optional<Record> SimpleRecordStoreV1Iterator::seekExact(const RecordId& id) {
- _curr = DiskLoc::fromRecordId(id);
- advance();
- return {{id, _recordStore->RecordStore::dataFor(_opCtx, id)}};
-}
-
-void SimpleRecordStoreV1Iterator::advance() {
- // Move to the next thing.
- if (!isEOF()) {
- if (_forward) {
- _curr = _recordStore->getNextRecord(_opCtx, _curr);
- } else {
- _curr = _recordStore->getPrevRecord(_opCtx, _curr);
- }
- }
-}
-
-void SimpleRecordStoreV1Iterator::invalidate(OperationContext* opCtx, const RecordId& dl) {
- // Just move past the thing being deleted.
- if (dl == _curr.toRecordId()) {
- const DiskLoc origLoc = _curr;
-
- // Undo the advance on rollback, as the deletion that forced it "never happened".
- opCtx->recoveryUnit()->onRollback([this, origLoc]() { this->_curr = origLoc; });
- advance();
- }
-}
-
-void SimpleRecordStoreV1Iterator::save() {}
-
-bool SimpleRecordStoreV1Iterator::restore() {
- // if the collection is dropped, then the cursor should be destroyed
- return true;
-}
-
-std::unique_ptr<RecordFetcher> SimpleRecordStoreV1Iterator::fetcherForNext() const {
- return _recordStore->_extentManager->recordNeedsFetch(_curr);
-}
-
-std::unique_ptr<RecordFetcher> SimpleRecordStoreV1Iterator::fetcherForId(const RecordId& id) const {
- return _recordStore->_extentManager->recordNeedsFetch(DiskLoc::fromRecordId(id));
-}
-}
diff --git a/src/mongo/db/storage/mmap_v1/record_store_v1_simple_iterator.h b/src/mongo/db/storage/mmap_v1/record_store_v1_simple_iterator.h
deleted file mode 100644
index dd54877ee93..00000000000
--- a/src/mongo/db/storage/mmap_v1/record_store_v1_simple_iterator.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/**
- * Copyright (C) 2013 10gen Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#pragma once
-
-#include "mongo/db/storage/mmap_v1/diskloc.h"
-#include "mongo/db/storage/record_store.h"
-
-namespace mongo {
-
-class SimpleRecordStoreV1;
-
-/**
- * This class iterates over the records of a non-capped SimpleRecordStoreV1.
- * The record store must exist when the constructor is called, and the cursor
- * is positioned on the first (or, when 'forward' is false, the last) record
- * on construction.
- */
-class SimpleRecordStoreV1Iterator final : public SeekableRecordCursor {
-public:
- SimpleRecordStoreV1Iterator(OperationContext* opCtx,
- const SimpleRecordStoreV1* records,
- bool forward);
-
- boost::optional<Record> next() final;
- boost::optional<Record> seekExact(const RecordId& id) final;
- void save() final;
- bool restore() final;
- void detachFromOperationContext() final {
- _opCtx = nullptr;
- }
- void reattachToOperationContext(OperationContext* opCtx) final {
- _opCtx = opCtx;
- }
- void invalidate(OperationContext* opCtx, const RecordId& dl) final;
- std::unique_ptr<RecordFetcher> fetcherForNext() const final;
- std::unique_ptr<RecordFetcher> fetcherForId(const RecordId& id) const final;
-
-private:
- void advance();
- bool isEOF() {
- return _curr.isNull();
- }
-
-    // for next(), not owned
- OperationContext* _opCtx;
-
-    // The result returned on the next call to next().
- DiskLoc _curr;
- const SimpleRecordStoreV1* const _recordStore;
- const bool _forward;
-};
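-
-// A minimal usage sketch (illustrative only; 'rs' and 'opCtx' stand in for a valid
-// SimpleRecordStoreV1 and OperationContext owned by the caller):
-//
-//     SimpleRecordStoreV1Iterator cursor(opCtx, &rs, /*forward=*/true);
-//     while (boost::optional<Record> record = cursor.next()) {
-//         // ... use record->id and record->data ...
-//     }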
-
-} // namespace mongo
diff --git a/src/mongo/db/storage/mmap_v1/record_store_v1_simple_test.cpp b/src/mongo/db/storage/mmap_v1/record_store_v1_simple_test.cpp
deleted file mode 100644
index d1b3cb5c234..00000000000
--- a/src/mongo/db/storage/mmap_v1/record_store_v1_simple_test.cpp
+++ /dev/null
@@ -1,468 +0,0 @@
-// record_store_v1_simple_test.cpp
-
-/**
- * Copyright (C) 2014 MongoDB Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#include "mongo/db/storage/mmap_v1/record_store_v1_simple.h"
-
-#include "mongo/db/operation_context_noop.h"
-#include "mongo/db/storage/mmap_v1/extent.h"
-#include "mongo/db/storage/mmap_v1/record.h"
-#include "mongo/db/storage/mmap_v1/record_store_v1_test_help.h"
-#include "mongo/unittest/unittest.h"
-
-using namespace mongo;
-
-namespace {
-
-using std::string;
-
-TEST(SimpleRecordStoreV1, quantizeAllocationSpaceSimple) {
- ASSERT_EQUALS(RecordStoreV1Base::quantizeAllocationSpace(33), 64);
- ASSERT_EQUALS(RecordStoreV1Base::quantizeAllocationSpace(1000), 1024);
- ASSERT_EQUALS(RecordStoreV1Base::quantizeAllocationSpace(10001), 16 * 1024);
- ASSERT_EQUALS(RecordStoreV1Base::quantizeAllocationSpace(100000), 128 * 1024);
- ASSERT_EQUALS(RecordStoreV1Base::quantizeAllocationSpace(1000001), 1024 * 1024);
- ASSERT_EQUALS(RecordStoreV1Base::quantizeAllocationSpace(10000000), 10 * 1024 * 1024);
- ASSERT_EQUALS(RecordStoreV1Base::quantizeAllocationSpace(14 * 1024 * 1024 - 1),
- 14 * 1024 * 1024);
- ASSERT_EQUALS(RecordStoreV1Base::quantizeAllocationSpace(14 * 1024 * 1024), 14 * 1024 * 1024);
- ASSERT_EQUALS(RecordStoreV1Base::quantizeAllocationSpace(14 * 1024 * 1024 + 1),
- 16 * 1024 * 1024 + 512 * 1024);
- ASSERT_EQUALS(RecordStoreV1Base::quantizeAllocationSpace(16 * 1024 * 1024 + 512 * 1024),
- 16 * 1024 * 1024 + 512 * 1024);
-}
-
-TEST(SimpleRecordStoreV1, quantizeAllocationMinMaxBound) {
- const int maxSize = RecordStoreV1Base::MaxAllowedAllocation;
- ASSERT_EQUALS(RecordStoreV1Base::quantizeAllocationSpace(1), 32);
- ASSERT_EQUALS(RecordStoreV1Base::quantizeAllocationSpace(maxSize), maxSize);
-}
-
-/**
- * Tests quantization of sizes around all valid bucket sizes.
- */
-TEST(SimpleRecordStoreV1, quantizeAroundBucketSizes) {
- for (int bucket = 0; bucket < RecordStoreV1Base::Buckets - 2; bucket++) {
- const int size = RecordStoreV1Base::bucketSizes[bucket];
- const int nextSize = RecordStoreV1Base::bucketSizes[bucket + 1];
-
- // size - 1 is quantized to size.
- ASSERT_EQUALS(size, RecordStoreV1Base::quantizeAllocationSpace(size - 1));
-
- // size is quantized to size.
- ASSERT_EQUALS(size, RecordStoreV1Base::quantizeAllocationSpace(size));
-
- // size + 1 is quantized to nextSize (if it is a valid allocation)
- if (size + 1 <= RecordStoreV1Base::MaxAllowedAllocation) {
- ASSERT_EQUALS(nextSize, RecordStoreV1Base::quantizeAllocationSpace(size + 1));
- }
- }
-}
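-
-// Taken together, these cases pin down the quantization rule: a requested size is
-// rounded up to the smallest RecordStoreV1Base::bucketSizes entry that can hold it
-// (MaxAllowedAllocation itself quantizes to itself).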
-
-BSONObj docForRecordSize(int size) {
- BSONObjBuilder b;
- b.append("_id", 5);
- b.append("x", string(size - MmapV1RecordHeader::HeaderSize - 22, 'x'));
- BSONObj x = b.obj();
- ASSERT_EQUALS(MmapV1RecordHeader::HeaderSize + x.objsize(), size);
- return x;
-}
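-
-// Example: docForRecordSize(300) yields {_id: 5, x: "xx..."} sized so that
-// MmapV1RecordHeader::HeaderSize + objsize() == 300 exactly, as the ASSERT above enforces.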
-
-class BsonDocWriter final : public DocWriter {
-public:
- BsonDocWriter(const BSONObj& obj, bool padding) : _obj(obj), _padding(padding) {}
-
- virtual void writeDocument(char* buf) const {
- memcpy(buf, _obj.objdata(), _obj.objsize());
- }
- virtual size_t documentSize() const {
- return _obj.objsize();
- }
- virtual bool addPadding() const {
- return _padding;
- }
-
-private:
- BSONObj _obj;
- bool _padding;
-};
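-
-// BsonDocWriter lets each test choose, via the 'padding' flag, whether alloc() may
-// quantize the allocation; the flag is surfaced through DocWriter::addPadding().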
-
-/** alloc() quantizes the requested size using quantizeAllocationSpace() rules. */
-TEST(SimpleRecordStoreV1, AllocQuantized) {
- OperationContextNoop opCtx;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(false, 0);
-
- string myns = "test.AllocQuantized";
- SimpleRecordStoreV1 rs(&opCtx, myns, md, &em, false);
-
- BSONObj obj = docForRecordSize(300);
- StatusWith<RecordId> result =
- rs.insertRecord(&opCtx, obj.objdata(), obj.objsize(), Timestamp(), false);
- ASSERT(result.isOK());
-
- // The length of the allocated record is quantized.
- ASSERT_EQUALS(512,
- rs.dataFor(&opCtx, result.getValue()).size() + MmapV1RecordHeader::HeaderSize);
-}
-
-TEST(SimpleRecordStoreV1, AllocNonQuantized) {
- OperationContextNoop opCtx;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(false, 0);
- md->setUserFlag(&opCtx, CollectionOptions::Flag_NoPadding);
-
- string myns = "test.AllocQuantized";
- SimpleRecordStoreV1 rs(&opCtx, myns, md, &em, false);
-
- BSONObj obj = docForRecordSize(300);
- StatusWith<RecordId> result =
- rs.insertRecord(&opCtx, obj.objdata(), obj.objsize(), Timestamp(), false);
- ASSERT(result.isOK());
-
-    // With the NoPadding flag set, the allocated length is not quantized.
- ASSERT_EQUALS(300,
- rs.dataFor(&opCtx, result.getValue()).size() + MmapV1RecordHeader::HeaderSize);
-}
-
-TEST(SimpleRecordStoreV1, AllocNonQuantizedStillAligned) {
- OperationContextNoop opCtx;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(false, 0);
- md->setUserFlag(&opCtx, CollectionOptions::Flag_NoPadding);
-
- string myns = "test.AllocQuantized";
- SimpleRecordStoreV1 rs(&opCtx, myns, md, &em, false);
-
- BSONObj obj = docForRecordSize(298);
- StatusWith<RecordId> result =
- rs.insertRecord(&opCtx, obj.objdata(), obj.objsize(), Timestamp(), false);
- ASSERT(result.isOK());
-
-    // The allocated length is not quantized, but is still aligned up to 4 bytes.
- ASSERT_EQUALS(300,
- rs.dataFor(&opCtx, result.getValue()).size() + MmapV1RecordHeader::HeaderSize);
-}
-
-/** alloc() quantizes the requested size if DocWriter::addPadding() returns true. */
-TEST(SimpleRecordStoreV1, AllocQuantizedWithDocWriter) {
- OperationContextNoop opCtx;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(false, 0);
-
- string myns = "test.AllocQuantized";
- SimpleRecordStoreV1 rs(&opCtx, myns, md, &em, false);
-
- BsonDocWriter docWriter(docForRecordSize(300), true);
- StatusWith<RecordId> result = rs.insertRecordWithDocWriter(&opCtx, &docWriter, Timestamp());
- ASSERT(result.isOK());
-
- // The length of the allocated record is quantized.
- ASSERT_EQUALS(512,
- rs.dataFor(&opCtx, result.getValue()).size() + MmapV1RecordHeader::HeaderSize);
-}
-
-/**
- * alloc() does not quantize records if DocWriter::addPadding() returns false.
- */
-TEST(SimpleRecordStoreV1, AllocNonQuantizedDocWriter) {
- OperationContextNoop opCtx;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(false, 0);
-
- string myns = "test.AllocIndexNamespaceNotQuantized";
- SimpleRecordStoreV1 rs(&opCtx, myns + "$x", md, &em, false);
-
- BsonDocWriter docWriter(docForRecordSize(300), false);
- StatusWith<RecordId> result = rs.insertRecordWithDocWriter(&opCtx, &docWriter, Timestamp());
- ASSERT(result.isOK());
-
- // The length of the allocated record is not quantized.
- ASSERT_EQUALS(300,
- rs.dataFor(&opCtx, result.getValue()).size() + MmapV1RecordHeader::HeaderSize);
-}
-
-/** alloc() aligns record sizes up to 4 bytes even if DocWriter::addPadding returns false. */
-TEST(SimpleRecordStoreV1, AllocAlignedDocWriter) {
- OperationContextNoop opCtx;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(false, 0);
-
- string myns = "test.AllocIndexNamespaceNotQuantized";
- SimpleRecordStoreV1 rs(&opCtx, myns + "$x", md, &em, false);
-
- BsonDocWriter docWriter(docForRecordSize(298), false);
- StatusWith<RecordId> result = rs.insertRecordWithDocWriter(&opCtx, &docWriter, Timestamp());
- ASSERT(result.isOK());
-
- ASSERT_EQUALS(300,
- rs.dataFor(&opCtx, result.getValue()).size() + MmapV1RecordHeader::HeaderSize);
-}
-
-/**
- * alloc() with quantized size doesn't split if enough room left over.
- */
-TEST(SimpleRecordStoreV1, AllocUseQuantizedDeletedRecordWithoutSplit) {
- OperationContextNoop opCtx;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(false, 0);
- SimpleRecordStoreV1 rs(&opCtx, "test.foo", md, &em, false);
-
- {
- LocAndSize drecs[] = {{DiskLoc(0, 1000), 512 + 31}, {}};
- initializeV1RS(&opCtx, NULL, drecs, NULL, &em, md);
- }
-
- BsonDocWriter docWriter(docForRecordSize(300), true);
- StatusWith<RecordId> actualLocation =
- rs.insertRecordWithDocWriter(&opCtx, &docWriter, Timestamp());
- ASSERT_OK(actualLocation.getStatus());
-
- {
- LocAndSize recs[] = {{DiskLoc(0, 1000), 512 + 31}, {}};
- LocAndSize drecs[] = {{}};
- assertStateV1RS(&opCtx, recs, drecs, NULL, &em, md);
- }
-}
-
-/**
- * alloc() with a quantized size splits if enough room is left over.
- */
-TEST(SimpleRecordStoreV1, AllocUseQuantizedDeletedRecordWithSplit) {
- OperationContextNoop opCtx;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(false, 0);
- SimpleRecordStoreV1 rs(&opCtx, "test.foo", md, &em, false);
-
- {
- LocAndSize drecs[] = {{DiskLoc(0, 1000), 512 + 32}, {}};
- initializeV1RS(&opCtx, NULL, drecs, NULL, &em, md);
- }
-
- BsonDocWriter docWriter(docForRecordSize(300), true);
- StatusWith<RecordId> actualLocation =
- rs.insertRecordWithDocWriter(&opCtx, &docWriter, Timestamp());
- ASSERT_OK(actualLocation.getStatus());
-
- {
- LocAndSize recs[] = {{DiskLoc(0, 1000), 512}, {}};
- LocAndSize drecs[] = {{DiskLoc(0, 1512), 32}, {}};
- assertStateV1RS(&opCtx, recs, drecs, NULL, &em, md);
- }
-}
-
-/**
- * alloc() with a non-quantized size doesn't split if there isn't enough room left over.
- */
-TEST(SimpleRecordStoreV1, AllocUseNonQuantizedDeletedRecordWithoutSplit) {
- OperationContextNoop opCtx;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(false, 0);
- SimpleRecordStoreV1 rs(&opCtx, "test.foo", md, &em, false);
-
- {
- LocAndSize drecs[] = {{DiskLoc(0, 1000), 331}, {}};
- initializeV1RS(&opCtx, NULL, drecs, NULL, &em, md);
- }
-
- BsonDocWriter docWriter(docForRecordSize(300), false);
- StatusWith<RecordId> actualLocation =
- rs.insertRecordWithDocWriter(&opCtx, &docWriter, Timestamp());
- ASSERT_OK(actualLocation.getStatus());
-
- {
- LocAndSize recs[] = {{DiskLoc(0, 1000), 331}, {}};
- LocAndSize drecs[] = {{}};
- assertStateV1RS(&opCtx, recs, drecs, NULL, &em, md);
- }
-}
-
-/**
- * alloc() with a non-quantized size splits if enough room is left over.
- */
-TEST(SimpleRecordStoreV1, AllocUseNonQuantizedDeletedRecordWithSplit) {
- OperationContextNoop opCtx;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(false, 0);
- SimpleRecordStoreV1 rs(&opCtx, "test.foo", md, &em, false);
-
- {
- LocAndSize drecs[] = {{DiskLoc(0, 1000), 332}, {}};
- initializeV1RS(&opCtx, NULL, drecs, NULL, &em, md);
- }
-
- BsonDocWriter docWriter(docForRecordSize(300), false);
- StatusWith<RecordId> actualLocation =
- rs.insertRecordWithDocWriter(&opCtx, &docWriter, Timestamp());
- ASSERT_OK(actualLocation.getStatus());
-
- {
- LocAndSize recs[] = {{DiskLoc(0, 1000), 300}, {}};
- LocAndSize drecs[] = {{DiskLoc(0, 1300), 32}, {}};
- assertStateV1RS(&opCtx, recs, drecs, NULL, &em, md);
- }
-}
-
-/**
- * alloc() will allocate from the legacy grab bag if it can.
- */
-TEST(SimpleRecordStoreV1, GrabBagIsUsed) {
- OperationContextNoop opCtx;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(false, 0);
- SimpleRecordStoreV1 rs(&opCtx, "test.foo", md, &em, false);
-
- {
- LocAndSize drecs[] = {{}};
- LocAndSize grabBag[] = {
- {DiskLoc(0, 1000), 4 * 1024 * 1024}, {DiskLoc(1, 1000), 4 * 1024 * 1024}, {}};
- initializeV1RS(&opCtx, NULL, drecs, grabBag, &em, md);
- }
-
- BsonDocWriter docWriter(docForRecordSize(256), false);
- StatusWith<RecordId> actualLocation =
- rs.insertRecordWithDocWriter(&opCtx, &docWriter, Timestamp());
- ASSERT_OK(actualLocation.getStatus());
-
- {
- LocAndSize recs[] = {{DiskLoc(0, 1000), 256}, {}};
- LocAndSize drecs[] = {{DiskLoc(0, 1256), 4 * 1024 * 1024 - 256}, {}};
- LocAndSize grabBag[] = {{DiskLoc(1, 1000), 4 * 1024 * 1024}, {}};
- assertStateV1RS(&opCtx, recs, drecs, grabBag, &em, md);
- }
-}
-
-/**
- * alloc() will pull from the legacy grab bag even if it isn't needed.
- */
-TEST(SimpleRecordStoreV1, GrabBagIsPoppedEvenIfUnneeded) {
- OperationContextNoop opCtx;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(false, 0);
- SimpleRecordStoreV1 rs(&opCtx, "test.foo", md, &em, false);
-
- {
- LocAndSize drecs[] = {{DiskLoc(0, 1000), 1000}, {}};
- LocAndSize grabBag[] = {
- {DiskLoc(1, 1000), 4 * 1024 * 1024}, {DiskLoc(2, 1000), 4 * 1024 * 1024}, {}};
- initializeV1RS(&opCtx, NULL, drecs, grabBag, &em, md);
- }
-
- BsonDocWriter docWriter(docForRecordSize(1000), false);
- StatusWith<RecordId> actualLocation =
- rs.insertRecordWithDocWriter(&opCtx, &docWriter, Timestamp());
- ASSERT_OK(actualLocation.getStatus());
-
- {
- LocAndSize recs[] = {{DiskLoc(0, 1000), 1000}, {}};
- LocAndSize drecs[] = {{DiskLoc(1, 1000), 4 * 1024 * 1024}, {}};
- LocAndSize grabBag[] = {{DiskLoc(2, 1000), 4 * 1024 * 1024}, {}};
- assertStateV1RS(&opCtx, recs, drecs, grabBag, &em, md);
- }
-}
-
-/**
- * alloc() will pull from the legacy grab bag even if the entry can't be used.
- */
-TEST(SimpleRecordStoreV1, GrabBagIsPoppedEvenIfUnusable) {
- OperationContextNoop opCtx;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(false, 0);
- SimpleRecordStoreV1 rs(&opCtx, "test.foo", md, &em, false);
-
- {
- LocAndSize drecs[] = {{DiskLoc(0, 1000), 8 * 1024 * 1024}, {}};
- LocAndSize grabBag[] = {
- {DiskLoc(1, 1000), 4 * 1024 * 1024}, {DiskLoc(2, 1000), 4 * 1024 * 1024}, {}};
- initializeV1RS(&opCtx, NULL, drecs, grabBag, &em, md);
- }
-
- BsonDocWriter docWriter(docForRecordSize(8 * 1024 * 1024), false);
- StatusWith<RecordId> actualLocation =
- rs.insertRecordWithDocWriter(&opCtx, &docWriter, Timestamp());
- ASSERT_OK(actualLocation.getStatus());
-
- {
- LocAndSize recs[] = {{DiskLoc(0, 1000), 8 * 1024 * 1024}, {}};
- LocAndSize drecs[] = {{DiskLoc(1, 1000), 4 * 1024 * 1024}, {}};
- LocAndSize grabBag[] = {{DiskLoc(2, 1000), 4 * 1024 * 1024}, {}};
- assertStateV1RS(&opCtx, recs, drecs, grabBag, &em, md);
- }
-}
-
-// -----------------
-
-TEST(SimpleRecordStoreV1, FullSimple1) {
- OperationContextNoop opCtx;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(false, 0);
- SimpleRecordStoreV1 rs(&opCtx, "test.foo", md, &em, false);
-
-
- ASSERT_EQUALS(0, md->numRecords());
- StatusWith<RecordId> result = rs.insertRecord(&opCtx, "abc", 4, Timestamp(), true);
- ASSERT_TRUE(result.isOK());
- ASSERT_EQUALS(1, md->numRecords());
- RecordData recordData = rs.dataFor(&opCtx, result.getValue());
- ASSERT_EQUALS(string("abc"), string(recordData.data()));
-}
-
-// -----------------
-
-TEST(SimpleRecordStoreV1, Truncate) {
- OperationContextNoop opCtx;
- DummyExtentManager em;
- DummyRecordStoreV1MetaData* md = new DummyRecordStoreV1MetaData(false, 0);
- SimpleRecordStoreV1 rs(&opCtx, "test.foo", md, &em, false);
-
- {
- LocAndSize recs[] = {{DiskLoc(0, 1000), 100},
- {DiskLoc(0, 1100), 100},
- {DiskLoc(0, 1300), 100},
- {DiskLoc(2, 1100), 100},
- {}};
- LocAndSize drecs[] = {
- {DiskLoc(0, 1200), 100}, {DiskLoc(2, 1000), 100}, {DiskLoc(1, 1000), 1000}, {}};
-
- initializeV1RS(&opCtx, recs, drecs, NULL, &em, md);
-
- ASSERT_EQUALS(em.getExtent(DiskLoc(0, 0))->length, em.minSize());
- }
-
- rs.truncate(&opCtx).transitional_ignore();
-
- {
- LocAndSize recs[] = {{}};
- LocAndSize drecs[] = {
- // One extent filled with a single deleted record.
- {DiskLoc(0, Extent::HeaderSize()), em.minSize() - Extent::HeaderSize()},
- {}};
- assertStateV1RS(&opCtx, recs, drecs, NULL, &em, md);
- }
-}
-}
diff --git a/src/mongo/db/storage/mmap_v1/record_store_v1_test_help.cpp b/src/mongo/db/storage/mmap_v1/record_store_v1_test_help.cpp
deleted file mode 100644
index 8c55c72301b..00000000000
--- a/src/mongo/db/storage/mmap_v1/record_store_v1_test_help.cpp
+++ /dev/null
@@ -1,668 +0,0 @@
-// record_store_v1_test_help.cpp
-
-/**
-* Copyright (C) 2014 MongoDB Inc.
-*
-* This program is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Affero General Public License, version 3,
-* as published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU Affero General Public License for more details.
-*
-* You should have received a copy of the GNU Affero General Public License
-* along with this program. If not, see <http://www.gnu.org/licenses/>.
-*
-* As a special exception, the copyright holders give permission to link the
-* code of portions of this program with the OpenSSL library under certain
-* conditions as described in each individual source file and distribute
-* linked combinations including the program with the OpenSSL library. You
-* must comply with the GNU Affero General Public License in all respects for
-* all of the code used other than as permitted herein. If you modify file(s)
-* with this exception, you may extend this exception to your version of the
-* file(s), but you are not obligated to do so. If you do not wish to do so,
-* delete this exception statement from your version. If you delete this
-* exception statement from all source files in the program, then also delete
-* it in the license file.
-*/
-
-#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kStorage
-
-#include "mongo/db/storage/mmap_v1/record_store_v1_test_help.h"
-
-#include <algorithm>
-#include <boost/next_prior.hpp>
-#include <map>
-#include <set>
-#include <vector>
-
-#include "mongo/db/storage/mmap_v1/extent.h"
-#include "mongo/db/storage/mmap_v1/record.h"
-#include "mongo/db/storage/record_fetcher.h"
-#include "mongo/unittest/unittest.h"
-#include "mongo/util/allocator.h"
-#include "mongo/util/log.h"
-
-namespace mongo {
-
-using std::numeric_limits;
-
-DummyRecordStoreV1MetaData::DummyRecordStoreV1MetaData(bool capped, int userFlags) {
- _dataSize = 0;
- _numRecords = 0;
- _capped = capped;
- _userFlags = userFlags;
- _lastExtentSize = 0;
- _paddingFactor = 1;
- _maxCappedDocs = numeric_limits<long long>::max();
- _capFirstNewRecord.setInvalid();
- if (_capped) {
- // copied from NamespaceDetails::NamespaceDetails()
- setDeletedListEntry(NULL, 1, DiskLoc().setInvalid());
- }
-}
-
-const DiskLoc& DummyRecordStoreV1MetaData::capExtent() const {
- return _capExtent;
-}
-
-void DummyRecordStoreV1MetaData::setCapExtent(OperationContext* opCtx, const DiskLoc& loc) {
- _capExtent = loc;
-}
-
-const DiskLoc& DummyRecordStoreV1MetaData::capFirstNewRecord() const {
- return _capFirstNewRecord;
-}
-
-void DummyRecordStoreV1MetaData::setCapFirstNewRecord(OperationContext* opCtx, const DiskLoc& loc) {
- _capFirstNewRecord = loc;
-}
-
-long long DummyRecordStoreV1MetaData::dataSize() const {
- return _dataSize;
-}
-
-long long DummyRecordStoreV1MetaData::numRecords() const {
- return _numRecords;
-}
-
-void DummyRecordStoreV1MetaData::incrementStats(OperationContext* opCtx,
- long long dataSizeIncrement,
- long long numRecordsIncrement) {
- _dataSize += dataSizeIncrement;
- _numRecords += numRecordsIncrement;
-}
-
-void DummyRecordStoreV1MetaData::setStats(OperationContext* opCtx,
- long long dataSize,
- long long numRecords) {
- _dataSize = dataSize;
- _numRecords = numRecords;
-}
-
-namespace {
-DiskLoc myNull;
-}
-
-DiskLoc DummyRecordStoreV1MetaData::deletedListEntry(int bucket) const {
- invariant(bucket >= 0);
- if (static_cast<size_t>(bucket) >= _deletedLists.size())
- return myNull;
- return _deletedLists[bucket];
-}
-
-void DummyRecordStoreV1MetaData::setDeletedListEntry(OperationContext* opCtx,
- int bucket,
- const DiskLoc& loc) {
- invariant(bucket >= 0);
- invariant(bucket < 1000);
- while (static_cast<size_t>(bucket) >= _deletedLists.size())
- _deletedLists.push_back(DiskLoc());
- _deletedLists[bucket] = loc;
-}
-
-DiskLoc DummyRecordStoreV1MetaData::deletedListLegacyGrabBag() const {
- return _deletedListLegacyGrabBag;
-}
-
-void DummyRecordStoreV1MetaData::setDeletedListLegacyGrabBag(OperationContext* opCtx,
- const DiskLoc& loc) {
- _deletedListLegacyGrabBag = loc;
-}
-
-void DummyRecordStoreV1MetaData::orphanDeletedList(OperationContext* opCtx) {
- // They will be recreated on demand.
- _deletedLists.clear();
-}
-
-const DiskLoc& DummyRecordStoreV1MetaData::firstExtent(OperationContext* opCtx) const {
- return _firstExtent;
-}
-
-void DummyRecordStoreV1MetaData::setFirstExtent(OperationContext* opCtx, const DiskLoc& loc) {
- _firstExtent = loc;
-}
-
-const DiskLoc& DummyRecordStoreV1MetaData::lastExtent(OperationContext* opCtx) const {
- return _lastExtent;
-}
-
-void DummyRecordStoreV1MetaData::setLastExtent(OperationContext* opCtx, const DiskLoc& loc) {
- _lastExtent = loc;
-}
-
-bool DummyRecordStoreV1MetaData::isCapped() const {
- return _capped;
-}
-
-bool DummyRecordStoreV1MetaData::isUserFlagSet(int flag) const {
- return _userFlags & flag;
-}
-
-bool DummyRecordStoreV1MetaData::setUserFlag(OperationContext* opCtx, int flag) {
- if ((_userFlags & flag) == flag)
- return false;
-
- _userFlags |= flag;
- return true;
-}
-bool DummyRecordStoreV1MetaData::clearUserFlag(OperationContext* opCtx, int flag) {
- if ((_userFlags & flag) == 0)
- return false;
-
- _userFlags &= ~flag;
- return true;
-}
-bool DummyRecordStoreV1MetaData::replaceUserFlags(OperationContext* opCtx, int flags) {
- if (_userFlags == flags)
- return false;
- _userFlags = flags;
- return true;
-}
-
-
-int DummyRecordStoreV1MetaData::lastExtentSize(OperationContext* opCtx) const {
- return _lastExtentSize;
-}
-
-void DummyRecordStoreV1MetaData::setLastExtentSize(OperationContext* opCtx, int newMax) {
- _lastExtentSize = newMax;
-}
-
-long long DummyRecordStoreV1MetaData::maxCappedDocs() const {
- return _maxCappedDocs;
-}
-
-// -----------------------------------------
-
-DummyExtentManager::~DummyExtentManager() {
- for (size_t i = 0; i < _extents.size(); i++) {
- if (_extents[i].data)
- free(_extents[i].data);
- }
-}
-
-void DummyExtentManager::close(OperationContext* opCtx) {}
-
-Status DummyExtentManager::init(OperationContext* opCtx) {
- return Status::OK();
-}
-
-int DummyExtentManager::numFiles() const {
- return static_cast<int>(_extents.size());
-}
-
-long long DummyExtentManager::fileSize() const {
- MONGO_UNREACHABLE;
-}
-
-DiskLoc DummyExtentManager::allocateExtent(OperationContext* opCtx,
- bool capped,
- int size,
- bool enforceQuota) {
- size = quantizeExtentSize(size);
-
- ExtentInfo info;
- info.data = static_cast<char*>(mongoMalloc(size));
- info.length = size;
-
- DiskLoc loc(_extents.size(), 0);
- _extents.push_back(info);
-
- Extent* e = getExtent(loc, false);
- e->magic = Extent::extentSignature;
- e->myLoc = loc;
- e->xnext.Null();
- e->xprev.Null();
- e->length = size;
- e->firstRecord.Null();
- e->lastRecord.Null();
-
- return loc;
-}
-
-void DummyExtentManager::freeExtents(OperationContext* opCtx, DiskLoc firstExt, DiskLoc lastExt) {
-    // Intentionally a no-op in this test double; extent memory is freed in the destructor.
-}
-
-void DummyExtentManager::freeExtent(OperationContext* opCtx, DiskLoc extent) {
-    // Intentionally a no-op in this test double; extent memory is freed in the destructor.
-}
-void DummyExtentManager::freeListStats(OperationContext* opCtx,
- int* numExtents,
- int64_t* totalFreeSizeBytes) const {
- MONGO_UNREACHABLE;
-}
-
-std::unique_ptr<RecordFetcher> DummyExtentManager::recordNeedsFetch(const DiskLoc& loc) const {
- return {};
-}
-
-MmapV1RecordHeader* DummyExtentManager::recordForV1(const DiskLoc& loc) const {
- if (static_cast<size_t>(loc.a()) >= _extents.size())
- return NULL;
- if (static_cast<size_t>(loc.getOfs()) >= _extents[loc.a()].length)
- return NULL;
- char* root = _extents[loc.a()].data;
- return reinterpret_cast<MmapV1RecordHeader*>(root + loc.getOfs());
-}
-
-Extent* DummyExtentManager::extentForV1(const DiskLoc& loc) const {
- MONGO_UNREACHABLE;
-}
-
-DiskLoc DummyExtentManager::extentLocForV1(const DiskLoc& loc) const {
- return DiskLoc(loc.a(), 0);
-}
-
-Extent* DummyExtentManager::getExtent(const DiskLoc& loc, bool doSanityCheck) const {
- invariant(!loc.isNull());
- invariant(static_cast<size_t>(loc.a()) < _extents.size());
- invariant(loc.getOfs() == 0);
- Extent* ext = reinterpret_cast<Extent*>(_extents[loc.a()].data);
- if (doSanityCheck)
- ext->assertOk();
- return ext;
-}
-
-int DummyExtentManager::maxSize() const {
- return 1024 * 1024 * 64;
-}
-
-DummyExtentManager::CacheHint* DummyExtentManager::cacheHint(const DiskLoc& extentLoc,
- const HintType& hint) {
- return new CacheHint();
-}
-
-DataFileVersion DummyExtentManager::getFileFormat(OperationContext* opCtx) const {
- return DataFileVersion::defaultForNewFiles();
-}
-
-void DummyExtentManager::setFileFormat(OperationContext* opCtx, DataFileVersion newVersion) {}
-
-const DataFile* DummyExtentManager::getOpenFile(int n) const {
- return nullptr;
-}
-
-namespace {
-void accumulateExtentSizeRequirements(const LocAndSize* las, std::map<int, size_t>* sizes) {
- if (!las)
- return;
-
- while (!las->loc.isNull()) {
-        // We require passed-in offsets to be >= 1000 to leave room for Extent headers.
- invariant(Extent::HeaderSize() < 1000);
- invariant(las->loc.getOfs() >= 1000);
-
- const size_t end = las->loc.getOfs() + las->size;
- size_t& sizeNeeded = (*sizes)[las->loc.a()];
- sizeNeeded = std::max(sizeNeeded, end);
- las++;
- }
-}
-
-void printRecList(OperationContext* opCtx,
- const ExtentManager* em,
- const RecordStoreV1MetaData* md) {
- log() << " *** BEGIN ACTUAL RECORD LIST *** ";
- DiskLoc extLoc = md->firstExtent(opCtx);
- std::set<DiskLoc> seenLocs;
- while (!extLoc.isNull()) {
- Extent* ext = em->getExtent(extLoc, true);
- DiskLoc actualLoc = ext->firstRecord;
- while (!actualLoc.isNull()) {
- const MmapV1RecordHeader* actualRec = em->recordForV1(actualLoc);
- const int actualSize = actualRec->lengthWithHeaders();
-
- log() << "loc: " << actualLoc // <--hex
- << " (" << actualLoc.getOfs() << ")"
- << " size: " << actualSize << " prev: " << actualRec->prevOfs()
- << " next: " << actualRec->nextOfs()
- << (actualLoc == md->capFirstNewRecord() ? " (CAP_FIRST_NEW)" : "");
-
- const bool foundCycle = !seenLocs.insert(actualLoc).second;
- invariant(!foundCycle);
-
- const int nextOfs = actualRec->nextOfs();
- actualLoc = (nextOfs == DiskLoc::NullOfs ? DiskLoc() : DiskLoc(actualLoc.a(), nextOfs));
- }
- extLoc = ext->xnext;
- }
- log() << " *** END ACTUAL RECORD LIST *** ";
-}
-
-void printDRecList(const ExtentManager* em, const RecordStoreV1MetaData* md) {
- log() << " *** BEGIN ACTUAL DELETED RECORD LIST *** ";
- std::set<DiskLoc> seenLocs;
- for (int bucketIdx = 0; bucketIdx < RecordStoreV1Base::Buckets; bucketIdx++) {
- DiskLoc actualLoc = md->deletedListEntry(bucketIdx);
- while (!actualLoc.isNull()) {
- const DeletedRecord* actualDrec = &em->recordForV1(actualLoc)->asDeleted();
- const int actualSize = actualDrec->lengthWithHeaders();
-
- log() << "loc: " << actualLoc // <--hex
- << " (" << actualLoc.getOfs() << ")"
- << " size: " << actualSize << " bucket: " << bucketIdx
- << " next: " << actualDrec->nextDeleted();
-
- const bool foundCycle = !seenLocs.insert(actualLoc).second;
- invariant(!foundCycle);
-
- actualLoc = actualDrec->nextDeleted();
- }
-
- // Only print bucket 0 in capped collections since it contains all deleted records
- if (md->isCapped())
- break;
- }
- log() << " *** END ACTUAL DELETED RECORD LIST *** ";
-}
-}
-
-void initializeV1RS(OperationContext* opCtx,
- const LocAndSize* records,
- const LocAndSize* drecs,
- const LocAndSize* legacyGrabBag,
- DummyExtentManager* em,
- DummyRecordStoreV1MetaData* md) {
- invariant(records || drecs); // if both are NULL nothing is being created...
-
- // Need to start with a blank slate
- invariant(em->numFiles() == 0);
- invariant(md->firstExtent(opCtx).isNull());
-
- // pre-allocate extents (even extents that aren't part of this RS)
- {
- typedef std::map<int, size_t> ExtentSizes;
- ExtentSizes extentSizes;
- accumulateExtentSizeRequirements(records, &extentSizes);
- accumulateExtentSizeRequirements(drecs, &extentSizes);
- accumulateExtentSizeRequirements(legacyGrabBag, &extentSizes);
- invariant(!extentSizes.empty());
-
- const int maxExtent = extentSizes.rbegin()->first;
- for (int i = 0; i <= maxExtent; i++) {
- const size_t size = extentSizes.count(i) ? extentSizes[i] : 0;
- const DiskLoc loc = em->allocateExtent(opCtx, md->isCapped(), size, 0);
-
-            // This function and assertStateV1RS depend on these details of DummyExtentManager
- invariant(loc.a() == i);
- invariant(loc.getOfs() == 0);
- }
-
- // link together extents that should be part of this RS
- md->setFirstExtent(opCtx, DiskLoc(extentSizes.begin()->first, 0));
- md->setLastExtent(opCtx, DiskLoc(extentSizes.rbegin()->first, 0));
- for (ExtentSizes::iterator it = extentSizes.begin(); boost::next(it) != extentSizes.end();
- /* ++it */) {
- const int a = it->first;
- ++it;
- const int b = it->first;
- em->getExtent(DiskLoc(a, 0))->xnext = DiskLoc(b, 0);
- em->getExtent(DiskLoc(b, 0))->xprev = DiskLoc(a, 0);
- }
-
- // This signals "done allocating new extents".
- if (md->isCapped())
- md->setDeletedListEntry(opCtx, 1, DiskLoc());
- }
-
- if (records && !records[0].loc.isNull()) {
- int recIdx = 0;
- DiskLoc extLoc = md->firstExtent(opCtx);
- while (!extLoc.isNull()) {
- Extent* ext = em->getExtent(extLoc);
- int prevOfs = DiskLoc::NullOfs;
- while (extLoc.a() == records[recIdx].loc.a()) { // for all records in this extent
- const DiskLoc loc = records[recIdx].loc;
-                const int size = records[recIdx].size;
- invariant(size >= MmapV1RecordHeader::HeaderSize);
-
- md->incrementStats(opCtx, size - MmapV1RecordHeader::HeaderSize, 1);
-
- if (ext->firstRecord.isNull())
- ext->firstRecord = loc;
-
- MmapV1RecordHeader* rec = em->recordForV1(loc);
- rec->lengthWithHeaders() = size;
- rec->extentOfs() = 0;
-
- rec->prevOfs() = prevOfs;
- prevOfs = loc.getOfs();
-
- const DiskLoc nextLoc = records[recIdx + 1].loc;
- if (nextLoc.a() == loc.a()) { // if next is in same extent
- rec->nextOfs() = nextLoc.getOfs();
- } else {
- rec->nextOfs() = DiskLoc::NullOfs;
- ext->lastRecord = loc;
- }
-
- recIdx++;
- }
- extLoc = ext->xnext;
- }
- invariant(records[recIdx].loc.isNull());
- }
-
- if (drecs && !drecs[0].loc.isNull()) {
- int drecIdx = 0;
- DiskLoc* prevNextPtr = NULL;
- int lastBucket = -1;
- while (!drecs[drecIdx].loc.isNull()) {
- const DiskLoc loc = drecs[drecIdx].loc;
- const int size = drecs[drecIdx].size;
- invariant(size >= MmapV1RecordHeader::HeaderSize);
- const int bucket = RecordStoreV1Base::bucket(size);
-
- if (md->isCapped()) {
- // All drecs form a single list in bucket 0
- if (prevNextPtr == NULL) {
- md->setDeletedListEntry(opCtx, 0, loc);
- } else {
- *prevNextPtr = loc;
- }
-
- if (loc.a() < md->capExtent().a() &&
- drecs[drecIdx + 1].loc.a() == md->capExtent().a()) {
- // Bucket 1 is known as cappedLastDelRecLastExtent
- md->setDeletedListEntry(opCtx, 1, loc);
- }
- } else if (bucket != lastBucket) {
- invariant(bucket > lastBucket); // if this fails, drecs weren't sorted by bucket
- md->setDeletedListEntry(opCtx, bucket, loc);
- lastBucket = bucket;
- } else {
- *prevNextPtr = loc;
- }
-
- DeletedRecord* drec = &em->recordForV1(loc)->asDeleted();
- drec->lengthWithHeaders() = size;
- drec->extentOfs() = 0;
- drec->nextDeleted() = DiskLoc();
- prevNextPtr = &drec->nextDeleted();
-
- drecIdx++;
- }
- }
-
- if (legacyGrabBag && !legacyGrabBag[0].loc.isNull()) {
- invariant(!md->isCapped()); // capped should have an empty legacy grab bag.
-
- int grabBagIdx = 0;
- DiskLoc* prevNextPtr = NULL;
- while (!legacyGrabBag[grabBagIdx].loc.isNull()) {
- const DiskLoc loc = legacyGrabBag[grabBagIdx].loc;
- const int size = legacyGrabBag[grabBagIdx].size;
- invariant(size >= MmapV1RecordHeader::HeaderSize);
-
- if (grabBagIdx == 0) {
- md->setDeletedListLegacyGrabBag(opCtx, loc);
- } else {
- *prevNextPtr = loc;
- }
-
- DeletedRecord* drec = &em->recordForV1(loc)->asDeleted();
- drec->lengthWithHeaders() = size;
- drec->extentOfs() = 0;
- drec->nextDeleted() = DiskLoc();
- prevNextPtr = &drec->nextDeleted();
-
- grabBagIdx++;
- }
- }
-
- // Make sure we set everything up as requested.
- assertStateV1RS(opCtx, records, drecs, legacyGrabBag, em, md);
-}
-
-void assertStateV1RS(OperationContext* opCtx,
- const LocAndSize* records,
- const LocAndSize* drecs,
- const LocAndSize* legacyGrabBag,
- const ExtentManager* em,
- const DummyRecordStoreV1MetaData* md) {
- invariant(records || drecs); // if both are NULL nothing is being asserted...
-
- try {
- if (records) {
- long long dataSize = 0;
- long long numRecs = 0;
-
- int recIdx = 0;
-
- DiskLoc extLoc = md->firstExtent(opCtx);
- while (!extLoc.isNull()) { // for each Extent
- Extent* ext = em->getExtent(extLoc, true);
- int expectedPrevOfs = DiskLoc::NullOfs;
- DiskLoc actualLoc = ext->firstRecord;
- while (!actualLoc.isNull()) { // for each MmapV1RecordHeader in this Extent
- const MmapV1RecordHeader* actualRec = em->recordForV1(actualLoc);
- const int actualSize = actualRec->lengthWithHeaders();
-
- dataSize += actualSize - MmapV1RecordHeader::HeaderSize;
- numRecs += 1;
-
- ASSERT_EQUALS(actualLoc, records[recIdx].loc);
- ASSERT_EQUALS(actualSize, records[recIdx].size);
-
- ASSERT_EQUALS(actualRec->extentOfs(), extLoc.getOfs());
- ASSERT_EQUALS(actualRec->prevOfs(), expectedPrevOfs);
- expectedPrevOfs = actualLoc.getOfs();
-
- recIdx++;
- const int nextOfs = actualRec->nextOfs();
- actualLoc =
- (nextOfs == DiskLoc::NullOfs ? DiskLoc() : DiskLoc(actualLoc.a(), nextOfs));
- }
-
- if (ext->xnext.isNull()) {
- ASSERT_EQUALS(md->lastExtent(opCtx), extLoc);
- }
-
- extLoc = ext->xnext;
- }
-
- // both the expected and actual record lists must be done at this point
- ASSERT_EQUALS(records[recIdx].loc, DiskLoc());
-
- ASSERT_EQUALS(dataSize, md->dataSize());
- ASSERT_EQUALS(numRecs, md->numRecords());
- }
-
- if (drecs) {
- int drecIdx = 0;
- for (int bucketIdx = 0; bucketIdx < RecordStoreV1Base::Buckets; bucketIdx++) {
- DiskLoc actualLoc = md->deletedListEntry(bucketIdx);
-
- if (md->isCapped() && bucketIdx == 1) {
- // In capped collections, the 2nd bucket (index 1) points to the drec before
- // the first drec in the capExtent. If the capExtent is the first Extent,
- // it should be Null.
-
- if (md->capExtent() == md->firstExtent(opCtx)) {
- ASSERT_EQUALS(actualLoc, DiskLoc());
- } else {
- ASSERT_NOT_EQUALS(actualLoc.a(), md->capExtent().a());
- const DeletedRecord* actualDrec = &em->recordForV1(actualLoc)->asDeleted();
- ASSERT_EQUALS(actualDrec->nextDeleted().a(), md->capExtent().a());
- }
-
-                // Don't do the normal checking of bucket 1 in capped collections; the
-                // loop goes on to verify that the remaining buckets are Null.
- continue;
- }
-
- while (!actualLoc.isNull()) {
- const DeletedRecord* actualDrec = &em->recordForV1(actualLoc)->asDeleted();
- const int actualSize = actualDrec->lengthWithHeaders();
-
- ASSERT_EQUALS(actualLoc, drecs[drecIdx].loc);
- ASSERT_EQUALS(actualSize, drecs[drecIdx].size);
-
- // Make sure the drec is correct
- ASSERT_EQUALS(actualDrec->extentOfs(), 0);
-
- // in capped collections all drecs are linked into a single list in bucket 0
- ASSERT_EQUALS(bucketIdx,
- md->isCapped() ? 0 : RecordStoreV1Base::bucket(actualSize));
-
- drecIdx++;
- actualLoc = actualDrec->nextDeleted();
- }
- }
- // both the expected and actual deleted lists must be done at this point
- ASSERT_EQUALS(drecs[drecIdx].loc, DiskLoc());
- }
-
- if (legacyGrabBag) {
- int grabBagIdx = 0;
- DiskLoc actualLoc = md->deletedListLegacyGrabBag();
- while (!actualLoc.isNull()) {
- const DeletedRecord* actualDrec = &em->recordForV1(actualLoc)->asDeleted();
- const int actualSize = actualDrec->lengthWithHeaders();
-
- ASSERT_EQUALS(actualLoc, legacyGrabBag[grabBagIdx].loc);
- ASSERT_EQUALS(actualSize, legacyGrabBag[grabBagIdx].size);
-
- grabBagIdx++;
- actualLoc = actualDrec->nextDeleted();
- }
-
- // both the expected and actual deleted lists must be done at this point
- ASSERT_EQUALS(legacyGrabBag[grabBagIdx].loc, DiskLoc());
- } else {
- // Unless a test is actually using the grabBag it should be empty
- ASSERT_EQUALS(md->deletedListLegacyGrabBag(), DiskLoc());
- }
- } catch (...) {
- // If a test fails, provide extra info to make debugging easier
- printRecList(opCtx, em, md);
- printDRecList(em, md);
- throw;
- }
-}
-}
diff --git a/src/mongo/db/storage/mmap_v1/record_store_v1_test_help.h b/src/mongo/db/storage/mmap_v1/record_store_v1_test_help.h
deleted file mode 100644
index c9af1e5cc36..00000000000
--- a/src/mongo/db/storage/mmap_v1/record_store_v1_test_help.h
+++ /dev/null
@@ -1,211 +0,0 @@
-// record_store_v1_test_help.h
-
-/**
-* Copyright (C) 2014 MongoDB Inc.
-*
-* This program is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Affero General Public License, version 3,
-* as published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU Affero General Public License for more details.
-*
-* You should have received a copy of the GNU Affero General Public License
-* along with this program. If not, see <http://www.gnu.org/licenses/>.
-*
-* As a special exception, the copyright holders give permission to link the
-* code of portions of this program with the OpenSSL library under certain
-* conditions as described in each individual source file and distribute
-* linked combinations including the program with the OpenSSL library. You
-* must comply with the GNU Affero General Public License in all respects for
-* all of the code used other than as permitted herein. If you modify file(s)
-* with this exception, you may extend this exception to your version of the
-* file(s), but you are not obligated to do so. If you do not wish to do so,
-* delete this exception statement from your version. If you delete this
-* exception statement from all source files in the program, then also delete
-* it in the license file.
-*/
-
-#pragma once
-
-#include <vector>
-
-#include "mongo/db/storage/mmap_v1/data_file.h"
-#include "mongo/db/storage/mmap_v1/extent_manager.h"
-#include "mongo/db/storage/mmap_v1/record_store_v1_base.h"
-
-namespace mongo {
-
-class DummyRecordStoreV1MetaData : public RecordStoreV1MetaData {
-public:
- DummyRecordStoreV1MetaData(bool capped, int userFlags);
- virtual ~DummyRecordStoreV1MetaData() {}
-
- virtual const DiskLoc& capExtent() const;
- virtual void setCapExtent(OperationContext* opCtx, const DiskLoc& loc);
-
- virtual const DiskLoc& capFirstNewRecord() const;
- virtual void setCapFirstNewRecord(OperationContext* opCtx, const DiskLoc& loc);
-
- virtual long long dataSize() const;
- virtual long long numRecords() const;
-
- virtual void incrementStats(OperationContext* opCtx,
- long long dataSizeIncrement,
- long long numRecordsIncrement);
-
- virtual void setStats(OperationContext* opCtx, long long dataSize, long long numRecords);
-
- virtual DiskLoc deletedListEntry(int bucket) const;
- virtual void setDeletedListEntry(OperationContext* opCtx, int bucket, const DiskLoc& loc);
-
- virtual DiskLoc deletedListLegacyGrabBag() const;
- virtual void setDeletedListLegacyGrabBag(OperationContext* opCtx, const DiskLoc& loc);
-
- virtual void orphanDeletedList(OperationContext* opCtx);
-
- virtual const DiskLoc& firstExtent(OperationContext* opCtx) const;
- virtual void setFirstExtent(OperationContext* opCtx, const DiskLoc& loc);
-
- virtual const DiskLoc& lastExtent(OperationContext* opCtx) const;
- virtual void setLastExtent(OperationContext* opCtx, const DiskLoc& loc);
-
- virtual bool isCapped() const;
-
- virtual bool isUserFlagSet(int flag) const;
- virtual int userFlags() const {
- return _userFlags;
- }
- virtual bool setUserFlag(OperationContext* opCtx, int flag);
- virtual bool clearUserFlag(OperationContext* opCtx, int flag);
- virtual bool replaceUserFlags(OperationContext* opCtx, int flags);
-
-
- virtual int lastExtentSize(OperationContext* opCtx) const;
- virtual void setLastExtentSize(OperationContext* opCtx, int newMax);
-
- virtual long long maxCappedDocs() const;
-
-protected:
- DiskLoc _capExtent;
- DiskLoc _capFirstNewRecord;
-
- long long _dataSize;
- long long _numRecords;
-
- DiskLoc _firstExtent;
- DiskLoc _lastExtent;
-
- bool _capped;
- int _userFlags;
- long long _maxCappedDocs;
-
- int _lastExtentSize;
- double _paddingFactor;
-
- std::vector<DiskLoc> _deletedLists;
- DiskLoc _deletedListLegacyGrabBag;
-};
-
-class DummyExtentManager : public ExtentManager {
-public:
- virtual ~DummyExtentManager();
-
- virtual void close(OperationContext* opCtx);
-
- virtual Status init(OperationContext* opCtx);
-
- virtual int numFiles() const;
- virtual long long fileSize() const;
-
- virtual DiskLoc allocateExtent(OperationContext* opCtx,
- bool capped,
- int size,
- bool enforceQuota);
-
- virtual void freeExtents(OperationContext* opCtx, DiskLoc firstExt, DiskLoc lastExt);
-
- virtual void freeExtent(OperationContext* opCtx, DiskLoc extent);
-
- virtual void freeListStats(OperationContext* opCtx,
- int* numExtents,
- int64_t* totalFreeSizeBytes) const;
-
- virtual MmapV1RecordHeader* recordForV1(const DiskLoc& loc) const;
-
- virtual std::unique_ptr<RecordFetcher> recordNeedsFetch(const DiskLoc& loc) const final;
-
- virtual Extent* extentForV1(const DiskLoc& loc) const;
-
- virtual DiskLoc extentLocForV1(const DiskLoc& loc) const;
-
- virtual Extent* getExtent(const DiskLoc& loc, bool doSanityCheck = true) const;
-
- virtual int maxSize() const;
-
- virtual CacheHint* cacheHint(const DiskLoc& extentLoc, const HintType& hint);
-
- DataFileVersion getFileFormat(OperationContext* opCtx) const final;
-
- virtual void setFileFormat(OperationContext* opCtx, DataFileVersion newVersion) final;
-
- const DataFile* getOpenFile(int n) const final;
-
-
-protected:
- struct ExtentInfo {
- char* data;
- size_t length;
- };
-
- std::vector<ExtentInfo> _extents;
-};
-
-struct LocAndSize {
- DiskLoc loc;
- int size; // with headers
-};
-
-/**
- * Creates a V1 record store (storage/mmap_v1) with the passed-in records and
- * DeletedRecords (drecs).
- *
- * Lists of LocAndSize are terminated by a Null DiskLoc. Passing a NULL pointer is shorthand
- * for an empty list. Each extent gets its own DiskLoc file number. DiskLoc offsets must be
- * >= 1000.
- *
- * records must be sorted by extent/file. Offsets within an extent can be in any order.
- *
- * In a simple RS, drecs must be grouped into size-buckets, but the ordering within the size
- * buckets is up to you.
- *
- * In a capped collection, all drecs form a single list and must be grouped by extent, with each
- * extent having at least one drec. capFirstNewRecord() and capExtent() *must* be correctly set
- * on md before calling.
- *
- * You are responsible for ensuring the records and drecs don't overlap.
- *
- * ExtentManager and MetaData must both be empty.
- */
-void initializeV1RS(OperationContext* opCtx,
- const LocAndSize* records,
- const LocAndSize* drecs,
- const LocAndSize* legacyGrabBag,
- DummyExtentManager* em,
- DummyRecordStoreV1MetaData* md);
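-
-// A minimal sketch of the expected call pattern (mirroring record_store_v1_simple_test.cpp;
-// 'opCtx', 'em' and 'md' are an OperationContextNoop, DummyExtentManager and
-// DummyRecordStoreV1MetaData set up as in those tests):
-//
-//     LocAndSize records[] = {{DiskLoc(0, 1000), 100}, {}};  // a Null DiskLoc terminates
-//     LocAndSize drecs[] = {{DiskLoc(0, 1100), 100}, {}};
-//     initializeV1RS(&opCtx, records, drecs, NULL, &em, md);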
-
-/**
- * Asserts that the V1RecordStore defined by md has the passed in records and drecs in the
- * correct order.
- *
- * Lists of LocAndSize are terminated by a Null DiskLoc. Passing a NULL pointer means don't check
- * that list.
- */
-void assertStateV1RS(OperationContext* opCtx,
- const LocAndSize* records,
- const LocAndSize* drecs,
- const LocAndSize* legacyGrabBag,
- const ExtentManager* em,
- const DummyRecordStoreV1MetaData* md);
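-
-// For example, with 'drecs' as in the sketch above, passing NULL for 'records' checks
-// only the deleted lists:
-//
-//     assertStateV1RS(&opCtx, NULL, drecs, NULL, &em, md);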
-
-} // namespace mongo
diff --git a/src/mongo/db/storage/mmap_v1/repair_database.cpp b/src/mongo/db/storage/mmap_v1/repair_database.cpp
deleted file mode 100644
index 416ff14063e..00000000000
--- a/src/mongo/db/storage/mmap_v1/repair_database.cpp
+++ /dev/null
@@ -1,499 +0,0 @@
-// repair_database.cpp
-
-/**
-* Copyright (C) 2014 MongoDB Inc.
-*
-* This program is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Affero General Public License, version 3,
-* as published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU Affero General Public License for more details.
-*
-* You should have received a copy of the GNU Affero General Public License
-* along with this program. If not, see <http://www.gnu.org/licenses/>.
-*
-* As a special exception, the copyright holders give permission to link the
-* code of portions of this program with the OpenSSL library under certain
-* conditions as described in each individual source file and distribute
-* linked combinations including the program with the OpenSSL library. You
-* must comply with the GNU Affero General Public License in all respects for
-* all of the code used other than as permitted herein. If you modify file(s)
-* with this exception, you may extend this exception to your version of the
-* file(s), but you are not obligated to do so. If you do not wish to do so,
-* delete this exception statement from your version. If you delete this
-* exception statement from all source files in the program, then also delete
-* it in the license file.
-*/
-
-#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kStorage
-
-#include "mongo/platform/basic.h"
-
-#include "mongo/db/storage/mmap_v1/mmap_v1_engine.h"
-
-#include <boost/filesystem/operations.hpp>
-
-#include "mongo/db/background.h"
-#include "mongo/db/catalog/collection.h"
-#include "mongo/db/catalog/database.h"
-#include "mongo/db/catalog/database_holder.h"
-#include "mongo/db/catalog/index_create.h"
-#include "mongo/db/catalog/uuid_catalog.h"
-#include "mongo/db/client.h"
-#include "mongo/db/db_raii.h"
-#include "mongo/db/index/index_descriptor.h"
-#include "mongo/db/storage/mmap_v1/dur.h"
-#include "mongo/db/storage/mmap_v1/file_allocator.h"
-#include "mongo/db/storage/mmap_v1/mmap.h"
-#include "mongo/db/storage/mmap_v1/mmap_v1_database_catalog_entry.h"
-#include "mongo/db/storage/mmap_v1/mmap_v1_options.h"
-#include "mongo/db/storage/mmap_v1/repair_database_interface.h"
-#include "mongo/util/file.h"
-#include "mongo/util/log.h"
-#include "mongo/util/scopeguard.h"
-
-namespace mongo {
-
-using std::unique_ptr;
-using std::endl;
-using std::map;
-using std::string;
-using std::stringstream;
-using std::vector;
-
-typedef boost::filesystem::path Path;
-
-// inheritable class to implement an operation that may be applied to all
-// files in a database using _applyOpToDataFiles()
-class FileOp {
-public:
- virtual ~FileOp() {}
- // Return true if file exists and operation successful
- virtual bool apply(const boost::filesystem::path& p) = 0;
- virtual const char* op() const = 0;
-};
-
-void _applyOpToDataFiles(const string& database,
- FileOp& fo,
- bool afterAllocator = false,
- const string& path = storageGlobalParams.dbpath);
-
-void _deleteDataFiles(const std::string& database) {
- if (storageGlobalParams.directoryperdb) {
- FileAllocator::get()->waitUntilFinished();
- MONGO_ASSERT_ON_EXCEPTION_WITH_MSG(
- boost::filesystem::remove_all(boost::filesystem::path(storageGlobalParams.dbpath) /
- database),
- "delete data files with a directoryperdb");
- return;
- }
- class : public FileOp {
- virtual bool apply(const boost::filesystem::path& p) {
- return boost::filesystem::remove(p);
- }
- virtual const char* op() const {
- return "remove";
- }
- } deleter;
- _applyOpToDataFiles(database, deleter, true);
-}
-
-void boostRenameWrapper(const Path& from, const Path& to) {
- try {
- boost::filesystem::rename(from, to);
- } catch (const boost::filesystem::filesystem_error&) {
- // boost rename doesn't work across partitions
- boost::filesystem::copy_file(from, to);
- boost::filesystem::remove(from);
- }
-}
-
-// back up original database files to 'temp' dir
-void _renameForBackup(const std::string& database, const Path& reservedPath) {
- Path newPath(reservedPath);
- if (storageGlobalParams.directoryperdb)
- newPath /= database;
- class Renamer : public FileOp {
- public:
- Renamer(const Path& newPath) : newPath_(newPath) {}
-
- private:
- const boost::filesystem::path& newPath_;
- virtual bool apply(const Path& p) {
- if (!boost::filesystem::exists(p))
- return false;
- boostRenameWrapper(p, newPath_ / (p.leaf().string() + ".bak"));
- return true;
- }
- virtual const char* op() const {
- return "renaming";
- }
- } renamer(newPath);
- _applyOpToDataFiles(database, renamer, true);
-}
-
-intmax_t dbSize(const string& database) {
- class SizeAccumulator : public FileOp {
- public:
- SizeAccumulator() : totalSize_(0) {}
- intmax_t size() const {
- return totalSize_;
- }
-
- private:
- virtual bool apply(const boost::filesystem::path& p) {
- if (!boost::filesystem::exists(p))
- return false;
- totalSize_ += boost::filesystem::file_size(p);
- return true;
- }
- virtual const char* op() const {
- return "checking size";
- }
- intmax_t totalSize_;
- };
- SizeAccumulator sa;
- _applyOpToDataFiles(database, sa);
- return sa.size();
-}
-
-// move temp files to standard data dir
-void _replaceWithRecovered(const string& database, const char* reservedPathString) {
- Path newPath(storageGlobalParams.dbpath);
- if (storageGlobalParams.directoryperdb)
- newPath /= database;
- class Replacer : public FileOp {
- public:
- Replacer(const Path& newPath) : newPath_(newPath) {}
-
- private:
- const boost::filesystem::path& newPath_;
- virtual bool apply(const Path& p) {
- if (!boost::filesystem::exists(p))
- return false;
- boostRenameWrapper(p, newPath_ / p.leaf());
- return true;
- }
- virtual const char* op() const {
- return "renaming";
- }
- } replacer(newPath);
- _applyOpToDataFiles(database, replacer, true, reservedPathString);
-}
-
-// generate a directory name for storing temp data files
-Path uniqueReservedPath(const char* prefix) {
- Path repairPath = Path(storageGlobalParams.repairpath);
- Path reservedPath;
- int i = 0;
- bool exists = false;
- do {
- stringstream ss;
- ss << prefix << "_repairDatabase_" << i++;
- reservedPath = repairPath / ss.str();
- MONGO_ASSERT_ON_EXCEPTION(exists = boost::filesystem::exists(reservedPath));
- } while (exists);
- return reservedPath;
-}
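-
-// For example, with --repairpath /data/repair and prefix "backup", this returns the first
-// of /data/repair/backup_repairDatabase_0, _1, ... that does not already exist.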
-
-void _applyOpToDataFiles(const string& database,
- FileOp& fo,
- bool afterAllocator,
- const string& path) {
- if (afterAllocator)
- FileAllocator::get()->waitUntilFinished();
- string c = database;
- c += '.';
- boost::filesystem::path p(path);
- if (storageGlobalParams.directoryperdb)
- p /= database;
- boost::filesystem::path q;
- q = p / (c + "ns");
- bool ok = false;
- MONGO_ASSERT_ON_EXCEPTION(ok = fo.apply(q));
- if (ok) {
- LOG(2) << fo.op() << " file " << q.string() << endl;
- }
- int i = 0;
-    int extra = 10;  // should not be necessary; this is defensive in case there are missing files
- while (1) {
- verify(i <= DiskLoc::MaxFiles);
- stringstream ss;
- ss << c << i;
- q = p / ss.str();
- MONGO_ASSERT_ON_EXCEPTION(ok = fo.apply(q));
- if (ok) {
- if (extra != 10) {
- LOG(1) << fo.op() << " file " << q.string() << endl;
- log() << " _applyOpToDataFiles() warning: extra == " << extra << endl;
- }
- } else if (--extra <= 0)
- break;
- i++;
- }
-}
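-
-// For a database "foo" (with directoryperdb off) this visits <dbpath>/foo.ns and then
-// <dbpath>/foo.0, <dbpath>/foo.1, ..., tolerating up to ten missing files before stopping.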
-
-class RepairFileDeleter {
-public:
- RepairFileDeleter(OperationContext* opCtx,
- const string& dbName,
- const string& pathString,
- const Path& path)
- : _opCtx(opCtx), _dbName(dbName), _pathString(pathString), _path(path), _success(false) {}
-
- ~RepairFileDeleter() {
- if (_success)
- return;
-
- log() << "cleaning up failed repair "
- << "db: " << _dbName << " path: " << _pathString;
-
- try {
- getDur().syncDataAndTruncateJournal(_opCtx);
-
- // need both in case journaling is disabled
- MongoFile::flushAll(_opCtx, true);
-
- MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::remove_all(_path));
- } catch (DBException& e) {
- error() << "RepairFileDeleter failed to cleanup: " << redact(e);
- error() << "aborting";
- fassertFailed(17402);
- }
- }
-
- void success() {
- _success = true;
- }
-
-private:
- OperationContext* _opCtx;
- string _dbName;
- string _pathString;
- Path _path;
- bool _success;
-};
-
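
RepairFileDeleter above is a cleanup-on-failure guard: unless success() is called before it goes out of scope, its destructor removes the half-written repair directory (after syncing journal and data files, and fasserting if even that cleanup fails). The underlying RAII shape, as a generic sketch:

    #include <functional>
    #include <utility>

    // Generic form of the RAII pattern RepairFileDeleter implements: run a
    // cleanup action from the destructor unless success() was reached first.
    class CleanupOnFailure {
    public:
        explicit CleanupOnFailure(std::function<void()> cleanup)
            : _cleanup(std::move(cleanup)) {}

        ~CleanupOnFailure() {
            if (!_success && _cleanup)
                _cleanup();  // e.g. remove a half-written repair directory
        }

        void success() {
            _success = true;
        }

    private:
        std::function<void()> _cleanup;
        bool _success = false;
    };
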
-Status MMAPV1Engine::repairDatabase(OperationContext* opCtx,
- const std::string& dbName,
- bool preserveClonedFilesOnFailure,
- bool backupOriginalFiles) {
- unique_ptr<RepairFileDeleter> repairFileDeleter;
-
- // Must be done before and after repair
- getDur().syncDataAndTruncateJournal(opCtx);
-
- intmax_t totalSize = dbSize(dbName);
- intmax_t freeSize = File::freeSpace(storageGlobalParams.repairpath);
-
- if (freeSize > -1 && freeSize < totalSize) {
- return Status(ErrorCodes::OutOfDiskSpace,
- str::stream() << "Cannot repair database " << dbName << " having size: "
- << totalSize
- << " (bytes) because free disk space is: "
- << freeSize
- << " (bytes)");
- }
-
- opCtx->checkForInterrupt();
-
- Path reservedPath = uniqueReservedPath(
- (preserveClonedFilesOnFailure || backupOriginalFiles) ? "backup" : "_tmp");
- bool created = false;
- MONGO_ASSERT_ON_EXCEPTION(created = boost::filesystem::create_directory(reservedPath));
- invariant(created);
- string reservedPathString = reservedPath.string();
-
- if (!preserveClonedFilesOnFailure)
- repairFileDeleter.reset(
- new RepairFileDeleter(opCtx, dbName, reservedPathString, reservedPath));
-
- {
- Database* originalDatabase = DatabaseHolder::getDatabaseHolder().openDb(opCtx, dbName);
- if (originalDatabase == NULL) {
- return Status(ErrorCodes::NamespaceNotFound, "database does not exist to repair");
- }
-
- unique_ptr<MMAPV1DatabaseCatalogEntry> dbEntry;
- unique_ptr<Database> tempDatabase;
-
- // Must call this before MMAPV1DatabaseCatalogEntry's destructor closes the DB files
- ON_BLOCK_EXIT([&dbEntry, &opCtx, &tempDatabase] {
- getDur().syncDataAndTruncateJournal(opCtx);
- UUIDCatalog::get(opCtx).onCloseDatabase(tempDatabase.get());
- dbEntry->close(opCtx);
- });
-
- {
- dbEntry.reset(new MMAPV1DatabaseCatalogEntry(
- opCtx,
- dbName,
- reservedPathString,
- storageGlobalParams.directoryperdb,
- true,
- _extentManagerFactory->create(
- dbName, reservedPathString, storageGlobalParams.directoryperdb)));
- tempDatabase.reset(new Database(opCtx, dbName, dbEntry.get()));
- }
-
- map<string, CollectionOptions> namespacesToCopy;
- {
- NamespaceString nss(dbName, "system.namespaces");
- OldClientContext ctx(opCtx, nss.ns());
- Collection* coll = originalDatabase->getCollection(opCtx, nss);
- if (coll) {
- auto cursor = coll->getCursor(opCtx);
- while (auto record = cursor->next()) {
- BSONObj obj = record->data.releaseToBson();
-
- string ns = obj["name"].String();
-
- NamespaceString nss(ns);
- if (nss.isSystem()) {
- if (nss.isSystemDotIndexes())
- continue;
- if (nss.coll() == "system.namespaces")
- continue;
- }
-
- if (!nss.isNormal())
- continue;
-
- CollectionOptions options;
- if (obj["options"].isABSONObj()) {
- Status status =
- options.parse(obj["options"].Obj(), CollectionOptions::parseForStorage);
- if (!status.isOK())
- return status;
- }
- namespacesToCopy[ns] = options;
- }
- }
- }
-
- for (map<string, CollectionOptions>::const_iterator i = namespacesToCopy.begin();
- i != namespacesToCopy.end();
- ++i) {
- string ns = i->first;
- NamespaceString nss(ns);
- CollectionOptions options = i->second;
-
- Collection* tempCollection = NULL;
- {
- WriteUnitOfWork wunit(opCtx);
- if (options.uuid) {
- UUIDCatalog::get(opCtx).onDropCollection(opCtx, options.uuid.get());
- }
- tempCollection = tempDatabase->createCollection(opCtx, ns, options, false);
- wunit.commit();
- }
-
- OldClientContext readContext(opCtx, ns, originalDatabase);
- Collection* originalCollection = originalDatabase->getCollection(opCtx, nss);
- invariant(originalCollection);
-
- // data
-
- // TODO SERVER-14812 add a mode that drops duplicates rather than failing
- MultiIndexBlock indexer(opCtx, tempCollection);
- {
- vector<BSONObj> indexes;
- IndexCatalog::IndexIterator ii =
- originalCollection->getIndexCatalog()->getIndexIterator(opCtx, false);
- while (ii.more()) {
- IndexDescriptor* desc = ii.next();
- indexes.push_back(desc->infoObj());
- }
-
- Status status = indexer.init(indexes).getStatus();
- if (!status.isOK()) {
- return status;
- }
- }
-
- std::vector<MultiIndexBlock*> indexers{&indexer};
- auto cursor = originalCollection->getCursor(opCtx);
- while (auto record = cursor->next()) {
- BSONObj doc = record->data.releaseToBson();
-
- WriteUnitOfWork wunit(opCtx);
- Status status = tempCollection->insertDocument(opCtx, doc, indexers, false);
- if (!status.isOK())
- return status;
-
- wunit.commit();
- opCtx->checkForInterrupt();
- }
-
- Status status = indexer.doneInserting();
- if (!status.isOK())
- return status;
-
- {
- WriteUnitOfWork wunit(opCtx);
- indexer.commit();
- wunit.commit();
- }
- }
-
- getDur().syncDataAndTruncateJournal(opCtx);
-
- // need both in case journaling is disabled
- MongoFile::flushAll(opCtx, true);
-
- opCtx->checkForInterrupt();
- }
-
- // at this point if we abort, we don't want to delete new files
- // as they might be the only copies
-
- if (repairFileDeleter.get())
- repairFileDeleter->success();
-
- // Close the database so we can rename/delete the original data files
- DatabaseHolder::getDatabaseHolder().close(opCtx, dbName, "database closed for repair");
-
- if (backupOriginalFiles) {
- _renameForBackup(dbName, reservedPath);
- } else {
- // first make new directory before deleting data
- Path newDir = Path(storageGlobalParams.dbpath) / dbName;
- MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir));
-
- // this deletes old files
- _deleteDataFiles(dbName);
-
- if (!boost::filesystem::exists(newDir)) {
- // we deleted because of directoryperdb
- // re-create
- MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir));
- }
- }
-
- _replaceWithRecovered(dbName, reservedPathString.c_str());
-
- if (!backupOriginalFiles) {
- MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::remove_all(reservedPath));
- }
-
- // Reopen the database so it's discoverable
- DatabaseHolder::getDatabaseHolder().openDb(opCtx, dbName);
-
- return Status::OK();
-}
-
-MONGO_INITIALIZER(RepairDatabaseMMapV1)(InitializerContext* context) {
- setRepairDatabaseMmapv1Impl([](StorageEngine* engine,
- OperationContext* opCtx,
- const std::string& dbName,
- bool preserveClonedFilesOnFailure,
- bool backupOriginalFiles) {
- return static_cast<MMAPV1Engine*>(engine)->repairDatabase(
- opCtx, dbName, preserveClonedFilesOnFailure, backupOriginalFiles);
- });
- return Status::OK();
-}
-}
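
The MONGO_INITIALIZER above, together with repair_database_interface.{h,cpp} deleted below, implemented a linker-level decoupling: core code calls repairDatabaseMmapv1(), which dispatches through a stdx::function that the MMAPv1 library registers at startup, so callers need no compile-time dependency on the engine. A self-contained sketch of that registration pattern (names illustrative):

    #include <functional>
    #include <iostream>
    #include <string>

    namespace {
    // Core code calls through this function object; an optional module fills
    // it in at startup, playing the role of the MONGO_INITIALIZER above.
    std::function<int(const std::string&)> repairImpl;
    }

    void setRepairImpl(std::function<int(const std::string&)> impl) {
        repairImpl = std::move(impl);
    }

    int repairDatabase(const std::string& dbName) {
        // In the real code, the initializer guarantees registration happens
        // before any caller runs; calling an unset function would be a bug.
        return repairImpl(dbName);
    }

    int main() {
        setRepairImpl([](const std::string& db) {
            std::cout << "repairing " << db << '\n';
            return 0;
        });
        return repairDatabase("test");
    }
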
diff --git a/src/mongo/db/storage/mmap_v1/repair_database_interface.cpp b/src/mongo/db/storage/mmap_v1/repair_database_interface.cpp
deleted file mode 100644
index 6988cf76b66..00000000000
--- a/src/mongo/db/storage/mmap_v1/repair_database_interface.cpp
+++ /dev/null
@@ -1,51 +0,0 @@
-/**
- * Copyright (C) 2018 MongoDB Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#include "mongo/db/storage/mmap_v1/repair_database_interface.h"
-
-namespace mongo {
-namespace {
-stdx::function<Status(StorageEngine*, OperationContext*, const std::string&, bool, bool)>
- repairDatabaseMmapv1Func;
-} // namespace
-
-void setRepairDatabaseMmapv1Impl(
- stdx::function<Status(StorageEngine* engine, OperationContext*, const std::string&, bool, bool)>
- impl) {
- repairDatabaseMmapv1Func = std::move(impl);
-}
-
-Status repairDatabaseMmapv1(StorageEngine* engine,
- OperationContext* opCtx,
- const std::string& dbName,
- bool preserveClonedFilesOnFailure,
- bool backupOriginalFiles) {
- return repairDatabaseMmapv1Func(
- engine, opCtx, dbName, preserveClonedFilesOnFailure, backupOriginalFiles);
-}
-} // namespace mongo
diff --git a/src/mongo/db/storage/mmap_v1/repair_database_interface.h b/src/mongo/db/storage/mmap_v1/repair_database_interface.h
deleted file mode 100644
index c9fab68cc81..00000000000
--- a/src/mongo/db/storage/mmap_v1/repair_database_interface.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/**
- * Copyright (C) 2018 MongoDB Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#pragma once
-
-#include "mongo/base/status.h"
-#include "mongo/stdx/functional.h"
-
-namespace mongo {
-class StorageEngine;
-class OperationContext;
-
-void setRepairDatabaseMmapv1Impl(
- stdx::function<Status(StorageEngine*, OperationContext*, std::string const&, bool, bool)> impl);
-
-Status repairDatabaseMmapv1(StorageEngine* engine,
- OperationContext* opCtx,
- const std::string& dbName,
- bool preserveClonedFilesOnFailure,
- bool backupOriginalFiles);
-} // namespace mongo
diff --git a/src/mongo/db/storage/mmap_v1/touch_pages.cpp b/src/mongo/db/storage/mmap_v1/touch_pages.cpp
deleted file mode 100644
index 7aedffe2fe3..00000000000
--- a/src/mongo/db/storage/mmap_v1/touch_pages.cpp
+++ /dev/null
@@ -1,42 +0,0 @@
-/* Copyright 2009 10gen Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects
- * for all of the code used other than as permitted herein. If you modify
- * file(s) with this exception, you may extend this exception to your
- * version of the file(s), but you are not obligated to do so. If you do not
- * wish to do so, delete this exception statement from your version. If you
- * delete this exception statement from all source files in the program,
- * then also delete it in the license file.
- */
-
-#include "mongo/platform/basic.h"
-
-#include "mongo/db/storage/mmap_v1/touch_pages.h"
-
-namespace mongo {
-
-char _touch_pages_char_reader; // goes in .bss
-
-void touch_pages(const char* buf, size_t length, size_t pageSize) {
- // read first byte of every page, in order
- for (size_t i = 0; i < length; i += pageSize) {
- _touch_pages_char_reader += buf[i];
- }
-}
-}
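
touch_pages() reads the first byte of every page so the OS faults the range into the buffer pool; the accumulating global keeps the compiler from optimizing the reads away. A hedged usage sketch against a memory-mapped file (plain POSIX calls, not the removed MongoDB wrappers):

    #include <fcntl.h>
    #include <sys/mman.h>
    #include <sys/stat.h>
    #include <unistd.h>
    #include <cstddef>

    static char sink;  // analogous to _touch_pages_char_reader above

    // Read one byte per page, in order, to encourage the OS to prefetch.
    void touchPages(const char* buf, std::size_t length, std::size_t pageSize = 4096) {
        for (std::size_t i = 0; i < length; i += pageSize)
            sink += buf[i];
    }

    bool prefetchFile(const char* path) {
        int fd = ::open(path, O_RDONLY);
        if (fd < 0)
            return false;
        struct stat st;
        if (::fstat(fd, &st) != 0 || st.st_size == 0) {
            ::close(fd);
            return false;
        }
        void* p = ::mmap(nullptr, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
        ::close(fd);  // the mapping stays valid after close
        if (p == MAP_FAILED)
            return false;
        touchPages(static_cast<const char*>(p), st.st_size);
        ::munmap(p, st.st_size);
        return true;
    }
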
diff --git a/src/mongo/db/storage/mmap_v1/touch_pages.h b/src/mongo/db/storage/mmap_v1/touch_pages.h
deleted file mode 100644
index c98b0e9a427..00000000000
--- a/src/mongo/db/storage/mmap_v1/touch_pages.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* Copyright 2009 10gen Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects
- * for all of the code used other than as permitted herein. If you modify
- * file(s) with this exception, you may extend this exception to your
- * version of the file(s), but you are not obligated to do so. If you do not
- * wish to do so, delete this exception statement from your version. If you
- * delete this exception statement from all source files in the program,
- * then also delete it in the license file.
- */
-
-#pragma once
-
-#include <cstdlib>
-
-namespace mongo {
-
-/**
- * Touches a range of pages, to encourage the OS to get them into the buffer pool.
- */
-void touch_pages(const char* buf, size_t length, size_t pageSize = 4096);
-}
diff --git a/src/mongo/db/storage/mobile/mobile_recovery_unit.h b/src/mongo/db/storage/mobile/mobile_recovery_unit.h
index db008586f50..6f13edab943 100644
--- a/src/mongo/db/storage/mobile/mobile_recovery_unit.h
+++ b/src/mongo/db/storage/mobile/mobile_recovery_unit.h
@@ -62,12 +62,6 @@ public:
void registerChange(Change* change) override;
- void* writingPtr(void* data, size_t len) override {
- MONGO_UNREACHABLE;
- }
-
- void setRollbackWritesDisabled() override {}
-
SnapshotId getSnapshotId() const override {
return SnapshotId();
}
diff --git a/src/mongo/db/storage/recovery_unit.h b/src/mongo/db/storage/recovery_unit.h
index c4acb3935bb..7a12dfe136e 100644
--- a/src/mongo/db/storage/recovery_unit.h
+++ b/src/mongo/db/storage/recovery_unit.h
@@ -355,50 +355,6 @@ public:
registerChange(new OnCommitChange(std::move(callback)));
}
- //
- // The remaining methods probably belong on DurRecoveryUnit rather than on the interface.
- //
-
- /**
- * Declare that the data at [x, x + len) is being written.
- */
- virtual void* writingPtr(void* data, size_t len) = 0;
-
- //
- // Syntactic sugar
- //
-
- /**
- * Declare write intent for an int
- */
- inline int& writingInt(int& d) {
- return *writing(&d);
- }
-
- /**
- * A templated helper for writingPtr.
- */
- template <typename T>
- inline T* writing(T* x) {
- writingPtr(x, sizeof(T));
- return x;
- }
-
- /**
- * Sets a flag that declares this RecoveryUnit will skip rolling back writes, for the
- * duration of the current outermost WriteUnitOfWork. This function can only be called
- * between a pair of unnested beginUnitOfWork() / endUnitOfWork() calls.
- * The flag is cleared when endUnitOfWork() is called.
- * While the flag is set, rollback will skip rolling back writes, but custom rollback
- * change functions are still called. Clearly, this functionality should only be used when
- * writing to temporary collections that can be cleaned up externally. For example,
- * foreground index builds write to a temporary collection; if something goes wrong that
- * normally requires a rollback, we can instead clean up the index by dropping the entire
- * index.
- * Setting the flag may permit increased performance.
- */
- virtual void setRollbackWritesDisabled() = 0;
-
virtual void setOrderedCommit(bool orderedCommit) = 0;
protected:
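
The writingPtr()/writing<T>() family removed above was MMAPv1's declare-write-intent API: callers announced a byte range before mutating mapped memory so the durability layer could journal the pre-image and restore it on rollback. A toy sketch of the idiom (illustrative, not the removed DurRecoveryUnit):

    #include <cstddef>
    #include <cstring>
    #include <utility>
    #include <vector>

    // Before mutating memory, the caller registers the range so a pre-image
    // log can capture it; rollback() restores the declared ranges.
    class ToyRecoveryUnit {
    public:
        void* writingPtr(void* data, std::size_t len) {
            char* base = static_cast<char*>(data);
            _preImages.emplace_back(base, std::vector<char>(base, base + len));
            return data;
        }

        template <typename T>
        T* writing(T* x) {
            writingPtr(x, sizeof(T));
            return x;
        }

        void rollback() {
            for (auto it = _preImages.rbegin(); it != _preImages.rend(); ++it)
                std::memcpy(it->first, it->second.data(), it->second.size());
            _preImages.clear();
        }

    private:
        std::vector<std::pair<char*, std::vector<char>>> _preImages;
    };

    // Usage, mirroring call sites in the removed MMAPv1 code:
    //   int* counter = ru.writing(&count);
    //   *counter += 1;  // safe: the old value was captured for rollback
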
diff --git a/src/mongo/db/storage/recovery_unit_noop.h b/src/mongo/db/storage/recovery_unit_noop.h
index 9713b6aa1ec..16916414b7e 100644
--- a/src/mongo/db/storage/recovery_unit_noop.h
+++ b/src/mongo/db/storage/recovery_unit_noop.h
@@ -71,11 +71,6 @@ public:
_changes.push_back(std::unique_ptr<Change>(change));
}
- virtual void* writingPtr(void* data, size_t len) {
- return data;
- }
- virtual void setRollbackWritesDisabled() {}
-
virtual SnapshotId getSnapshotId() const {
return SnapshotId();
}
diff --git a/src/mongo/db/storage/storage_engine_init.cpp b/src/mongo/db/storage/storage_engine_init.cpp
index 2c4a526e64b..c22e6915e52 100644
--- a/src/mongo/db/storage/storage_engine_init.cpp
+++ b/src/mongo/db/storage/storage_engine_init.cpp
@@ -126,12 +126,6 @@ void initializeStorageEngine(ServiceContext* service, const StorageEngineInitFla
log() << startupWarningsLog;
}
- const std::string repairpath = storageGlobalParams.repairpath;
- uassert(40311,
- str::stream() << "Cannot start server. The command line option '--repairpath'"
- << " is only supported by the mmapv1 storage engine",
- repairpath.empty() || repairpath == dbpath || storageGlobalParams.engine == "mmapv1");
-
const StorageEngine::Factory* factory =
getFactoryForStorageEngine(service, storageGlobalParams.engine);
diff --git a/src/mongo/db/storage/storage_engine_metadata.cpp b/src/mongo/db/storage/storage_engine_metadata.cpp
index 1f1c061f4fa..8892baf1a1b 100644
--- a/src/mongo/db/storage/storage_engine_metadata.cpp
+++ b/src/mongo/db/storage/storage_engine_metadata.cpp
@@ -40,9 +40,15 @@
#include <ostream>
#include <vector>
+#ifdef __linux__ // Only needed by flushMyDirectory on Linux
+#include <boost/filesystem/path.hpp>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#endif
+
#include "mongo/db/bson/dotted_path_support.h"
#include "mongo/db/jsobj.h"
-#include "mongo/db/storage/mmap_v1/paths.h"
#include "mongo/util/assert_util.h"
#include "mongo/util/file.h"
#include "mongo/util/log.h"
@@ -56,15 +62,6 @@ namespace {
const std::string kMetadataBasename = "storage.bson";
-/**
- * Returns true if local.ns is found in 'directory' or 'directory'/local/.
- */
-bool containsMMapV1LocalNsFile(const std::string& directory) {
- boost::filesystem::path directoryPath(directory);
- return boost::filesystem::exists(directoryPath / "local.ns") ||
- boost::filesystem::exists((directoryPath / "local") / "local.ns");
-}
-
bool fsyncFile(boost::filesystem::path path) {
invariant(path.has_filename());
File file;
@@ -99,11 +96,6 @@ boost::optional<std::string> StorageEngineMetadata::getStorageEngineForPath(
return {metadata->getStorageEngine()};
}
- // Fallback to checking for MMAPv1-specific files to handle upgrades from before the
- // storage.bson metadata file was introduced in 3.0.
- if (containsMMapV1LocalNsFile(dbpath)) {
- return {std::string("mmapv1")};
- }
return {};
}
@@ -221,6 +213,51 @@ Status StorageEngineMetadata::read() {
return Status::OK();
}
+void flushMyDirectory(const boost::filesystem::path& file) {
+#ifdef __linux__ // this isn't needed elsewhere
+ static bool _warnedAboutFilesystem = false;
+ // If called without a fully qualified path this would assert, which makes mongoperf fail,
+ // so log a warning instead. A better long-term solution is needed.
+ // massert(13652, str::stream() << "Couldn't find parent dir for file: " << file.string(),);
+ if (!file.has_branch_path()) {
+ log() << "warning: flushMyDirectory couldn't find parent dir for file: " << file.string();
+ return;
+ }
+
+
+ boost::filesystem::path dir = file.branch_path(); // branch_path() is spelled parent_path() in newer Boost
+
+ LOG(1) << "flushing directory " << dir.string();
+
+ int fd = ::open(dir.string().c_str(), O_RDONLY); // DO NOT THROW OR ASSERT BEFORE CLOSING
+ massert(13650,
+ str::stream() << "Couldn't open directory '" << dir.string() << "' for flushing: "
+ << errnoWithDescription(),
+ fd >= 0);
+ if (fsync(fd) != 0) {
+ int e = errno;
+ if (e == EINVAL) { // the filesystem does not support fsync() on directories
+ if (!_warnedAboutFilesystem) {
+ log() << "\tWARNING: This file system is not supported. For further information"
+ << " see:" << startupWarningsLog;
+ log() << "\t\t\thttp://dochub.mongodb.org/core/unsupported-filesystems"
+ << startupWarningsLog;
+ log() << "\t\tPlease notify MongoDB, Inc. if an unlisted filesystem generated "
+ << "this warning." << startupWarningsLog;
+ _warnedAboutFilesystem = true;
+ }
+ } else {
+ close(fd);
+ massert(13651,
+ str::stream() << "Couldn't fsync directory '" << dir.string() << "': "
+ << errnoWithDescription(e),
+ false);
+ }
+ }
+ close(fd);
+#endif
+}
+
Status StorageEngineMetadata::write() const {
if (_storageEngine.empty()) {
return Status(ErrorCodes::BadValue,
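
flushMyDirectory(), moved inline above from the deleted mmap_v1 paths code, exists because on Linux a create or rename is only crash-durable once the parent directory entry itself is fsynced. The standard sequence it supports, as a standalone sketch (POSIX; names illustrative):

    #include <fcntl.h>
    #include <unistd.h>
    #include <cstdio>
    #include <string>

    // Write a temp file, fsync it, rename it into place, then fsync the
    // parent directory so the rename itself survives a crash.
    bool durablyWrite(const std::string& dir,
                      const std::string& name,
                      const std::string& contents) {
        const std::string tmp = dir + "/" + name + ".tmp";
        const std::string dest = dir + "/" + name;

        int fd = ::open(tmp.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0644);
        if (fd < 0)
            return false;
        bool ok = ::write(fd, contents.data(), contents.size()) ==
                      static_cast<ssize_t>(contents.size()) &&
                  ::fsync(fd) == 0;
        ::close(fd);
        if (!ok || std::rename(tmp.c_str(), dest.c_str()) != 0)
            return false;

        // Direct analogue of flushMyDirectory: fsync the parent directory.
        int dfd = ::open(dir.c_str(), O_RDONLY);
        if (dfd < 0)
            return false;
        ok = ::fsync(dfd) == 0;
        ::close(dfd);
        return ok;
    }
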
diff --git a/src/mongo/db/storage/storage_options.h b/src/mongo/db/storage/storage_options.h
index 57c58ea747a..144dd1eb77b 100644
--- a/src/mongo/db/storage/storage_options.h
+++ b/src/mongo/db/storage/storage_options.h
@@ -74,12 +74,6 @@ struct StorageGlobalParams {
// running the repairDatabase database command on all databases.
bool repair;
- // --repairpath
- // Specifies the root directory containing MongoDB data files to use for the --repair
- // operation.
- // Default: A _tmp directory within the path specified by the dbPath option.
- std::string repairpath;
-
bool dur; // --dur durability (now --journal)
// --journalCommitInterval
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp
index b188b8de6a1..dd569219ab5 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp
@@ -236,11 +236,6 @@ void WiredTigerRecoveryUnit::preallocateSnapshot() {
getSession();
}
-void* WiredTigerRecoveryUnit::writingPtr(void* data, size_t len) {
- // This API should not be used for anything other than the MMAP V1 storage engine
- MONGO_UNREACHABLE;
-}
-
void WiredTigerRecoveryUnit::_txnClose(bool commit) {
invariant(_active);
WT_SESSION* s = _session->getSession();
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h
index 8bae90f0368..6eda5ef4588 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h
@@ -102,10 +102,6 @@ public:
ReadSource getTimestampReadSource() const override;
- void* writingPtr(void* data, size_t len) override;
-
- void setRollbackWritesDisabled() override {}
-
virtual void setOrderedCommit(bool orderedCommit) override {
_orderedCommit = orderedCommit;
}
diff --git a/src/mongo/dbtests/SConscript b/src/mongo/dbtests/SConscript
index 790e17896c2..34d16d7dfea 100644
--- a/src/mongo/dbtests/SConscript
+++ b/src/mongo/dbtests/SConscript
@@ -11,7 +11,6 @@ env.Library(
"framework_options.cpp",
],
LIBDEPS=[
- '$BUILD_DIR/mongo/db/storage/mmap_v1/storage_mmapv1',
'$BUILD_DIR/mongo/db/storage/storage_options',
'$BUILD_DIR/mongo/util/options_parser/options_parser_init',
'$BUILD_DIR/mongo/unittest/unittest',
@@ -75,11 +74,9 @@ dbtest = env.Program(
'jstests.cpp',
'logical_sessions_tests.cpp',
'matchertests.cpp',
- 'mmaptests.cpp',
'mock_dbclient_conn_test.cpp',
'mock_replica_set_test.cpp',
'multikey_paths_test.cpp',
- 'namespacetests.cpp',
'oplogstarttests.cpp',
'pdfiletests.cpp',
'plan_ranking.cpp',
@@ -138,7 +135,6 @@ dbtest = env.Program(
"$BUILD_DIR/mongo/db/repl/storage_interface_impl",
"$BUILD_DIR/mongo/db/serveronly",
"$BUILD_DIR/mongo/db/sessions_collection_standalone",
- "$BUILD_DIR/mongo/db/storage/mmap_v1/paths",
"$BUILD_DIR/mongo/db/storage/kv/kv_engine_core",
"$BUILD_DIR/mongo/transport/transport_layer_manager",
"$BUILD_DIR/mongo/util/clock_source_mock",
diff --git a/src/mongo/dbtests/basictests.cpp b/src/mongo/dbtests/basictests.cpp
index 541d2c776ba..4e9832639e9 100644
--- a/src/mongo/dbtests/basictests.cpp
+++ b/src/mongo/dbtests/basictests.cpp
@@ -34,8 +34,6 @@
#include <iostream>
#include "mongo/db/client.h"
-#include "mongo/db/storage/mmap_v1/compress.h"
-#include "mongo/db/storage/mmap_v1/paths.h"
#include "mongo/dbtests/dbtests.h"
#include "mongo/util/base64.h"
#include "mongo/util/queue.h"
@@ -382,36 +380,6 @@ public:
}
};
-class RelativePathTest {
-public:
- void run() {
- RelativePath a = RelativePath::fromRelativePath("a");
- RelativePath b = RelativePath::fromRelativePath("a");
- RelativePath c = RelativePath::fromRelativePath("b");
- RelativePath d = RelativePath::fromRelativePath("a/b");
-
-
- ASSERT(a == b);
- ASSERT(a != c);
- ASSERT(a != d);
- ASSERT(c != d);
- }
-};
-
-struct CompressionTest1 {
- void run() {
- const char* c = "this is a test";
- std::string s;
- size_t len = compress(c, strlen(c) + 1, &s);
- verify(len > 0);
-
- std::string out;
- bool ok = uncompress(s.c_str(), s.size(), &out);
- verify(ok);
- verify(strcmp(out.c_str(), c) == 0);
- }
-} ctest1;
-
class All : public Suite {
public:
All() : Suite("basic") {}
@@ -436,9 +404,6 @@ public:
add<StrTests>();
add<HostAndPortTests>();
- add<RelativePathTest>();
-
- add<CompressionTest1>();
}
};
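
The removed CompressionTest1 exercised compress()/uncompress() from the deleted mmap_v1/compress.h; that those helpers were thin wrappers over Snappy is an assumption here. The same round-trip check written directly against Snappy's public API:

    #include <snappy.h>
    #include <cassert>
    #include <cstring>
    #include <string>

    // Round-trip a small string through Snappy, mirroring the deleted test.
    // Assumes the removed compress()/uncompress() wrapped snappy::Compress
    // and snappy::Uncompress.
    int main() {
        const char* c = "this is a test";
        std::string s;
        size_t len = snappy::Compress(c, std::strlen(c) + 1, &s);
        assert(len > 0);

        std::string out;
        bool ok = snappy::Uncompress(s.data(), s.size(), &out);
        assert(ok);
        assert(std::strcmp(out.c_str(), c) == 0);
        return 0;
    }
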
diff --git a/src/mongo/dbtests/framework_options.cpp b/src/mongo/dbtests/framework_options.cpp
index 192760718d4..d9929c5842c 100644
--- a/src/mongo/dbtests/framework_options.cpp
+++ b/src/mongo/dbtests/framework_options.cpp
@@ -39,7 +39,6 @@
#include "mongo/base/status.h"
#include "mongo/bson/util/builder.h"
#include "mongo/db/query/find.h"
-#include "mongo/db/storage/mmap_v1/mmap_v1_options.h"
#include "mongo/db/storage/storage_options.h"
#include "mongo/dbtests/dbtests.h"
#include "mongo/unittest/unittest.h"
@@ -152,18 +151,6 @@ Status storeTestFrameworkOptions(const moe::Environment& params,
frameworkGlobalParams.perfHist = params["perfHist"].as<unsigned>();
}
- bool nodur = false;
- if (params.count("nodur")) {
- nodur = true;
- storageGlobalParams.dur = false;
- }
- if (params.count("dur") || storageGlobalParams.dur) {
- storageGlobalParams.dur = true;
- }
-
- if (params.count("nopreallocj")) {
- mmapv1GlobalOptions.preallocj = false;
- }
if (params.count("debug") || params.count("verbose")) {
logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Debug(1));
@@ -194,32 +181,11 @@ Status storeTestFrameworkOptions(const moe::Environment& params,
return Status(ErrorCodes::BadValue, sb.str());
}
+ DEV log() << "DEBUG build" << endl;
+
string dbpathString = p.string();
storageGlobalParams.dbpath = dbpathString.c_str();
- mmapv1GlobalOptions.prealloc = false;
-
- // dbtest defaults to smallfiles
- mmapv1GlobalOptions.smallfiles = true;
- if (params.count("bigfiles")) {
- storageGlobalParams.dur = true;
- }
-
- DEV log() << "DEBUG build" << endl;
- if (sizeof(void*) == 4)
- log() << "32bit" << endl;
- log() << "random seed: " << frameworkGlobalParams.seed << endl;
-
- if (time(0) % 3 == 0 && !nodur) {
- if (!storageGlobalParams.dur) {
- storageGlobalParams.dur = true;
- log() << "****************" << endl;
- log() << "running with journaling enabled to test that. dbtests will do this "
- << "occasionally even if --dur is not specified." << endl;
- log() << "****************" << endl;
- }
- }
-
storageGlobalParams.engine = params["storage.engine"].as<string>();
if (params.count("suites")) {
@@ -231,13 +197,6 @@ Status storeTestFrameworkOptions(const moe::Environment& params,
frameworkGlobalParams.filter = params["filter"].as<string>();
}
- if (kDebugBuild && storageGlobalParams.dur) {
- log() << "Debug Build: automatically enabling mmapv1GlobalOptions.journalOptions=8 "
- << "(JournalParanoid)" << endl;
- // this was commented out. why too slow or something?
- mmapv1GlobalOptions.journalOptions |= MMAPV1Options::JournalParanoid;
- }
-
return Status::OK();
}
}
diff --git a/src/mongo/dbtests/jsobjtests.cpp b/src/mongo/dbtests/jsobjtests.cpp
index b670cab1dae..7dca7745a97 100644
--- a/src/mongo/dbtests/jsobjtests.cpp
+++ b/src/mongo/dbtests/jsobjtests.cpp
@@ -43,7 +43,6 @@
#include "mongo/db/bson/dotted_path_support.h"
#include "mongo/db/jsobj.h"
#include "mongo/db/json.h"
-#include "mongo/db/storage/mmap_v1/btree/key.h"
#include "mongo/dbtests/dbtests.h"
#include "mongo/platform/decimal128.h"
#include "mongo/util/allocator.h"
@@ -168,63 +167,6 @@ FieldCompareResult compareDottedFieldNames(const string& l, const string& r, con
namespace JsobjTests {
-void keyTest(const BSONObj& o, bool mustBeCompact = false) {
- static KeyV1Owned* kLast;
- static BSONObj last;
-
- KeyV1Owned* key = new KeyV1Owned(o);
- KeyV1Owned& k = *key;
-
- ASSERT(!mustBeCompact || k.isCompactFormat());
-
- BSONObj x = k.toBson();
- int res = o.woCompare(x, BSONObj(), /*considerfieldname*/ false);
- if (res) {
- cout << o.toString() << endl;
- k.toBson();
- cout << x.toString() << endl;
- o.woCompare(x, BSONObj(), /*considerfieldname*/ false);
- ASSERT(res == 0);
- }
- ASSERT(k.woEqual(k));
- ASSERT(!k.isCompactFormat() || k.dataSize() < o.objsize());
-
- {
- int res = o.woCompare(last);
- ASSERT((res == 0) == SimpleBSONObjComparator::kInstance.evaluate(o == last));
- }
-
- if (kLast) {
- int r1 = o.woCompare(last, BSONObj(), false);
- int r2 = k.woCompare(*kLast, Ordering::make(BSONObj()));
- bool ok = (r1 < 0 && r2 < 0) || (r1 > 0 && r2 > 0) || r1 == r2;
- if (!ok) {
- cout << "r1r2 " << r1 << ' ' << r2 << endl;
- cout << "o:" << o.toString() << endl;
- cout << "last:" << last.toString() << endl;
- cout << "k:" << k.toString() << endl;
- cout << "kLast:" << kLast->toString() << endl;
- int r3 = k.woCompare(*kLast, Ordering::make(BSONObj()));
- cout << r3 << endl;
- }
- ASSERT(ok);
- if (k.isCompactFormat() && kLast->isCompactFormat()) {
- // only check if not bson as bson woEqual is broken! (or was may2011)
- if (k.woEqual(*kLast) != (r2 == 0)) { // check woEqual matches
- cout << r2 << endl;
- cout << k.toString() << endl;
- cout << kLast->toString() << endl;
- k.woEqual(*kLast);
- ASSERT(false);
- }
- }
- }
-
- delete kLast;
- kLast = key;
- last = o.getOwned();
-}
-
class BufBuilderBasic {
public:
void run() {
@@ -481,31 +423,10 @@ public:
key) < 0);
{
- // test a big key
- string x(2000, 'z');
- BSONObj o = BSON("q" << x);
- keyTest(o, false);
- }
- {
- string y(200, 'w');
- BSONObjBuilder b;
- for (int i = 0; i < 10; i++) {
- b.append("x", y);
- }
- keyTest(b.obj(), true);
- }
- {
- double nan = numeric_limits<double>::quiet_NaN();
- BSONObj o = BSON("y" << nan);
- keyTest(o);
- }
-
- {
BSONObjBuilder b;
b.append("", "c");
b.appendNull("");
BSONObj o = b.obj();
- keyTest(o);
ASSERT(dps::compareObjectsAccordingToSort(o,
BSON(""
<< "b"
@@ -557,13 +478,6 @@ public:
ASSERT(BSON("a" << nan).woCompare(BSON("a" << 5000000000LL)) < 0);
- {
- KeyV1Owned a(BSON("a" << nan));
- KeyV1Owned b(BSON("a" << 1));
- Ordering o = Ordering::make(BSON("a" << 1));
- ASSERT(a.woCompare(b, o) < 0);
- }
-
ASSERT(BSON("a" << 1).woCompare(BSON("a" << nan)) > 0);
ASSERT(BSON("a" << nan2).woCompare(BSON("a" << nan2)) == 0);
@@ -644,41 +558,6 @@ struct AppendIntOrLL {
void run() {
const long long billion = 1000 * 1000 * 1000;
- long long n = 0x3333111122224444LL;
- {
- double d = (double)n;
- BSONObj a = BSON("x" << n);
- BSONObj b = BSON("x" << d);
-
- long long back = (long long)d;
- // 3719
- ////// int res = a.woCompare(b);
-
- ASSERT(n > back);
- // ASSERT( res > 0 ); // SERVER-3719
-
- keyTest(a, false);
-
- KeyV1Owned A(a);
- KeyV1Owned B(b);
- // 3719
- ////// int res2 = A.woCompare(B, Ordering::make(BSONObj()));
- // ASSERT( res2 > 0 ); // SERVER-3719
-
- // fixing requires an index v# change.
-
- cout << "todo fix SERVER-3719 and uncomment test in AppendIntOrLL" << endl;
-
- n++;
- }
-
- {
- BSONObjBuilder b;
- b.appendIntOrLL("L4", -4 * billion);
- keyTest(b.obj());
- keyTest(BSON("" << billion));
- }
-
BSONObjBuilder b;
b.appendIntOrLL("i1", 1);
b.appendIntOrLL("i2", -1);
@@ -693,7 +572,6 @@ struct AppendIntOrLL {
b.appendIntOrLL("L6", -16 * billion);
BSONObj o = b.obj();
- keyTest(o);
ASSERT(o["i1"].type() == NumberInt);
ASSERT(o["i1"].number() == 1);
@@ -730,7 +608,6 @@ struct AppendNumber {
b.appendNumber("f", mongo::Decimal128("1"));
BSONObj o = b.obj();
- keyTest(o);
ASSERT(o["a"].type() == NumberInt);
ASSERT(o["b"].type() == NumberDouble);
@@ -741,161 +618,6 @@ struct AppendNumber {
}
};
-class ToStringArray {
-public:
- void run() {
- string spec = "{ a: [ \"a\", \"b\" ] }";
- ASSERT_EQUALS(spec, fromjson(spec).toString());
-
- BSONObj x = BSON("a"
- << "astring"
- << "b"
- << "str");
- keyTest(x);
- keyTest(x);
- BSONObj y = BSON("a"
- << "astring"
- << "b"
- << "stra");
- keyTest(y);
- y = BSON("a"
- << "");
- keyTest(y);
-
- keyTest(BSON("abc" << true));
- keyTest(BSON("abc" << false));
- keyTest(BSON("abc" << false << "b" << true));
-
- Date_t now = jsTime();
- keyTest(BSON("" << now << "" << 3 << "" << jstNULL << "" << true));
- keyTest(BSON("" << now << "" << 3 << "" << BSONObj() << "" << true));
-
- {{// check signed dates with new key format
- KeyV1Owned a(BSONObjBuilder().appendDate("", Date_t::fromMillisSinceEpoch(-50)).obj());
- KeyV1Owned b(BSONObjBuilder().appendDate("", Date_t::fromMillisSinceEpoch(50)).obj());
- ASSERT(a.woCompare(b, Ordering::make(BSONObj())) < 0);
- }
- {
- // backward compatibility
- KeyBson a(BSONObjBuilder().appendDate("", Date_t::fromMillisSinceEpoch(-50)).obj());
- KeyBson b(BSONObjBuilder().appendDate("", Date_t::fromMillisSinceEpoch(50)).obj());
- ASSERT(a.woCompare(b, Ordering::make(BSONObj())) > 0);
- }
- {
- // this is an uncompactable key:
- BSONObj uc1 = BSONObjBuilder()
- .appendDate("", Date_t::fromMillisSinceEpoch(-50))
- .appendCode("", "abc")
- .obj();
- BSONObj uc2 = BSONObjBuilder()
- .appendDate("", Date_t::fromMillisSinceEpoch(55))
- .appendCode("", "abc")
- .obj();
- ASSERT(uc1.woCompare(uc2, Ordering::make(BSONObj())) < 0);
- {
- KeyV1Owned a(uc1);
- KeyV1Owned b(uc2);
- ASSERT(!a.isCompactFormat());
- ASSERT(a.woCompare(b, Ordering::make(BSONObj())) < 0);
- }
- {
- KeyBson a(uc1);
- KeyBson b(uc2);
- ASSERT(!a.isCompactFormat());
- ASSERT(a.woCompare(b, Ordering::make(BSONObj())) > 0);
- }
- }
-}
-
-{
- BSONObjBuilder b;
- b.appendBinData("f", 8, (BinDataType)1, "aaaabbbb");
- b.appendBinData("e", 3, (BinDataType)1, "aaa");
- b.appendBinData("b", 1, (BinDataType)1, "x");
- BSONObj o = b.obj();
- keyTest(o, true);
-}
-
-{
- // check (non)equality
- BSONObj a = BSONObjBuilder().appendBinData("", 8, (BinDataType)1, "abcdefgh").obj();
- BSONObj b = BSONObjBuilder().appendBinData("", 8, (BinDataType)1, "abcdefgj").obj();
- ASSERT_BSONOBJ_NE(a, b);
- int res_ab = a.woCompare(b);
- ASSERT(res_ab != 0);
- keyTest(a, true);
- keyTest(b, true);
-
- // check subtypes do not equal
- BSONObj c = BSONObjBuilder().appendBinData("", 8, (BinDataType)4, "abcdefgh").obj();
- BSONObj d = BSONObjBuilder().appendBinData("", 8, (BinDataType)0x81, "abcdefgh").obj();
- ASSERT_BSONOBJ_NE(a, c);
- int res_ac = a.woCompare(c);
- ASSERT(res_ac != 0);
- keyTest(c, true);
- ASSERT_BSONOBJ_NE(a, d);
- int res_ad = a.woCompare(d);
- ASSERT(res_ad != 0);
- keyTest(d, true);
-
- KeyV1Owned A(a);
- KeyV1Owned B(b);
- KeyV1Owned C(c);
- KeyV1Owned D(d);
- ASSERT(!A.woEqual(B));
- ASSERT(A.woCompare(B, Ordering::make(BSONObj())) < 0 && res_ab < 0);
- ASSERT(!A.woEqual(C));
- ASSERT(A.woCompare(C, Ordering::make(BSONObj())) < 0 && res_ac < 0);
- ASSERT(!A.woEqual(D));
- ASSERT(A.woCompare(D, Ordering::make(BSONObj())) < 0 && res_ad < 0);
-}
-
-{
- BSONObjBuilder b;
- b.appendBinData("f", 33, (BinDataType)1, "123456789012345678901234567890123");
- BSONObj o = b.obj();
- keyTest(o, false);
-}
-
-{
- for (int i = 1; i <= 3; i++) {
- for (int j = 1; j <= 3; j++) {
- BSONObjBuilder b;
- b.appendBinData("f", i, (BinDataType)j, "abc");
- BSONObj o = b.obj();
- keyTest(o, j != ByteArrayDeprecated);
- }
- }
-}
-
-{
- BSONObjBuilder b;
- b.appendBinData("f", 1, (BinDataType)133, "a");
- BSONObj o = b.obj();
- keyTest(o, true);
-}
-
-{
- BSONObjBuilder b;
- b.append("AA", 3);
- b.appendBinData("f", 0, (BinDataType)0, "");
- b.appendBinData("e", 3, (BinDataType)7, "aaa");
- b.appendBinData("b", 1, (BinDataType)128, "x");
- b.append("z", 3);
- b.appendBinData("bb", 0, (BinDataType)129, "x");
- BSONObj o = b.obj();
- keyTest(o, true);
-}
-
-{
- // 9 is not supported in compact format. so test a non-compact case here.
- BSONObjBuilder b;
- b.appendBinData("f", 9, (BinDataType)0, "aaaabbbbc");
- BSONObj o = b.obj();
- keyTest(o);
-}
-}
-};
class ToStringNumber {
public:
@@ -915,7 +637,6 @@ public:
b.append("i", -0.0);
BSONObj x = b.obj();
- keyTest(x);
ASSERT_EQUALS("4", x["a"].toString(false, true));
ASSERT_EQUALS("5.0", x["b"].toString(false, true));
@@ -973,7 +694,6 @@ public:
b.append("b", z);
b.appendAs(b.asTempObj()["a"], "c");
BSONObj o = b.obj();
- keyTest(o);
stringstream ss;
ss << 'a' << '\0' << 'b';
@@ -1352,8 +1072,6 @@ public:
b.appendOID("b", 0, false);
b.appendOID("c", 0, true);
BSONObj o = b.obj();
- keyTest(o);
-
ASSERT(o["a"].__oid().toString() == "000000000000000000000000");
ASSERT(o["b"].__oid().toString() == "000000000000000000000000");
ASSERT(o["c"].__oid().toString() != "000000000000000000000000");
@@ -1951,8 +1669,6 @@ struct BSONArrayBuilderTest {
BSONObj o = BSON("obj" << obj << "arr" << arr << "arr2" << BSONArray(obj) << "regex"
<< BSONRegEx("reg", "x"));
- keyTest(o);
-
ASSERT_EQUALS(o["obj"].type(), Object);
ASSERT_EQUALS(o["arr"].type(), Array);
ASSERT_EQUALS(o["arr2"].type(), Array);
@@ -2209,8 +1925,6 @@ public:
void run() {
BSONObj x = BSON("a" << BSON("b" << 1));
BSONObj y = BSON("a" << BSON("b" << 1.0));
- keyTest(x);
- keyTest(y);
ASSERT_BSONOBJ_EQ(x, y);
ASSERT_EQUALS(0, x.woCompare(y));
}
@@ -2325,7 +2039,6 @@ public:
add<BSONObjTests::AsTempObj>();
add<BSONObjTests::AppendIntOrLL>();
add<BSONObjTests::AppendNumber>();
- add<BSONObjTests::ToStringArray>();
add<BSONObjTests::ToStringNumber>();
add<BSONObjTests::AppendAs>();
add<BSONObjTests::ToStringRecursionDepth>();
diff --git a/src/mongo/dbtests/mmaptests.cpp b/src/mongo/dbtests/mmaptests.cpp
deleted file mode 100644
index bec7f072342..00000000000
--- a/src/mongo/dbtests/mmaptests.cpp
+++ /dev/null
@@ -1,312 +0,0 @@
-// @file mmaptests.cpp
-
-/**
- * Copyright (C) 2008 10gen Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects
- * for all of the code used other than as permitted herein. If you modify
- * file(s) with this exception, you may extend this exception to your
- * version of the file(s), but you are not obligated to do so. If you do not
- * wish to do so, delete this exception statement from your version. If you
- * delete this exception statement from all source files in the program,
- * then also delete it in the license file.
- */
-
-#include "mongo/platform/basic.h"
-
-#include <boost/filesystem/operations.hpp>
-#include <iostream>
-
-#include "mongo/db/concurrency/d_concurrency.h"
-#include "mongo/db/concurrency/lock_state.h"
-#include "mongo/db/service_context.h"
-#include "mongo/db/storage/mmap_v1/data_file.h"
-#include "mongo/db/storage/mmap_v1/durable_mapped_file.h"
-#include "mongo/db/storage/mmap_v1/extent.h"
-#include "mongo/db/storage/mmap_v1/extent_manager.h"
-#include "mongo/db/storage/mmap_v1/mmap_v1_extent_manager.h"
-#include "mongo/db/storage/mmap_v1/mmap_v1_options.h"
-#include "mongo/db/storage/storage_options.h"
-#include "mongo/dbtests/dbtests.h"
-#include "mongo/util/scopeguard.h"
-#include "mongo/util/timer.h"
-
-namespace MMapTests {
-
-using std::endl;
-using std::string;
-
-class LeakTest {
- const string fn;
- const int optOld;
-
-public:
- LeakTest()
- : fn((boost::filesystem::path(storageGlobalParams.dbpath) / "testfile.map").string()),
- optOld(mmapv1GlobalOptions.journalOptions) {
- mmapv1GlobalOptions.journalOptions = 0; // DurParanoid doesn't make sense with this test
- }
- ~LeakTest() {
- mmapv1GlobalOptions.journalOptions = optOld;
- try {
- boost::filesystem::remove(fn);
- } catch (...) {
- }
- }
- void run() {
- try {
- boost::filesystem::remove(fn);
- } catch (...) {
- }
-
- auto opCtx = cc().makeOperationContext();
- Lock::GlobalWrite lk(opCtx.get());
-
- {
- DurableMappedFile f(opCtx.get());
- ON_BLOCK_EXIT([&f, &opCtx] {
- LockMongoFilesExclusive lock(opCtx.get());
- f.close(opCtx.get());
- });
- unsigned long long len = 256 * 1024 * 1024;
- verify(f.create(opCtx.get(), fn, len));
- {
- char* p = (char*)f.getView();
- verify(p);
- // write something to the private view as a test
- if (storageGlobalParams.dur)
- privateViews.makeWritable(p, 6);
- strcpy(p, "hello");
- }
- if (storageGlobalParams.dur) {
- char* w = (char*)f.view_write();
- strcpy(w + 6, "world");
- }
- MongoFileFinder ff(opCtx.get());
- ASSERT(ff.findByPath(fn));
- ASSERT(ff.findByPath("asdf") == 0);
- }
- {
- MongoFileFinder ff(opCtx.get());
- ASSERT(ff.findByPath(fn) == 0);
- }
-
- int N = 10000;
-#if !defined(_WIN32) && !defined(__linux__)
- // seems this test is slow on OS X.
- N = 100;
-#endif
-
- // we make a lot here -- if we were leaking, presumably it would fail doing this many.
- Timer t;
- for (int i = 0; i < N; i++) {
- // Every 4 iterations we pass the sequential hint.
- DurableMappedFile f{opCtx.get(),
- i % 4 == 1 ? MongoFile::Options::SEQUENTIAL
- : MongoFile::Options::NONE};
- ON_BLOCK_EXIT([&f, &opCtx] {
- LockMongoFilesExclusive lock(opCtx.get());
- f.close(opCtx.get());
- });
- verify(f.open(opCtx.get(), fn));
- {
- char* p = (char*)f.getView();
- verify(p);
- if (storageGlobalParams.dur)
- privateViews.makeWritable(p, 4);
- strcpy(p, "zzz");
- }
- if (storageGlobalParams.dur) {
- char* w = (char*)f.view_write();
- if (i % 2 == 0)
- ++(*w);
- verify(w[6] == 'w');
- }
- }
- if (t.millis() > 10000) {
- mongo::unittest::log() << "warning: MMap LeakTest is unusually slow N:" << N << ' '
- << t.millis() << "ms" << endl;
- }
- }
-};
-
-class ExtentSizing {
-public:
- void run() {
- MmapV1ExtentManager em("x", "x", false);
-
- ASSERT_EQUALS(em.maxSize(), em.quantizeExtentSize(em.maxSize()));
-
- // test that no matter what we start with, we always get to max extent size
- for (int obj = 16; obj < BSONObjMaxUserSize; obj += 111) {
- int sz = em.initialSize(obj);
-
- double totalExtentSize = sz;
-
- int numFiles = 1;
- int sizeLeftInExtent = em.maxSize() - 1;
-
- for (int i = 0; i < 100; i++) {
- sz = em.followupSize(obj, sz);
- ASSERT(sz >= obj);
- ASSERT(sz >= em.minSize());
- ASSERT(sz <= em.maxSize());
- ASSERT(sz <= em.maxSize());
-
- totalExtentSize += sz;
-
- if (sz < sizeLeftInExtent) {
- sizeLeftInExtent -= sz;
- } else {
- numFiles++;
- sizeLeftInExtent = em.maxSize() - sz;
- }
- }
- ASSERT_EQUALS(em.maxSize(), sz);
-
- double allocatedOnDisk = (double)numFiles * em.maxSize();
-
- ASSERT((totalExtentSize / allocatedOnDisk) > .95);
-
- invariant(em.numFiles() == 0);
- }
- }
-};
-
-class All : public Suite {
-public:
- All() : Suite("mmap") {}
- void setupTests() {
- if (!getGlobalServiceContext()->getStorageEngine()->isMmapV1())
- return;
-
- add<LeakTest>();
- add<ExtentSizing>();
- }
-};
-
-SuiteInstance<All> myall;
-
-#if 0
-
- class CopyOnWriteSpeedTest {
- public:
- void run() {
-
- string fn = "/tmp/testfile.map";
- boost::filesystem::remove(fn);
-
- MemoryMappedFile f;
- char *p = (char *) f.create(fn, 1024 * 1024 * 1024, true);
- verify(p);
- strcpy(p, "hello");
-
- {
- void *x = f.testGetCopyOnWriteView();
- Timer tt;
- for( int i = 11; i < 1000000000; i++ )
- p[i] = 'z';
- cout << "fill 1GB time: " << tt.millis() << "ms" << endl;
- f.testCloseCopyOnWriteView(x);
- }
-
- /* test a lot of view/unviews */
- {
- Timer t;
-
- char *q;
- for( int i = 0; i < 1000; i++ ) {
- q = (char *) f.testGetCopyOnWriteView();
- verify( q );
- if( i == 999 ) {
- strcpy(q+2, "there");
- }
- f.testCloseCopyOnWriteView(q);
- }
-
- cout << "view unview: " << t.millis() << "ms" << endl;
- }
-
- f.flush(true);
-
- /* plain old mmaped writes */
- {
- Timer t;
- for( int i = 0; i < 10; i++ ) {
- memset(p+100, 'c', 200 * 1024 * 1024);
- }
- cout << "traditional writes: " << t.millis() << "ms" << endl;
- }
-
- f.flush(true);
-
- /* test doing some writes */
- {
- Timer t;
- char *q = (char *) f.testGetCopyOnWriteView();
- for( int i = 0; i < 10; i++ ) {
- verify( q );
- memset(q+100, 'c', 200 * 1024 * 1024);
- }
- f.testCloseCopyOnWriteView(q);
-
- cout << "inc style some writes: " << t.millis() << "ms" << endl;
- }
-
- /* test doing some writes */
- {
- Timer t;
- for( int i = 0; i < 10; i++ ) {
- char *q = (char *) f.testGetCopyOnWriteView();
- verify( q );
- memset(q+100, 'c', 200 * 1024 * 1024);
- f.testCloseCopyOnWriteView(q);
- }
-
- cout << "some writes: " << t.millis() << "ms" << endl;
- }
-
- /* more granular */
- {
- Timer t;
- for( int i = 0; i < 100; i++ ) {
- char *q = (char *) f.testGetCopyOnWriteView();
- verify( q );
- memset(q+100, 'c', 20 * 1024 * 1024);
- f.testCloseCopyOnWriteView(q);
- }
-
- cout << "more granular some writes: " << t.millis() << "ms" << endl;
- }
-
- p[10] = 0;
- cout << p << endl;
- }
- };
-
- class All : public Suite {
- public:
- All() : Suite( "mmap" ) {}
- void setupTests() {
- add< CopyOnWriteSpeedTest >();
- }
- } myall;
-
-#endif
-} // namespace MMapTests
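
LeakTest's core check was to map, touch, and unmap the same file thousands of times, so that any leaked mappings or file descriptors would exhaust the process long before the loop finished. A stripped-down POSIX sketch of that loop (illustrative only):

    #include <fcntl.h>
    #include <sys/mman.h>
    #include <unistd.h>
    #include <cstddef>
    #include <cstring>

    // Map, touch, and unmap the same file repeatedly; leaks would accumulate
    // across iterations and eventually make open() or mmap() fail.
    bool mapUnmapLoop(const char* path, std::size_t len, int iterations) {
        for (int i = 0; i < iterations; ++i) {
            int fd = ::open(path, O_RDWR);
            if (fd < 0)
                return false;
            void* p = ::mmap(nullptr, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
            ::close(fd);  // the mapping remains valid after close
            if (p == MAP_FAILED)
                return false;
            std::memcpy(p, "zzz", 4);  // mirrors the strcpy(p, "zzz") in LeakTest
            ::munmap(p, len);
        }
        return true;
    }
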
diff --git a/src/mongo/dbtests/namespacetests.cpp b/src/mongo/dbtests/namespacetests.cpp
deleted file mode 100644
index 2d1144c327c..00000000000
--- a/src/mongo/dbtests/namespacetests.cpp
+++ /dev/null
@@ -1,664 +0,0 @@
-/**
- * Copyright (C) 2008-2014 MongoDB Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects
- * for all of the code used other than as permitted herein. If you modify
- * file(s) with this exception, you may extend this exception to your
- * version of the file(s), but you are not obligated to do so. If you do not
- * wish to do so, delete this exception statement from your version. If you
- * delete this exception statement from all source files in the program,
- * then also delete it in the license file.
- */
-
-#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kDefault
-
-#include "mongo/platform/basic.h"
-
-#include <string>
-
-#include "mongo/bson/simple_bsonobj_comparator.h"
-#include "mongo/db/catalog/collection.h"
-#include "mongo/db/catalog/database_holder.h"
-#include "mongo/db/client.h"
-#include "mongo/db/db_raii.h"
-#include "mongo/db/index/expression_keys_private.h"
-#include "mongo/db/index_legacy.h"
-#include "mongo/db/index_names.h"
-#include "mongo/db/json.h"
-#include "mongo/db/query/internal_plans.h"
-#include "mongo/db/storage/mmap_v1/catalog/namespace.h"
-#include "mongo/db/storage/mmap_v1/catalog/namespace_details.h"
-#include "mongo/db/storage/mmap_v1/catalog/namespace_details_rsv1_metadata.h"
-#include "mongo/db/storage/mmap_v1/extent.h"
-#include "mongo/db/storage/mmap_v1/extent_manager.h"
-#include "mongo/db/storage/mmap_v1/mmap_v1_extent_manager.h"
-#include "mongo/db/storage/mmap_v1/record_store_v1_capped.h"
-#include "mongo/db/storage/mmap_v1/record_store_v1_simple.h"
-#include "mongo/db/storage/storage_engine.h"
-#include "mongo/dbtests/dbtests.h"
-#include "mongo/util/log.h"
-
-namespace NamespaceTests {
-
-using std::string;
-
-const int MinExtentSize = 4096;
-
-namespace MissingFieldTests {
-
-/** A missing field is represented as null in a btree index. */
-class BtreeIndexMissingField {
-public:
- void run() {
- const ServiceContext::UniqueOperationContext opCtxPtr = cc().makeOperationContext();
- OperationContext& opCtx = *opCtxPtr;
- BSONObj spec(BSON("key" << BSON("a" << 1)));
- ASSERT_EQUALS(jstNULL,
- IndexLegacy::getMissingField(&opCtx, NULL, spec).firstElement().type());
- }
-};
-
-/** A missing field is represented as null in a 2d index. */
-class TwoDIndexMissingField {
-public:
- void run() {
- const ServiceContext::UniqueOperationContext opCtxPtr = cc().makeOperationContext();
- OperationContext& opCtx = *opCtxPtr;
- BSONObj spec(BSON("key" << BSON("a"
- << "2d")));
- ASSERT_EQUALS(jstNULL,
- IndexLegacy::getMissingField(&opCtx, NULL, spec).firstElement().type());
- }
-};
-
-/** A missing field is represented with the hash of null in a hashed index. */
-class HashedIndexMissingField {
-public:
- void run() {
- const ServiceContext::UniqueOperationContext opCtxPtr = cc().makeOperationContext();
- OperationContext& opCtx = *opCtxPtr;
- BSONObj spec(BSON("key" << BSON("a"
- << "hashed")));
- BSONObj nullObj = BSON("a" << BSONNULL);
-
- // Call getKeys on the nullObj.
- BSONObjSet nullFieldKeySet = SimpleBSONObjComparator::kInstance.makeBSONObjSet();
- const CollatorInterface* collator = nullptr;
- ExpressionKeysPrivate::getHashKeys(nullObj, "a", 0, 0, false, collator, &nullFieldKeySet);
- BSONElement nullFieldFromKey = nullFieldKeySet.begin()->firstElement();
-
- ASSERT_EQUALS(ExpressionKeysPrivate::makeSingleHashKey(nullObj.firstElement(), 0, 0),
- nullFieldFromKey.Long());
-
- BSONObj missingField = IndexLegacy::getMissingField(&opCtx, NULL, spec);
- ASSERT_EQUALS(NumberLong, missingField.firstElement().type());
- ASSERT_BSONELT_EQ(nullFieldFromKey, missingField.firstElement());
- }
-};
-
-/**
- * A missing field is represented with the hash of null in a hashed index. This hash value
- * depends on the hash seed.
- */
-class HashedIndexMissingFieldAlternateSeed {
-public:
- void run() {
- const ServiceContext::UniqueOperationContext opCtxPtr = cc().makeOperationContext();
- OperationContext& opCtx = *opCtxPtr;
- BSONObj spec(BSON("key" << BSON("a"
- << "hashed")
- << "seed"
- << 0x5eed));
- BSONObj nullObj = BSON("a" << BSONNULL);
-
- BSONObjSet nullFieldKeySet = SimpleBSONObjComparator::kInstance.makeBSONObjSet();
- const CollatorInterface* collator = nullptr;
- ExpressionKeysPrivate::getHashKeys(
- nullObj, "a", 0x5eed, 0, false, collator, &nullFieldKeySet);
- BSONElement nullFieldFromKey = nullFieldKeySet.begin()->firstElement();
-
- ASSERT_EQUALS(ExpressionKeysPrivate::makeSingleHashKey(nullObj.firstElement(), 0x5eed, 0),
- nullFieldFromKey.Long());
-
- // Ensure that getMissingField recognizes that the seed is different (and returns
- // the right key).
- BSONObj missingField = IndexLegacy::getMissingField(&opCtx, NULL, spec);
- ASSERT_EQUALS(NumberLong, missingField.firstElement().type());
- ASSERT_BSONELT_EQ(nullFieldFromKey, missingField.firstElement());
- }
-};
-
-} // namespace MissingFieldTests
-
-namespace NamespaceDetailsTests {
-#if 0 // SERVER-13640
-
- class Base {
- const char *ns_;
- Lock::GlobalWrite lk;
- OldClientContext _context;
- public:
- Base( const char *ns = "unittests.NamespaceDetailsTests" ) : ns_( ns ) , _context( ns ) {}
- virtual ~Base() {
- const ServiceContext::UniqueOperationContext opCtxPtr = cc().makeOperationContext(); OperationContext& opCtx = *opCtxPtr;
- if ( !nsd() )
- return;
- _context.db()->dropCollection( &opCtx, ns() );
- }
- protected:
- void create() {
- Lock::GlobalWrite lk;
- const ServiceContext::UniqueOperationContext opCtxPtr = cc().makeOperationContext(); OperationContext& opCtx = *opCtxPtr;
-
- CollectionOptions collectionOptions;
- ASSERT_OK(collectionOptions.parse(fromjson(spec()),
- CollectionOptions::parseForCommand));
- ASSERT_OK(userCreateNS(&opCtx, db(), ns(), collectionOptions, false));
- }
- virtual string spec() const = 0;
- int nRecords() const {
- int count = 0;
- const Extent* ext;
- for ( RecordId extLoc = nsd()->firstExtent();
- !extLoc.isNull();
- extLoc = ext->xnext) {
- ext = extentManager()->getExtent(extLoc);
- int fileNo = ext->firstRecord.a();
- if ( fileNo == -1 )
- continue;
- for ( int recOfs = ext->firstRecord.getOfs(); recOfs != RecordId::NullOfs;
- recOfs = recordStore()->recordFor(RecordId(fileNo, recOfs))->nextOfs() ) {
- ++count;
- }
- }
- ASSERT_EQUALS( count, nsd()->numRecords() );
- return count;
- }
- int nExtents() const {
- int count = 0;
- for ( RecordId extLoc = nsd()->firstExtent();
- !extLoc.isNull();
- extLoc = extentManager()->getExtent(extLoc)->xnext ) {
- ++count;
- }
- return count;
- }
- const char *ns() const {
- return ns_;
- }
- const NamespaceDetails *nsd() const {
- Collection* c = collection();
- if ( !c )
- return NULL;
- return c->detailsDeprecated();
- }
- const RecordStore* recordStore() const {
- Collection* c = collection();
- if ( !c )
- return NULL;
- return c->getRecordStore();
- }
- Database* db() const {
- return _context.db();
- }
- const ExtentManager* extentManager() const {
- return db()->getExtentManager();
- }
- Collection* collection() const {
- return db()->getCollection( &opCtx, ns() );
- }
-
- static BSONObj bigObj() {
- BSONObjBuilder b;
- b.appendOID("_id", 0, true);
- string as( 187, 'a' );
- b.append( "a", as );
- return b.obj();
- }
-
- };
-
- class Create : public Base {
- public:
- void run() {
- create();
- ASSERT( nsd() );
- ASSERT_EQUALS( 0, nRecords() );
- ASSERT( nsd()->firstExtent() == nsd()->capExtent() );
- RecordId initial = RecordId();
- initial.setInvalid();
- ASSERT( initial == nsd()->capFirstNewRecord() );
- }
- virtual string spec() const { return "{\"capped\":true,\"size\":512,\"$nExtents\":1}"; }
- };
-
- class SingleAlloc : public Base {
- public:
- void run() {
- const ServiceContext::UniqueOperationContext opCtxPtr = cc().makeOperationContext(); OperationContext& opCtx = *opCtxPtr;
- create();
- BSONObj b = bigObj();
- ASSERT( collection()->insertDocument( &opCtx, b, true ).isOK() );
- ASSERT_EQUALS( 1, nRecords() );
- }
- virtual string spec() const { return "{\"capped\":true,\"size\":512,\"$nExtents\":1}"; }
- };
-
- class Realloc : public Base {
- public:
- void run() {
- const ServiceContext::UniqueOperationContext opCtxPtr = cc().makeOperationContext(); OperationContext& opCtx = *opCtxPtr;
- create();
-
-            const int N = 20;
-            const int Q = 16; // these constants depend on the size of the bson object
-                              // and on the extent size allocated by the system
-            RecordId l[ N ];
-            for ( int i = 0; i < N; ++i ) {
-                BSONObj b = bigObj();
-                StatusWith<RecordId> status = collection()->insertDocument( &opCtx, b, true );
-                ASSERT( status.isOK() );
-                l[ i ] = status.getValue();
-                ASSERT( !l[ i ].isNull() );
-                ASSERT( nRecords() <= Q );
-                //ASSERT_EQUALS( 1 + i % 2, nRecords() );
-                if ( i >= Q )
-                    ASSERT( l[ i ] == l[ i - Q ] );
- }
- }
- virtual string spec() const { return "{\"capped\":true,\"size\":512,\"$nExtents\":1}"; }
- };
-
- class TwoExtent : public Base {
- public:
- void run() {
- const ServiceContext::UniqueOperationContext opCtxPtr = cc().makeOperationContext(); OperationContext& opCtx = *opCtxPtr;
- create();
- ASSERT_EQUALS( 2, nExtents() );
-
- RecordId l[ 8 ];
- for ( int i = 0; i < 8; ++i ) {
-                StatusWith<RecordId> status =
-                    collection()->insertDocument( &opCtx, bigObj(), true );
-                ASSERT( status.isOK() );
- l[ i ] = status.getValue();
- ASSERT( !l[ i ].isNull() );
- //ASSERT_EQUALS( i < 2 ? i + 1 : 3 + i % 2, nRecords() );
- //if ( i > 3 )
- // ASSERT( l[ i ] == l[ i - 4 ] );
- }
- ASSERT( nRecords() == 8 );
-
- // Too big
- BSONObjBuilder bob;
- bob.appendOID( "_id", NULL, true );
- bob.append( "a", string( MinExtentSize + 500, 'a' ) ); // min extent size is now 4096
- BSONObj bigger = bob.done();
- ASSERT( !collection()->insertDocument( &opCtx, bigger, false ).isOK() );
- ASSERT_EQUALS( 0, nRecords() );
- }
- private:
- virtual string spec() const {
- return "{\"capped\":true,\"size\":512,\"$nExtents\":2}";
- }
- };
-
-
-    // Builds a document whose on-disk record is exactly 'size' bytes. The 22
-    // is the fixed BSON overhead: a 4-byte object length, the 9-byte
-    // {_id: 5} element, 8 bytes of framing for the "x" string element, and
-    // the trailing EOO byte.
-    BSONObj docForRecordSize( int size ) {
- BSONObjBuilder b;
- b.append( "_id", 5 );
- b.append( "x", string( size - Record::HeaderSize - 22, 'x' ) );
- BSONObj x = b.obj();
- ASSERT_EQUALS( Record::HeaderSize + x.objsize(), size );
- return x;
- }
-
- /**
- * alloc() does not quantize records in capped collections.
- * NB: this actually tests that the code in Database::createCollection doesn't set
- * PowerOf2Sizes for capped collections.
- */
- class AllocCappedNotQuantized : public Base {
- public:
- void run() {
- const ServiceContext::UniqueOperationContext opCtxPtr = cc().makeOperationContext(); OperationContext& opCtx = *opCtxPtr;
- create();
- ASSERT( nsd()->isCapped() );
- ASSERT( !nsd()->isUserFlagSet( NamespaceDetails::Flag_UsePowerOf2Sizes ) );
-
- StatusWith<RecordId> result =
- collection()->insertDocument( &opCtx, docForRecordSize( 300 ), false );
- ASSERT( result.isOK() );
- Record* record = collection()->getRecordStore()->recordFor( result.getValue() );
- // Check that no quantization is performed.
- ASSERT_EQUALS( 300, record->lengthWithHeaders() );
- }
- virtual string spec() const { return "{capped:true,size:2048}"; }
- };
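[Editor's note] The AllocCappedNotQuantized test above hinges on what quantization
would do if it were applied: with Flag_UsePowerOf2Sizes set, record allocations are
rounded up to a power of two, while capped collections store records at their exact
size. A minimal, self-contained sketch of that rounding rule, assuming a
hypothetical quantize() helper and a 32-byte floor rather than the engine's exact
constants:

    #include <cassert>
    #include <cstdint>

    // Hypothetical stand-in for power-of-2 record quantization.
    static int64_t quantize(int64_t bytes) {
        int64_t size = 32;         // assumed smallest allocation bucket
        while (size < bytes)
            size *= 2;             // round up to the next power of two
        return size;
    }

    int main() {
        assert(quantize(300) == 512);  // a quantized 300-byte record gets 512
        // A capped collection skips this step, so lengthWithHeaders() stays
        // 300, which is exactly what the test above asserts.
        return 0;
    }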
-
-
-    /* Tests NamespaceDetails::cappedTruncateAfter(const char *ns, RecordId loc). */
- class TruncateCapped : public Base {
- virtual string spec() const {
- return "{\"capped\":true,\"size\":512,\"$nExtents\":2}";
- }
- void pass(int p) {
- const ServiceContext::UniqueOperationContext opCtxPtr = cc().makeOperationContext(); OperationContext& opCtx = *opCtxPtr;
- create();
- ASSERT_EQUALS( 2, nExtents() );
-
- BSONObj b = bigObj();
-
- int N = MinExtentSize / b.objsize() * nExtents() + 5;
- int T = N - 4;
-
- RecordId truncAt;
- //RecordId l[ 8 ];
- for ( int i = 0; i < N; ++i ) {
- BSONObj bb = bigObj();
- StatusWith<RecordId> status = collection()->insertDocument( &opCtx, bb, true );
- ASSERT( status.isOK() );
- RecordId a = status.getValue();
- if( T == i )
- truncAt = a;
- ASSERT( !a.isNull() );
- /*ASSERT_EQUALS( i < 2 ? i + 1 : 3 + i % 2, nRecords() );
- if ( i > 3 )
- ASSERT( l[ i ] == l[ i - 4 ] );*/
- }
- ASSERT( nRecords() < N );
-
- RecordId last, first;
- {
- unique_ptr<Runner> runner(InternalPlanner::collectionScan(&opCtx,
- ns(),
- collection(),
- InternalPlanner::BACKWARD));
- runner->getNext(NULL, &last);
- ASSERT( !last.isNull() );
- }
- {
- unique_ptr<Runner> runner(InternalPlanner::collectionScan(&opCtx,
- ns(),
- collection(),
- InternalPlanner::FORWARD));
- runner->getNext(NULL, &first);
- ASSERT( !first.isNull() );
- ASSERT( first != last ) ;
- }
-
- collection()->cappedTruncateAfter(&opCtx, truncAt, false);
-            ASSERT_EQUALS( collection()->numRecords(), 28u );
-
- {
- RecordId loc;
- unique_ptr<Runner> runner(InternalPlanner::collectionScan(&opCtx,
- ns(),
- collection(),
- InternalPlanner::FORWARD));
- runner->getNext(NULL, &loc);
- ASSERT( first == loc);
- }
- {
- unique_ptr<Runner> runner(InternalPlanner::collectionScan(&opCtx,
- ns(),
- collection(),
- InternalPlanner::BACKWARD));
- RecordId loc;
- runner->getNext(NULL, &loc);
- ASSERT( last != loc );
- ASSERT( !last.isNull() );
- }
-
- // Too big
- BSONObjBuilder bob;
- bob.appendOID("_id", 0, true);
- bob.append( "a", string( MinExtentSize + 300, 'a' ) );
- BSONObj bigger = bob.done();
- ASSERT( !collection()->insertDocument( &opCtx, bigger, true ).isOK() );
- ASSERT_EQUALS( 0, nRecords() );
- }
- public:
- void run() {
- pass(0);
- }
- };
-#endif // SERVER-13640
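[Editor's note] For readers following the TruncateCapped test above: the
cappedTruncateAfter(truncAt, inclusive) call removes every record inserted after
the given one, and the record itself when inclusive is true. A self-contained
sketch of those semantics on a plain std::deque; truncateAfter here is a
hypothetical stand-in, not the engine API:

    #include <cassert>
    #include <deque>

    // Records in insertion order; drop everything inserted after 'truncAt'.
    static void truncateAfter(std::deque<int>& records, int truncAt, bool inclusive) {
        while (!records.empty() && records.back() != truncAt)
            records.pop_back();
        if (inclusive && !records.empty())
            records.pop_back();        // optionally drop 'truncAt' itself
    }

    int main() {
        std::deque<int> r{1, 2, 3, 4, 5};
        truncateAfter(r, 3, /*inclusive=*/false);
        assert(r.size() == 3 && r.back() == 3);  // 4 and 5 are gone, 3 survives
        return 0;
    }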
-#if 0 // XXXXXX - once RecordStore is clean, we can put this back
- class Migrate : public Base {
- public:
- void run() {
- create();
- nsd()->deletedListEntry( 2 ) = nsd()->cappedListOfAllDeletedRecords().drec()->
- nextDeleted().drec()->nextDeleted();
- nsd()->cappedListOfAllDeletedRecords().drec()->nextDeleted().drec()->
- nextDeleted().writing() = RecordId();
- nsd()->cappedLastDelRecLastExtent().Null();
- NamespaceDetails *d = nsd();
-
- zero( &d->capExtent() );
- zero( &d->capFirstNewRecord() );
-
-            // this has the side effect of calling NamespaceDetails::cappedCheckMigrate
- db()->namespaceIndex().details( ns() );
-
- ASSERT( nsd()->firstExtent() == nsd()->capExtent() );
- ASSERT( nsd()->capExtent().getOfs() != 0 );
- ASSERT( !nsd()->capFirstNewRecord().isValid() );
- int nDeleted = 0;
- for ( RecordId i = nsd()->cappedListOfAllDeletedRecords();
- !i.isNull(); i = i.drec()->nextDeleted(), ++nDeleted );
- ASSERT_EQUALS( 10, nDeleted );
- ASSERT( nsd()->cappedLastDelRecLastExtent().isNull() );
- }
- private:
- static void zero( RecordId *d ) {
- memset( d, 0, sizeof( RecordId ) );
- }
- virtual string spec() const {
- return "{\"capped\":true,\"size\":512,\"$nExtents\":10}";
- }
- };
-#endif
-
-// This isn't a particularly useful test, and because it doesn't clean up
-// after itself, /tmp/unittest needs to be cleared after running.
-// class BigCollection : public Base {
-// public:
-// BigCollection() : Base( "NamespaceDetailsTests_BigCollection" ) {}
-// void run() {
-// create();
-// ASSERT_EQUALS( 2, nExtents() );
-// }
-// private:
-// virtual string spec() const {
-// // NOTE 256 added to size in _userCreateNS()
-// long long big = DataFile::maxSize() - DataFileHeader::HeaderSize;
-// stringstream ss;
-// ss << "{\"capped\":true,\"size\":" << big << "}";
-// return ss.str();
-// }
-// };
-
-#if 0 // SERVER-13640
- class SwapIndexEntriesTest : public Base {
- public:
- void run() {
- create();
- NamespaceDetails *nsd = collection()->detailsWritable();
-
- const ServiceContext::UniqueOperationContext opCtxPtr = cc().makeOperationContext(); OperationContext& opCtx = *opCtxPtr;
- // Set 2 & 54 as multikey
- nsd->setIndexIsMultikey(&opCtx, 2, true);
- nsd->setIndexIsMultikey(&opCtx, 54, true);
- ASSERT(nsd->isMultikey(2));
- ASSERT(nsd->isMultikey(54));
-
- // Flip 2 & 47
- nsd->setIndexIsMultikey(&opCtx, 2, false);
- nsd->setIndexIsMultikey(&opCtx, 47, true);
- ASSERT(!nsd->isMultikey(2));
- ASSERT(nsd->isMultikey(47));
-
- // Reset entries that are already true
- nsd->setIndexIsMultikey(&opCtx, 54, true);
- nsd->setIndexIsMultikey(&opCtx, 47, true);
- ASSERT(nsd->isMultikey(54));
- ASSERT(nsd->isMultikey(47));
-
-            // Mark two indexes as non-multikey
- nsd->setIndexIsMultikey(&opCtx, 2, false);
- nsd->setIndexIsMultikey(&opCtx, 43, false);
- ASSERT(!nsd->isMultikey(2));
- ASSERT(nsd->isMultikey(54));
- ASSERT(nsd->isMultikey(47));
- ASSERT(!nsd->isMultikey(43));
- }
- virtual string spec() const { return "{\"capped\":true,\"size\":512,\"$nExtents\":1}"; }
- };
-#endif // SERVER-13640
-} // namespace NamespaceDetailsTests
-
-namespace DatabaseTests {
-
-class RollbackCreateCollection {
-public:
- void run() {
- const string dbName = "rollback_create_collection";
- const string committedName = dbName + ".committed";
- const string rolledBackName = dbName + ".rolled_back";
-
- const ServiceContext::UniqueOperationContext opCtxPtr = cc().makeOperationContext();
- OperationContext& opCtx = *opCtxPtr;
-
- Lock::DBLock lk(&opCtx, dbName, MODE_X);
-
- bool justCreated;
- Database* db = DatabaseHolder::getDatabaseHolder().openDb(&opCtx, dbName, &justCreated);
- ASSERT(justCreated);
-
- Collection* committedColl;
- {
- WriteUnitOfWork wunit(&opCtx);
- ASSERT_FALSE(db->getCollection(&opCtx, committedName));
- committedColl = db->createCollection(&opCtx, committedName);
- ASSERT_EQUALS(db->getCollection(&opCtx, committedName), committedColl);
- wunit.commit();
- }
-
- ASSERT_EQUALS(db->getCollection(&opCtx, committedName), committedColl);
-
- {
- WriteUnitOfWork wunit(&opCtx);
- ASSERT_FALSE(db->getCollection(&opCtx, rolledBackName));
- Collection* rolledBackColl = db->createCollection(&opCtx, rolledBackName);
- ASSERT_EQUALS(db->getCollection(&opCtx, rolledBackName), rolledBackColl);
-            // not committing, so the creation should be rolled back
- }
-
- // The rolledBackCollection creation should have been rolled back
- ASSERT_FALSE(db->getCollection(&opCtx, rolledBackName));
-
- // The committedCollection should not have been affected by the rollback. Holders
- // of the original Collection pointer should still be valid.
- ASSERT_EQUALS(db->getCollection(&opCtx, committedName), committedColl);
- }
-};
-
-class RollbackDropCollection {
-public:
- void run() {
- const string dbName = "rollback_drop_collection";
- const string droppedName = dbName + ".dropped";
- const string rolledBackName = dbName + ".rolled_back";
-
- const ServiceContext::UniqueOperationContext opCtxPtr = cc().makeOperationContext();
- OperationContext& opCtx = *opCtxPtr;
-
- Lock::DBLock lk(&opCtx, dbName, MODE_X);
-
- bool justCreated;
- Database* db = DatabaseHolder::getDatabaseHolder().openDb(&opCtx, dbName, &justCreated);
- ASSERT(justCreated);
-
- {
- WriteUnitOfWork wunit(&opCtx);
- ASSERT_FALSE(db->getCollection(&opCtx, droppedName));
- Collection* droppedColl;
- droppedColl = db->createCollection(&opCtx, droppedName);
- ASSERT_EQUALS(db->getCollection(&opCtx, droppedName), droppedColl);
- db->dropCollection(&opCtx, droppedName).transitional_ignore();
- wunit.commit();
- }
-
-        // The drop was committed, so the collection should really be gone
- ASSERT_FALSE(db->getCollection(&opCtx, droppedName));
-
- {
- WriteUnitOfWork wunit(&opCtx);
- ASSERT_FALSE(db->getCollection(&opCtx, rolledBackName));
- Collection* rolledBackColl = db->createCollection(&opCtx, rolledBackName);
- wunit.commit();
- ASSERT_EQUALS(db->getCollection(&opCtx, rolledBackName), rolledBackColl);
- db->dropCollection(&opCtx, rolledBackName).transitional_ignore();
-            // not committing, so the drop should be rolled back
- }
-
- // The rolledBackCollection dropping should have been rolled back.
- // Original Collection pointers are no longer valid.
- ASSERT(db->getCollection(&opCtx, rolledBackName));
-
- // The droppedCollection should not have been restored by the rollback.
- ASSERT_FALSE(db->getCollection(&opCtx, droppedName));
- }
-};
-} // namespace DatabaseTests
-
-class All : public Suite {
-public:
- All() : Suite("namespace") {}
-
- void setupTests() {
- add<MissingFieldTests::BtreeIndexMissingField>();
- add<MissingFieldTests::TwoDIndexMissingField>();
- add<MissingFieldTests::HashedIndexMissingField>();
- add<MissingFieldTests::HashedIndexMissingFieldAlternateSeed>();
-
-// add< NamespaceDetailsTests::Create >();
-// add< NamespaceDetailsTests::SingleAlloc >();
-// add< NamespaceDetailsTests::Realloc >();
-// add< NamespaceDetailsTests::AllocCappedNotQuantized >();
-// add< NamespaceDetailsTests::TwoExtent >();
-// add< NamespaceDetailsTests::TruncateCapped >();
-// add< NamespaceDetailsTests::Migrate >();
-// add< NamespaceDetailsTests::SwapIndexEntriesTest >();
-// add< NamespaceDetailsTests::BigCollection >();
-
-#if 0
- // until ROLLBACK_ENABLED
- add< DatabaseTests::RollbackCreateCollection >();
- add< DatabaseTests::RollbackDropCollection >();
-#endif
- }
-};
-
-SuiteInstance<All> myall;
-
-} // namespace NamespaceTests
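[Editor's note] The two DatabaseTests above lean on WriteUnitOfWork's RAII
contract: work registered inside the unit is undone unless commit() runs before
destruction. A self-contained sketch of that contract under assumed names
(UnitOfWork and onRollback are illustrative, not the storage API):

    #include <cassert>
    #include <functional>
    #include <vector>

    // Miniature commit-or-rollback unit; undo actions run in reverse order.
    class UnitOfWork {
    public:
        void onRollback(std::function<void()> undo) {
            _undo.push_back(std::move(undo));
        }
        void commit() {
            _committed = true;
        }
        ~UnitOfWork() {
            if (!_committed)
                for (auto it = _undo.rbegin(); it != _undo.rend(); ++it)
                    (*it)();
        }
    private:
        std::vector<std::function<void()>> _undo;
        bool _committed = false;
    };

    int main() {
        bool collectionExists = false;
        {
            UnitOfWork wunit;
            collectionExists = true;                       // "createCollection"
            wunit.onRollback([&] { collectionExists = false; });
            // destroyed without commit(), like the rolled_back cases above
        }
        assert(!collectionExists);
        return 0;
    }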
diff --git a/src/mongo/embedded/embedded.cpp b/src/mongo/embedded/embedded.cpp
index a7c5c6c719d..aa4b4133c5e 100644
--- a/src/mongo/embedded/embedded.cpp
+++ b/src/mongo/embedded/embedded.cpp
@@ -231,12 +231,6 @@ ServiceContext* initialize(const char* yaml_config) {
uassert(50677, ss.str().c_str(), boost::filesystem::exists(storageGlobalParams.dbpath));
}
- {
- std::stringstream ss;
- ss << "repairpath (" << storageGlobalParams.repairpath << ") does not exist";
- uassert(50678, ss.str().c_str(), boost::filesystem::exists(storageGlobalParams.repairpath));
- }
-
if (!storageGlobalParams.readOnly) {
boost::filesystem::remove_all(storageGlobalParams.dbpath + "/_tmp/");
}
diff --git a/src/mongo/embedded/embedded_options.cpp b/src/mongo/embedded/embedded_options.cpp
index 524a751a5ee..b1e8792c346 100644
--- a/src/mongo/embedded/embedded_options.cpp
+++ b/src/mongo/embedded/embedded_options.cpp
@@ -77,11 +77,6 @@ Status addOptions(optionenvironment::OptionSection* options) {
#endif
- storage_options.addOptionChaining("storage.repairPath",
- "repairpath",
- optionenvironment::String,
- "root directory for repair files - defaults to dbpath");
-
options->addSection(general_options).transitional_ignore();
options->addSection(storage_options).transitional_ignore();
@@ -131,23 +126,6 @@ Status storeOptions(const moe::Environment& params) {
}
#endif
- // needs to be after things like --configsvr parsing, thus here.
- if (params.count("storage.repairPath")) {
- storageGlobalParams.repairpath = params["storage.repairPath"].as<string>();
- if (!storageGlobalParams.repairpath.size()) {
- return Status(ErrorCodes::BadValue, "repairpath is empty");
- }
-
- if (storageGlobalParams.dur &&
- !str::startsWith(storageGlobalParams.repairpath, storageGlobalParams.dbpath)) {
- return Status(ErrorCodes::BadValue,
- "You must use a --repairpath that is a subdirectory of --dbpath when "
- "using journaling");
- }
- } else {
- storageGlobalParams.repairpath = storageGlobalParams.dbpath;
- }
-
return Status::OK();
}
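[Editor's note] The block removed above enforced that, when journaling was
enabled, --repairpath had to be a subdirectory of --dbpath. A self-contained
sketch of the prefix check it performed; isSubdirOf is a hypothetical helper
mirroring the str::startsWith() call in the removed code:

    #include <cassert>
    #include <string>

    // True when 'path' begins with 'root' (a plain prefix check, with the
    // same limitation as the original: "/data/db2" would pass for "/data/db").
    static bool isSubdirOf(const std::string& path, const std::string& root) {
        return path.rfind(root, 0) == 0;
    }

    int main() {
        assert(isSubdirOf("/data/db/repair", "/data/db"));
        assert(!isSubdirOf("/tmp/repair", "/data/db"));  // rejected with journaling
        return 0;
    }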