summaryrefslogtreecommitdiff
path: root/src/mongo/db
diff options
context:
space:
mode:
authorGeert Bosch <geert@mongodb.com>2015-05-12 15:38:32 -0500
committerGeert Bosch <geert@mongodb.com>2015-05-12 17:01:05 -0400
commit4486be04b169d967683ce705b6288c82cbd34947 (patch)
tree359b7b24b83a8588edc177983f67bc7cf0030851 /src/mongo/db
parentb94f71a5dfa687aef2aa1af83e797683f9ad7476 (diff)
downloadmongo-4486be04b169d967683ce705b6288c82cbd34947.tar.gz
SERVER-18410: Move MMAPv1 specific libraries from util to db/storage/mmap_v1
Also removes some dependencies on MMAP_v1 specific files that are not needed anymore, and moves the 'paths' library to db/storage.
Diffstat (limited to 'src/mongo/db')
-rw-r--r--src/mongo/db/SConscript4
-rw-r--r--src/mongo/db/catalog/database_holder.cpp1
-rw-r--r--src/mongo/db/catalog/index_catalog_entry.cpp1
-rw-r--r--src/mongo/db/prefetch.cpp2
-rw-r--r--src/mongo/db/storage/SConscript11
-rw-r--r--src/mongo/db/storage/mmap_v1/SConscript55
-rw-r--r--src/mongo/db/storage/mmap_v1/compress.cpp59
-rw-r--r--src/mongo/db/storage/mmap_v1/compress.h49
-rw-r--r--src/mongo/db/storage/mmap_v1/data_file.cpp2
-rw-r--r--src/mongo/db/storage/mmap_v1/data_file_sync.cpp2
-rw-r--r--src/mongo/db/storage/mmap_v1/dur_journal.cpp8
-rw-r--r--src/mongo/db/storage/mmap_v1/dur_journalimpl.h2
-rw-r--r--src/mongo/db/storage/mmap_v1/dur_recover.cpp2
-rw-r--r--src/mongo/db/storage/mmap_v1/durable_mapped_file.h4
-rw-r--r--src/mongo/db/storage/mmap_v1/durop.cpp2
-rw-r--r--src/mongo/db/storage/mmap_v1/durop.h2
-rw-r--r--src/mongo/db/storage/mmap_v1/file_allocator.cpp472
-rw-r--r--src/mongo/db/storage/mmap_v1/file_allocator.h109
-rw-r--r--src/mongo/db/storage/mmap_v1/journal_latency_test_cmd.cpp4
-rw-r--r--src/mongo/db/storage/mmap_v1/logfile.cpp270
-rw-r--r--src/mongo/db/storage/mmap_v1/logfile.h77
-rw-r--r--src/mongo/db/storage/mmap_v1/mmap.cpp258
-rw-r--r--src/mongo/db/storage/mmap_v1/mmap.h262
-rw-r--r--src/mongo/db/storage/mmap_v1/mmap_posix.cpp324
-rw-r--r--src/mongo/db/storage/mmap_v1/mmap_v1_engine.cpp4
-rw-r--r--src/mongo/db/storage/mmap_v1/mmap_v1_extent_manager.cpp2
-rw-r--r--src/mongo/db/storage/mmap_v1/mmap_windows.cpp539
-rw-r--r--src/mongo/db/storage/mmap_v1/record_store_v1_capped.cpp2
-rw-r--r--src/mongo/db/storage/mmap_v1/repair_database.cpp4
-rw-r--r--src/mongo/db/storage/paths.cpp113
-rw-r--r--src/mongo/db/storage/paths.h93
-rw-r--r--src/mongo/db/storage/storage_engine_lock_file_posix.cpp2
32 files changed, 2709 insertions, 32 deletions
diff --git a/src/mongo/db/SConscript b/src/mongo/db/SConscript
index 0ea12270135..a074bbbf81b 100644
--- a/src/mongo/db/SConscript
+++ b/src/mongo/db/SConscript
@@ -635,8 +635,7 @@ serveronlyLibdeps = [
"$BUILD_DIR/mongo/s/serveronly",
"$BUILD_DIR/mongo/scripting/scripting_server",
"$BUILD_DIR/mongo/util/elapsed_tracker",
- "$BUILD_DIR/mongo/util/file_allocator",
- "$BUILD_DIR/mongo/util/mmap",
+ "$BUILD_DIR/mongo/db/storage/mmap_v1/file_allocator",
"$BUILD_DIR/third_party/shim_snappy",
"auth/authmongod",
"catalog/collection_options",
@@ -667,6 +666,7 @@ serveronlyLibdeps = [
"stats/top",
"storage/devnull/storage_devnull",
"storage/in_memory/storage_in_memory",
+ "storage/mmap_v1/mmap",
"storage/mmap_v1/storage_mmapv1",
"storage/storage_engine_lock_file",
"storage/storage_engine_metadata",
diff --git a/src/mongo/db/catalog/database_holder.cpp b/src/mongo/db/catalog/database_holder.cpp
index 9b23d193557..61110aa2b5e 100644
--- a/src/mongo/db/catalog/database_holder.cpp
+++ b/src/mongo/db/catalog/database_holder.cpp
@@ -42,7 +42,6 @@
#include "mongo/db/service_context.h"
#include "mongo/db/operation_context.h"
#include "mongo/db/storage/storage_engine.h"
-#include "mongo/util/file_allocator.h"
#include "mongo/util/log.h"
namespace mongo {
diff --git a/src/mongo/db/catalog/index_catalog_entry.cpp b/src/mongo/db/catalog/index_catalog_entry.cpp
index ad4e3b31b3a..67c41f54fd0 100644
--- a/src/mongo/db/catalog/index_catalog_entry.cpp
+++ b/src/mongo/db/catalog/index_catalog_entry.cpp
@@ -43,7 +43,6 @@
#include "mongo/db/matcher/expression_parser.h"
#include "mongo/db/operation_context.h"
#include "mongo/db/service_context.h"
-#include "mongo/util/file_allocator.h"
#include "mongo/util/log.h"
#include "mongo/util/scopeguard.h"
diff --git a/src/mongo/db/prefetch.cpp b/src/mongo/db/prefetch.cpp
index 1de1d565c47..fd306d8d5e2 100644
--- a/src/mongo/db/prefetch.cpp
+++ b/src/mongo/db/prefetch.cpp
@@ -43,8 +43,8 @@
#include "mongo/db/repl/replication_coordinator_global.h"
#include "mongo/db/server_parameters.h"
#include "mongo/db/stats/timer_stats.h"
+#include "mongo/db/storage/mmap_v1/mmap.h"
#include "mongo/util/log.h"
-#include "mongo/util/mmap.h"
namespace mongo {
diff --git a/src/mongo/db/storage/SConscript b/src/mongo/db/storage/SConscript
index 6dada1ec2c8..83d21072778 100644
--- a/src/mongo/db/storage/SConscript
+++ b/src/mongo/db/storage/SConscript
@@ -49,6 +49,15 @@ env.Library(
)
env.Library(
+ target='paths',
+ source=[
+ 'paths.cpp',
+ ],
+ LIBDEPS=[
+ ],
+)
+
+env.Library(
target='sorted_data_interface_test_harness',
source=[
'sorted_data_interface_test_bulkbuilder.cpp',
@@ -104,7 +113,7 @@ env.Library(
'storage_engine_lock_file_${TARGET_OS_FAMILY}.cpp',
],
LIBDEPS=[
- '$BUILD_DIR/mongo/util/paths',
+ 'paths',
]
)
diff --git a/src/mongo/db/storage/mmap_v1/SConscript b/src/mongo/db/storage/mmap_v1/SConscript
index c232cb43ce0..c7cbbd07e2c 100644
--- a/src/mongo/db/storage/mmap_v1/SConscript
+++ b/src/mongo/db/storage/mmap_v1/SConscript
@@ -33,13 +33,26 @@ env.Library(
'record_store_v1',
'record_access_tracker',
'btree',
- '$BUILD_DIR/mongo/util/logfile',
- '$BUILD_DIR/mongo/util/compress',
- '$BUILD_DIR/mongo/util/file_allocator',
- '$BUILD_DIR/mongo/util/paths',
+ 'file_allocator',
+ 'logfile',
+ 'compress',
+ '$BUILD_DIR/mongo/db/storage/paths',
]
)
+# Build the 'compress' library from a clone of the environment; the snappy
+# third-party include paths are injected into the clone only.
+compressEnv = env.Clone()
+compressEnv.InjectThirdPartyIncludePaths(libraries=['snappy'])
+compressEnv.Library(
+    target='compress',
+    source=[
+        'compress.cpp',
+    ],
+    LIBDEPS=[
+        '$BUILD_DIR/mongo/db/storage/paths',
+    ],
+)
+
env.Library(
target= 'extent',
source= [
@@ -53,6 +66,38 @@ env.Library(
)
env.Library(
+ target='file_allocator',
+ source=[
+ 'file_allocator.cpp',
+ ],
+ LIBDEPS=[
+ '$BUILD_DIR/mongo/db/storage/paths',
+ ],
+)
+
+env.Library(
+ target='logfile',
+ source=[
+ 'logfile.cpp',
+ ],
+ LIBDEPS=[
+ '$BUILD_DIR/mongo/db/storage/paths',
+ ],
+)
+
+env.Library(
+ target='mmap',
+ source=[
+ 'mmap.cpp',
+ 'mmap_${TARGET_OS_FAMILY}.cpp',
+ ],
+ LIBDEPS=[
+ 'file_allocator',
+ '$BUILD_DIR/mongo/util/foundation',
+ ],
+)
+
+env.Library(
target= 'record_store_v1',
source= [
'record_store_v1_base.cpp',
@@ -162,7 +207,7 @@ env.Library(
],
LIBDEPS= [
'btree',
- '$BUILD_DIR/mongo/db/storage/mmap_v1/record_store_v1_test_help',
+ 'record_store_v1_test_help',
]
)
diff --git a/src/mongo/db/storage/mmap_v1/compress.cpp b/src/mongo/db/storage/mmap_v1/compress.cpp
new file mode 100644
index 00000000000..bae8bc5acba
--- /dev/null
+++ b/src/mongo/db/storage/mmap_v1/compress.cpp
@@ -0,0 +1,59 @@
+// @file compress.cpp
+
+/**
+* Copyright (C) 2012 10gen Inc.
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU Affero General Public License, version 3,
+* as published by the Free Software Foundation.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU Affero General Public License for more details.
+*
+* You should have received a copy of the GNU Affero General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*
+* As a special exception, the copyright holders give permission to link the
+* code of portions of this program with the OpenSSL library under certain
+* conditions as described in each individual source file and distribute
+* linked combinations including the program with the OpenSSL library. You
+* must comply with the GNU Affero General Public License in all respects
+* for all of the code used other than as permitted herein. If you modify
+* file(s) with this exception, you may extend this exception to your
+* version of the file(s), but you are not obligated to do so. If you do not
+* wish to do so, delete this exception statement from your version. If you
+* delete this exception statement from all source files in the program,
+* then also delete it in the license file.
+*/
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/storage/mmap_v1/compress.h"
+
+#include <snappy.h>
+
+namespace mongo {
+
+ /**
+  * Forwards all four arguments, unchanged, to snappy::RawCompress.
+  */
+ void rawCompress(const char* input, size_t input_length,
+                  char* compressed, size_t* compressed_length) {
+     snappy::RawCompress(input, input_length, compressed, compressed_length);
+ }
+
+ /**
+  * Returns whatever snappy::MaxCompressedLength reports for 'source_len'.
+  */
+ size_t maxCompressedLength(size_t source_len) {
+     const size_t upperBound = snappy::MaxCompressedLength(source_len);
+     return upperBound;
+ }
+
+ /**
+  * Forwards to snappy::Compress and returns its result unchanged.
+  */
+ size_t compress(const char* input, size_t input_length, std::string* output) {
+     const size_t snappyResult = snappy::Compress(input, input_length, output);
+     return snappyResult;
+ }
+
+ /**
+  * Forwards to snappy::Uncompress and returns its boolean result unchanged.
+  */
+ bool uncompress(const char* compressed, size_t compressed_length, std::string* uncompressed) {
+     const bool succeeded = snappy::Uncompress(compressed, compressed_length, uncompressed);
+     return succeeded;
+ }
+
+}
diff --git a/src/mongo/db/storage/mmap_v1/compress.h b/src/mongo/db/storage/mmap_v1/compress.h
new file mode 100644
index 00000000000..b8afa4d90c5
--- /dev/null
+++ b/src/mongo/db/storage/mmap_v1/compress.h
@@ -0,0 +1,49 @@
+// @file compress.h
+
+/**
+* Copyright (C) 2012 10gen Inc.
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU Affero General Public License, version 3,
+* as published by the Free Software Foundation.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU Affero General Public License for more details.
+*
+* You should have received a copy of the GNU Affero General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*
+* As a special exception, the copyright holders give permission to link the
+* code of portions of this program with the OpenSSL library under certain
+* conditions as described in each individual source file and distribute
+* linked combinations including the program with the OpenSSL library. You
+* must comply with the GNU Affero General Public License in all respects
+* for all of the code used other than as permitted herein. If you modify
+* file(s) with this exception, you may extend this exception to your
+* version of the file(s), but you are not obligated to do so. If you do not
+* wish to do so, delete this exception statement from your version. If you
+* delete this exception statement from all source files in the program,
+* then also delete it in the license file.
+*/
+
+#pragma once
+
+#include <string>
+
+namespace mongo {
+
+ // Defined in compress.cpp as a direct forward to snappy::Compress; the
+ // return value is whatever snappy::Compress returns.
+ size_t compress(const char* input, size_t input_length, std::string* output);
+
+ // Defined in compress.cpp as a direct forward to snappy::Uncompress; the
+ // return value is whatever snappy::Uncompress returns.
+ bool uncompress(const char* compressed, size_t compressed_length, std::string* uncompressed);
+
+ // Defined in compress.cpp as a direct forward to snappy::MaxCompressedLength.
+ size_t maxCompressedLength(size_t source_len);
+ // Defined in compress.cpp as a direct forward to snappy::RawCompress.
+ // NOTE(review): presumably 'compressed' must hold at least
+ // maxCompressedLength(input_length) bytes -- confirm against the snappy docs.
+ void rawCompress(const char* input,
+ size_t input_length,
+ char* compressed,
+ size_t* compressed_length);
+
+}
+
+
diff --git a/src/mongo/db/storage/mmap_v1/data_file.cpp b/src/mongo/db/storage/mmap_v1/data_file.cpp
index fcd604d3206..04dda4902ef 100644
--- a/src/mongo/db/storage/mmap_v1/data_file.cpp
+++ b/src/mongo/db/storage/mmap_v1/data_file.cpp
@@ -42,7 +42,7 @@
#include "mongo/db/storage/mmap_v1/durable_mapped_file.h"
#include "mongo/db/storage/mmap_v1/mmap_v1_options.h"
#include "mongo/db/operation_context.h"
-#include "mongo/util/file_allocator.h"
+#include "mongo/db/storage/mmap_v1/file_allocator.h"
#include "mongo/util/log.h"
namespace mongo {
diff --git a/src/mongo/db/storage/mmap_v1/data_file_sync.cpp b/src/mongo/db/storage/mmap_v1/data_file_sync.cpp
index cf1d1abca88..b65124052d7 100644
--- a/src/mongo/db/storage/mmap_v1/data_file_sync.cpp
+++ b/src/mongo/db/storage/mmap_v1/data_file_sync.cpp
@@ -35,11 +35,11 @@
#include "mongo/db/commands/server_status_metric.h"
#include "mongo/db/service_context.h"
#include "mongo/db/instance.h"
+#include "mongo/db/storage/mmap_v1/mmap.h"
#include "mongo/db/storage/mmap_v1/mmap_v1_options.h"
#include "mongo/db/storage_options.h"
#include "mongo/util/exit.h"
#include "mongo/util/log.h"
-#include "mongo/util/mmap.h"
namespace mongo {
diff --git a/src/mongo/db/storage/mmap_v1/dur_journal.cpp b/src/mongo/db/storage/mmap_v1/dur_journal.cpp
index 50cb5ee3d22..1a08dc3c01c 100644
--- a/src/mongo/db/storage/mmap_v1/dur_journal.cpp
+++ b/src/mongo/db/storage/mmap_v1/dur_journal.cpp
@@ -41,24 +41,24 @@
#include "mongo/base/init.h"
#include "mongo/config.h"
#include "mongo/db/client.h"
+#include "mongo/db/storage/mmap_v1/mmap.h"
#include "mongo/db/storage/mmap_v1/aligned_builder.h"
+#include "mongo/db/storage/mmap_v1/compress.h"
#include "mongo/db/storage/mmap_v1/dur_journalformat.h"
#include "mongo/db/storage/mmap_v1/dur_journalimpl.h"
#include "mongo/db/storage/mmap_v1/dur_stats.h"
+#include "mongo/db/storage/mmap_v1/logfile.h"
#include "mongo/db/storage/mmap_v1/mmap_v1_options.h"
+#include "mongo/db/storage/paths.h"
#include "mongo/db/storage_options.h"
#include "mongo/platform/random.h"
#include "mongo/util/checksum.h"
-#include "mongo/util/compress.h"
#include "mongo/util/exit.h"
#include "mongo/util/file.h"
#include "mongo/util/hex.h"
#include "mongo/util/log.h"
-#include "mongo/util/logfile.h"
-#include "mongo/util/mmap.h"
#include "mongo/util/mongoutils/str.h"
#include "mongo/util/net/listen.h" // getelapsedtimemillis
-#include "mongo/util/paths.h"
#include "mongo/util/progress_meter.h"
#include "mongo/util/timer.h"
diff --git a/src/mongo/db/storage/mmap_v1/dur_journalimpl.h b/src/mongo/db/storage/mmap_v1/dur_journalimpl.h
index 04aca13e69d..7664e1f5265 100644
--- a/src/mongo/db/storage/mmap_v1/dur_journalimpl.h
+++ b/src/mongo/db/storage/mmap_v1/dur_journalimpl.h
@@ -31,7 +31,7 @@
#pragma once
#include "mongo/db/storage/mmap_v1/dur_journalformat.h"
-#include "mongo/util/logfile.h"
+#include "mongo/db/storage/mmap_v1/logfile.h"
namespace mongo {
namespace dur {
diff --git a/src/mongo/db/storage/mmap_v1/dur_recover.cpp b/src/mongo/db/storage/mmap_v1/dur_recover.cpp
index b4e0a6b3bd9..b6d997d4025 100644
--- a/src/mongo/db/storage/mmap_v1/dur_recover.cpp
+++ b/src/mongo/db/storage/mmap_v1/dur_recover.cpp
@@ -40,6 +40,7 @@
#include <sys/stat.h>
#include "mongo/db/operation_context_impl.h"
+#include "mongo/db/storage/mmap_v1/compress.h"
#include "mongo/db/storage/mmap_v1/dur_commitjob.h"
#include "mongo/db/storage/mmap_v1/dur_journal.h"
#include "mongo/db/storage/mmap_v1/dur_journalformat.h"
@@ -49,7 +50,6 @@
#include "mongo/db/storage/mmap_v1/mmap_v1_options.h"
#include "mongo/util/bufreader.h"
#include "mongo/util/checksum.h"
-#include "mongo/util/compress.h"
#include "mongo/util/exit.h"
#include "mongo/util/hex.h"
#include "mongo/util/log.h"
diff --git a/src/mongo/db/storage/mmap_v1/durable_mapped_file.h b/src/mongo/db/storage/mmap_v1/durable_mapped_file.h
index 53a211f609e..0ebbd9dd7c9 100644
--- a/src/mongo/db/storage/mmap_v1/durable_mapped_file.h
+++ b/src/mongo/db/storage/mmap_v1/durable_mapped_file.h
@@ -31,8 +31,8 @@
#pragma once
-#include "mongo/util/mmap.h"
-#include "mongo/util/paths.h"
+#include "mongo/db/storage/mmap_v1/mmap.h"
+#include "mongo/db/storage/paths.h"
namespace mongo {
diff --git a/src/mongo/db/storage/mmap_v1/durop.cpp b/src/mongo/db/storage/mmap_v1/durop.cpp
index aa70b169f32..ee91e0c60aa 100644
--- a/src/mongo/db/storage/mmap_v1/durop.cpp
+++ b/src/mongo/db/storage/mmap_v1/durop.cpp
@@ -42,7 +42,7 @@
#include "mongo/db/storage/mmap_v1/durable_mapped_file.h"
#include "mongo/db/storage/mmap_v1/mmap_v1_engine.h"
#include "mongo/util/file.h"
-#include "mongo/util/file_allocator.h"
+#include "mongo/db/storage/mmap_v1/file_allocator.h"
#include "mongo/util/log.h"
#include "mongo/util/mongoutils/str.h"
diff --git a/src/mongo/db/storage/mmap_v1/durop.h b/src/mongo/db/storage/mmap_v1/durop.h
index ee31780896f..e7da38a32d1 100644
--- a/src/mongo/db/storage/mmap_v1/durop.h
+++ b/src/mongo/db/storage/mmap_v1/durop.h
@@ -33,8 +33,8 @@
#include <boost/shared_ptr.hpp>
#include "mongo/db/storage/mmap_v1/dur_journalformat.h"
+#include "mongo/db/storage/paths.h"
#include "mongo/util/bufreader.h"
-#include "mongo/util/paths.h"
namespace mongo {
diff --git a/src/mongo/db/storage/mmap_v1/file_allocator.cpp b/src/mongo/db/storage/mmap_v1/file_allocator.cpp
new file mode 100644
index 00000000000..d0bd764d25a
--- /dev/null
+++ b/src/mongo/db/storage/mmap_v1/file_allocator.cpp
@@ -0,0 +1,472 @@
+// @file file_allocator.cpp
+
+/* Copyright 2009 10gen Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects
+ * for all of the code used other than as permitted herein. If you modify
+ * file(s) with this exception, you may extend this exception to your
+ * version of the file(s), but you are not obligated to do so. If you do not
+ * wish to do so, delete this exception statement from your version. If you
+ * delete this exception statement from all source files in the program,
+ * then also delete it in the license file.
+ */
+
+#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kStorage
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/storage/mmap_v1/file_allocator.h"
+
+#include <boost/thread.hpp>
+#include <boost/filesystem/operations.hpp>
+#include <errno.h>
+#include <fcntl.h>
+
+#if defined(__FreeBSD__)
+# include <sys/param.h>
+# include <sys/mount.h>
+#endif
+
+#if defined(__linux__)
+# include <sys/vfs.h>
+#endif
+
+#if defined(_WIN32)
+# include <io.h>
+#endif
+
+#include "mongo/db/storage/paths.h"
+#include "mongo/platform/posix_fadvise.h"
+#include "mongo/stdx/functional.h"
+#include "mongo/util/concurrency/thread_name.h"
+#include "mongo/util/fail_point.h"
+#include "mongo/util/fail_point_service.h"
+#include "mongo/util/log.h"
+#include "mongo/util/mongoutils/str.h"
+#include "mongo/util/processinfo.h"
+#include "mongo/util/time_support.h"
+#include "mongo/util/timer.h"
+
+using namespace mongoutils;
+
+#ifndef O_NOATIME
+#define O_NOATIME (0)
+#endif
+
+namespace mongo {
+
+ using std::endl;
+ using std::list;
+ using std::string;
+ using std::stringstream;
+
+ // unique number for temporary file names
+ // Seeded from curTimeMicros64() at the top of FileAllocator::run() and
+ // post-incremented by makeTempFileName(); both accesses hold _uniqueNumberMutex.
+ unsigned long long FileAllocator::_uniqueNumber = 0;
+ static SimpleMutex _uniqueNumberMutex( "uniqueNumberMutex" );
+
+ // Fail point checked at the top of ensureLength(); when active, ensureLength
+ // raises uassert 10444 instead of allocating.
+ MONGO_FP_DECLARE(allocateDiskFull);
+
+ /**
+ * Aliases for Win32 CRT functions
+ *
+ * On Windows the unprefixed names lseek/write/close used below are mapped
+ * onto the underscore-prefixed CRT functions _lseek/_write/_close.
+ */
+#if defined(_WIN32)
+ static inline long lseek(int fd, long offset, int origin) { return _lseek(fd, offset, origin); }
+ static inline int write(int fd, const void *data, int count) { return _write(fd, data, count); }
+ static inline int close(int fd) { return _close(fd); }
+
+ // Function-pointer type and global slot for GetVolumeInformationByHandleW.
+ // The slot has static storage duration, so it is null until the initializer
+ // below assigns it; isFileOnNTFSVolume() checks for null before calling.
+ typedef BOOL (CALLBACK *GetVolumeInformationByHandleWPtr)(HANDLE, LPWSTR, DWORD, LPDWORD, LPDWORD, LPDWORD, LPWSTR, DWORD);
+ GetVolumeInformationByHandleWPtr GetVolumeInformationByHandleWFunc;
+
+ // Looks the function up in kernel32.dll at initializer time rather than
+ // linking against it directly. Always reports Status::OK(), even when the
+ // library cannot be loaded or GetProcAddress returns null.
+ MONGO_INITIALIZER(InitGetVolumeInformationByHandleW)(InitializerContext *context) {
+ HMODULE kernelLib = LoadLibraryA("kernel32.dll");
+ if (kernelLib) {
+ GetVolumeInformationByHandleWFunc = reinterpret_cast<GetVolumeInformationByHandleWPtr>
+ (GetProcAddress(kernelLib, "GetVolumeInformationByHandleW"));
+ }
+ return Status::OK();
+ }
+#endif
+
+ /**
+  * Ensures the directory that contains 'p' exists, creating missing ancestors
+  * first, and returns that containing directory. Trips verify() if the
+  * resulting parent is not a directory.
+  */
+ boost::filesystem::path ensureParentDirCreated(const boost::filesystem::path& p){
+     namespace fs = boost::filesystem;
+     const fs::path parentDir = p.branch_path();
+
+     const bool parentMissing = !fs::exists(parentDir);
+     if (parentMissing) {
+         // Recurse first so ancestors are created top-down before this level.
+         ensureParentDirCreated(parentDir);
+         log() << "creating directory " << parentDir.string() << endl;
+         fs::create_directory(parentDir);
+         flushMyDirectory(parentDir); // flushes grandparent to ensure parent exists after crash
+     }
+
+     verify(fs::is_directory(parentDir));
+     return parentDir;
+ }
+
+ // Value-initializes _failed; every other member is default-initialized.
+ FileAllocator::FileAllocator() : _failed() {}
+
+
+ /**
+  * Launches the background allocation thread, which executes
+  * FileAllocator::run with 'this' as its argument. The local thread handle
+  * goes out of scope immediately; the thread is never joined here.
+  */
+ void FileAllocator::start() {
+     boost::thread allocatorThread( stdx::bind( &FileAllocator::run , this ) );
+ }
+
+ /**
+  * Queues an asynchronous allocation of 'name' with the requested 'size'.
+  *
+  * Does nothing when the allocator is in the failed state. If a size is
+  * already known for 'name' (pending or on disk, per prevSize()), 'size' is
+  * overwritten with it and nothing is queued. Otherwise the request is
+  * appended to the pending list and waiters are notified.
+  */
+ void FileAllocator::requestAllocation( const string &name, long &size ) {
+     boost::lock_guard<boost::mutex> pendingLock( _pendingMutex );
+     if ( _failed ) {
+         return;
+     }
+
+     const long knownSize = prevSize( name );
+     const bool alreadyKnown = ( knownSize != -1 );
+     if ( alreadyKnown ) {
+         size = knownSize;
+     }
+     else {
+         _pending.push_back( name );
+         _pendingSize[ name ] = size;
+         _pendingUpdated.notify_all();
+     }
+ }
+
+ /**
+  * Requests allocation of 'name' ahead of other queued requests and blocks
+  * until 'name' is no longer in the pending list.
+  *
+  * If a size is already known for 'name' (see prevSize()), 'size' is
+  * overwritten with it. checkFailure() is consulted before queuing and on
+  * every wakeup, so a failed allocator makes this call raise rather than
+  * wait indefinitely.
+  */
+ void FileAllocator::allocateAsap( const string &name, unsigned long long &size ) {
+ boost::unique_lock<boost::mutex> lk( _pendingMutex );
+
+ // In case the allocator is in failed state, check once before starting so that subsequent
+ // requests for the same database would fail fast after the first one has failed.
+ checkFailure();
+
+ long oldSize = prevSize( name );
+ if ( oldSize != -1 ) {
+ size = oldSize;
+ // A size is known but nothing is queued for this name: nothing to wait for.
+ if ( !inProgress( name ) )
+ return;
+ }
+ checkFailure();
+ _pendingSize[ name ] = size;
+ if ( _pending.size() == 0 )
+ _pending.push_back( name );
+ else if ( _pending.front() != name ) {
+ // Drop any existing entry for 'name' and re-insert it in the second
+ // position; the front entry is left where it is (presumably because the
+ // allocator thread may already be working on it -- see run()).
+ _pending.remove( name );
+ list< string >::iterator i = _pending.begin();
+ ++i;
+ _pending.insert( i, name );
+ }
+ _pendingUpdated.notify_all();
+ // run() calls notify_all both after completing an allocation and after
+ // marking the allocator failed, so each wakeup re-checks both conditions.
+ while( inProgress( name ) ) {
+ checkFailure();
+ _pendingUpdated.wait(lk);
+ }
+
+ }
+
+ /**
+  * Blocks until the pending list is empty. Returns immediately, without
+  * waiting, if the allocator is in the failed state on entry.
+  *
+  * _failed is written by the allocator thread in run(), so it is read here
+  * only after _pendingMutex has been acquired, matching requestAllocation().
+  */
+ void FileAllocator::waitUntilFinished() const {
+     boost::unique_lock<boost::mutex> lk( _pendingMutex );
+     if ( _failed )
+         return;
+     while( _pending.size() != 0 )
+         _pendingUpdated.wait(lk);
+ }
+
+ // TODO: pull this out to per-OS files once they exist
+ /**
+  * Decides, per platform, whether a new file on the filesystem backing 'fd'
+  * should be created sparse (ensureLength() then uses ftruncate).
+  *
+  *  - Linux:   true when fstatfs reports the NFS or tmpfs magic number.
+  *  - FreeBSD: true when the filesystem type name is "zfs", "nfs" or "oldnfs".
+  *  - Solaris (__sun): always true.
+  *  - anything else: always false.
+  *
+  * On Linux and FreeBSD a failing fstatfs raises uassert 16062.
+  */
+ static bool useSparseFiles(int fd) {
+
+#if defined(__linux__) || defined(__FreeBSD__)
+ struct statfs fs_stats;
+ int ret = fstatfs(fd, &fs_stats);
+ uassert(16062, "fstatfs failed: " + errnoWithDescription(), ret == 0);
+#endif
+
+#if defined(__linux__)
+// these are from <linux/magic.h> but that isn't available on all systems
+# define NFS_SUPER_MAGIC 0x6969
+# define TMPFS_MAGIC 0x01021994
+
+ return (fs_stats.f_type == NFS_SUPER_MAGIC)
+ || (fs_stats.f_type == TMPFS_MAGIC)
+ ;
+
+#elif defined(__FreeBSD__)
+
+ return (str::equals(fs_stats.f_fstypename, "zfs") ||
+ str::equals(fs_stats.f_fstypename, "nfs") ||
+ str::equals(fs_stats.f_fstypename, "oldnfs"));
+
+#elif defined(__sun)
+ // assume using ZFS which is copy-on-write so no benefit to zero-filling
+ // TODO: check which fs we are using like we do elsewhere
+ return true;
+#else
+ return false;
+#endif
+ }
+
+#if defined(_WIN32)
+ /**
+  * Returns true only when the volume holding the file open on 'fd' reports
+  * its filesystem name as "NTFS".
+  *
+  * Returns false (after logging a warning) when the dynamically resolved
+  * GetVolumeInformationByHandleW pointer is null, when the CRT descriptor
+  * cannot be mapped to an OS handle, or when the volume query itself fails.
+  */
+ static bool isFileOnNTFSVolume(int fd) {
+     if (!GetVolumeInformationByHandleWFunc) {
+         warning() << "Could not retrieve pointer to GetVolumeInformationByHandleW function";
+         return false;
+     }
+
+     HANDLE fileHandle = (HANDLE)_get_osfhandle(fd);
+     if (fileHandle == INVALID_HANDLE_VALUE) {
+         warning() << "_get_osfhandle() failed with " << _strerror(NULL);
+         return false;
+     }
+
+     WCHAR fileSystemName[MAX_PATH + 1];
+     // The final argument is the buffer length in WCHARs, not bytes. Passing
+     // sizeof(fileSystemName) would claim twice the real capacity.
+     const DWORD fileSystemNameLen = sizeof(fileSystemName) / sizeof(fileSystemName[0]);
+     if (!GetVolumeInformationByHandleWFunc(fileHandle, NULL, 0, NULL, 0, NULL, fileSystemName, fileSystemNameLen)) {
+         DWORD gle = GetLastError();
+         warning() << "GetVolumeInformationByHandleW failed with " << errnoWithDescription(gle);
+         return false;
+     }
+
+     return lstrcmpW(fileSystemName, L"NTFS") == 0;
+ }
+#endif
+
+ /**
+  * Grows the file open on 'fd' to 'size' bytes, raising a uassert on failure.
+  *
+  * Strategies are tried in this order:
+  *  1. (non-Windows) ftruncate, when useSparseFiles(fd) says sparse is fine.
+  *  2. (Linux) posix_fallocate; on failure a message is logged and the
+  *     function falls through to step 3.
+  *  3. Seek to size-1 and write a single byte, then, only if
+  *     ProcessInfo::isDataFileZeroingNeeded() is true (and, on Windows, the
+  *     file is on NTFS), overwrite the whole file with zeroes.
+  *
+  * When the allocateDiskFull fail point is active the function raises
+  * uassert 10444 before doing anything else.
+  */
+ void FileAllocator::ensureLength(int fd , long size) {
+ // Test running out of disk scenarios
+ if (MONGO_FAIL_POINT(allocateDiskFull)) {
+ uasserted( 10444 , "File allocation failed due to failpoint.");
+ }
+
+#if !defined(_WIN32)
+ if (useSparseFiles(fd)) {
+ LOG(1) << "using ftruncate to create a sparse file" << endl;
+ int ret = ftruncate(fd, size);
+ uassert(16063, "ftruncate failed: " + errnoWithDescription(), ret == 0);
+ return;
+ }
+#endif
+
+#if defined(__linux__)
+ // The return value of posix_fallocate is passed to errnoWithDescription
+ // below as the error code to describe.
+ int ret = posix_fallocate(fd,0,size);
+ if ( ret == 0 )
+ return;
+
+ log() << "FileAllocator: posix_fallocate failed: " << errnoWithDescription( ret ) << " falling back" << endl;
+#endif
+
+ off_t filelen = lseek( fd, 0, SEEK_END );
+ if ( filelen < size ) {
+ // Any length other than 0 here (including an lseek error value) is
+ // treated as fatal: the uassert condition below is necessarily false
+ // inside this branch.
+ if (filelen != 0) {
+ stringstream ss;
+ ss << "failure creating new datafile; lseek failed for fd " << fd << " with errno: " << errnoWithDescription();
+ uassert( 10440 , ss.str(), filelen == 0 );
+ }
+ // Check for end of disk.
+
+ // Extend the file by seeking to the last byte and writing one byte there.
+ uassert( 10441 , str::stream() << "Unable to allocate new file of size " << size << ' ' << errnoWithDescription(),
+ size - 1 == lseek(fd, size - 1, SEEK_SET) );
+ uassert( 10442 , str::stream() << "Unable to allocate new file of size " << size << ' ' << errnoWithDescription(),
+ 1 == write(fd, "", 1) );
+
+ // File expansion is completed here. Do not do the zeroing out on OS-es where there
+ // is no risk of triggering allocation-related bugs such as
+ // http://support.microsoft.com/kb/2731284.
+ //
+ if (!ProcessInfo::isDataFileZeroingNeeded()) {
+ return;
+ }
+
+#if defined(_WIN32)
+ if (!isFileOnNTFSVolume(fd)) {
+ log() << "No need to zero out datafile on non-NTFS volume" << endl;
+ return;
+ }
+#endif
+
+ lseek(fd, 0, SEEK_SET);
+
+ // Zero-fill from the start in chunks of at most 256 KiB. A short write
+ // is tolerated: 'left' shrinks by the byte count actually written.
+ const long z = 256 * 1024;
+ const boost::scoped_array<char> buf_holder (new char[z]);
+ char* buf = buf_holder.get();
+ memset(buf, 0, z);
+ long left = size;
+ while ( left > 0 ) {
+ long towrite = left;
+ if ( towrite > z )
+ towrite = z;
+
+ int written = write( fd , buf , towrite );
+ uassert( 10443 , errnoWithPrefix("FileAllocator: file write failed" ), written > 0 );
+ left -= written;
+ }
+ }
+ }
+
+ /**
+  * Raises assertion 12520 (via msgassertedNoTrace) when the allocator is in
+  * the failed state; otherwise does nothing.
+  */
+ void FileAllocator::checkFailure() {
+     if ( !_failed )
+         return;
+
+     // we want to log the problem (diskfull.js expects it) but we do not want to dump a stack trace
+     msgassertedNoTrace( 12520, "new file allocation failure" );
+ }
+
+ /**
+  * Returns the size already associated with 'name': the pending request's
+  * size if one is recorded, otherwise the size of the file on disk if it
+  * exists, otherwise -1.
+  */
+ long FileAllocator::prevSize( const string &name ) const {
+     long knownSize = -1;
+     if ( _pendingSize.count( name ) > 0 ) {
+         knownSize = _pendingSize[ name ];
+     }
+     else if ( boost::filesystem::exists( name ) ) {
+         knownSize = boost::filesystem::file_size( name );
+     }
+     return knownSize;
+ }
+
+ // caller must hold _pendingMutex lock.
+ // Reports whether 'name' currently appears anywhere in the pending list.
+ bool FileAllocator::inProgress( const string &name ) const {
+     list< string >::const_iterator it = _pending.begin();
+     const list< string >::const_iterator last = _pending.end();
+     while ( it != last ) {
+         if ( *it == name )
+             return true;
+         ++it;
+     }
+     return false;
+ }
+
+ /**
+  * Produces a path of the form <root>/_tmp/<number> that does not currently
+  * exist, drawing successive numbers from the shared _uniqueNumber counter
+  * until an unused name is found.
+  */
+ string FileAllocator::makeTempFileName( boost::filesystem::path root ) {
+     for ( ;; ) {
+         unsigned long long serial;
+         {
+             // increment temporary file name counter
+             // TODO: SERVER-6055 -- Unify temporary file name selection
+             SimpleMutex::scoped_lock lk(_uniqueNumberMutex);
+             serial = _uniqueNumber++;
+         }
+
+         stringstream digits;
+         digits << serial;
+
+         boost::filesystem::path candidate = root / "_tmp";
+         candidate /= digits.str();
+
+         if( !boost::filesystem::exists(candidate) )
+             return candidate.string();
+     }
+     return "";
+ }
+
+ /**
+  * Body of the background allocation thread started by start(). Never returns.
+  *
+  * Repeatedly takes the front entry of fa->_pending, creates a temporary
+  * file next to the target, extends it with ensureLength(), renames it to
+  * the final name, and then removes the entry from the pending list.
+  *
+  * On any std::exception the partial files are removed, fa->_failed is set,
+  * waiters are notified, and the same front entry is retried after a
+  * 10 second sleep (it is not removed from the pending list).
+  */
+ void FileAllocator::run( FileAllocator * fa ) {
+ setThreadName( "FileAllocator" );
+ {
+ // initialize unique temporary file name counter
+ // TODO: SERVER-6055 -- Unify temporary file name selection
+ SimpleMutex::scoped_lock lk(_uniqueNumberMutex);
+ _uniqueNumber = curTimeMicros64();
+ }
+ while( 1 ) {
+ {
+ // Sleep until something is queued. A wakeup with an empty list is
+ // harmless: the inner loop breaks out at once and we wait again.
+ boost::unique_lock<boost::mutex> lk( fa->_pendingMutex );
+ if ( fa->_pending.size() == 0 )
+ fa->_pendingUpdated.wait(lk);
+ }
+ while( 1 ) {
+ string name;
+ long size = 0;
+ {
+ // Peek at the front request; it stays in the list until the
+ // allocation has succeeded.
+ boost::lock_guard<boost::mutex> lk( fa->_pendingMutex );
+ if ( fa->_pending.size() == 0 )
+ break;
+ name = fa->_pending.front();
+ size = fa->_pendingSize[ name ];
+ }
+
+ string tmp;
+ // 0 doubles as the "no descriptor open" marker for the cleanup in the
+ // catch block below.
+ long fd = 0;
+ try {
+ log() << "allocating new datafile " << name << ", filling with zeroes..." << endl;
+
+ boost::filesystem::path parent = ensureParentDirCreated(name);
+ tmp = fa->makeTempFileName( parent );
+ ensureParentDirCreated(tmp);
+
+#if defined(_WIN32)
+ fd = _open( tmp.c_str(), _O_RDWR | _O_CREAT | O_NOATIME, _S_IREAD | _S_IWRITE );
+#else
+ fd = open(tmp.c_str(), O_CREAT | O_RDWR | O_NOATIME, S_IRUSR | S_IWUSR);
+#endif
+ if ( fd < 0 ) {
+ log() << "FileAllocator: couldn't create " << name << " (" << tmp << ") " << errnoWithDescription() << endl;
+ uasserted(10439, "");
+ }
+
+#if defined(POSIX_FADV_DONTNEED)
+ if( posix_fadvise(fd, 0, size, POSIX_FADV_DONTNEED) ) {
+ log() << "warning: posix_fadvise fails " << name << " (" << tmp << ") " << errnoWithDescription() << endl;
+ }
+#endif
+
+ Timer t;
+
+ /* make sure the file is the full desired length */
+ ensureLength( fd , size );
+
+ close( fd );
+ fd = 0;
+
+ // The file is built under a temporary name and only renamed to its
+ // final name once it has been fully extended.
+ if( rename(tmp.c_str(), name.c_str()) ) {
+ const string& errStr = errnoWithDescription();
+ const string& errMessage = str::stream()
+ << "error: couldn't rename " << tmp
+ << " to " << name << ' ' << errStr;
+ msgasserted(13653, errMessage);
+ }
+ flushMyDirectory(name);
+
+ log() << "done allocating datafile " << name << ", "
+ << "size: " << size/1024/1024 << "MB, "
+ << " took " << ((double)t.millis())/1000.0 << " secs"
+ << endl;
+
+ // no longer in a failed state. allow new writers.
+ // NOTE(review): this write is made without holding _pendingMutex,
+ // unlike the write in the catch block below -- confirm this is intended.
+ fa->_failed = false;
+ }
+ catch ( const std::exception& e ) {
+ log() << "error: failed to allocate new file: " << name
+ << " size: " << size << ' ' << e.what()
+ << ". will try again in 10 seconds" << endl;
+ if ( fd > 0 )
+ close( fd );
+ // Best-effort cleanup of both the temporary and the target file; a
+ // failure to remove them is logged and otherwise ignored.
+ try {
+ if ( ! tmp.empty() )
+ boost::filesystem::remove( tmp );
+ boost::filesystem::remove( name );
+ } catch ( const std::exception& e ) {
+ log() << "error removing files: " << e.what() << endl;
+ }
+
+ {
+ // Publish the failure and wake waiters so that allocateAsap()
+ // callers re-run checkFailure().
+ boost::lock_guard<boost::mutex> lk(fa->_pendingMutex);
+ fa->_failed = true;
+
+ // TODO: Should we remove the file from pending?
+ fa->_pendingUpdated.notify_all();
+ }
+
+
+ sleepsecs(10);
+ continue;
+ }
+
+ {
+ // Success: retire the request and wake anyone waiting on it.
+ boost::lock_guard<boost::mutex> lk( fa->_pendingMutex );
+ fa->_pendingSize.erase( name );
+ fa->_pending.pop_front();
+ fa->_pendingUpdated.notify_all();
+ }
+ }
+ }
+ }
+
+ FileAllocator* FileAllocator::_instance = 0;
+
+ /**
+  * Returns the process-wide FileAllocator, constructing it on the first
+  * call. The instance is heap-allocated and never deleted here, and no
+  * locking is performed around the creation.
+  */
+ FileAllocator* FileAllocator::get(){
+     if ( _instance == 0 ) {
+         _instance = new FileAllocator();
+     }
+     return _instance;
+ }
+
+} // namespace mongo
diff --git a/src/mongo/db/storage/mmap_v1/file_allocator.h b/src/mongo/db/storage/mmap_v1/file_allocator.h
new file mode 100644
index 00000000000..aabe2a368bf
--- /dev/null
+++ b/src/mongo/db/storage/mmap_v1/file_allocator.h
@@ -0,0 +1,109 @@
+// @file file_allocator.h
+
+/* Copyright 2009 10gen Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects
+ * for all of the code used other than as permitted herein. If you modify
+ * file(s) with this exception, you may extend this exception to your
+ * version of the file(s), but you are not obligated to do so. If you do not
+ * wish to do so, delete this exception statement from your version. If you
+ * delete this exception statement from all source files in the program,
+ * then also delete it in the license file.
+ */
+
+#include "mongo/platform/basic.h"
+
+#include <list>
+#include <boost/filesystem/path.hpp>
+#include <boost/noncopyable.hpp>
+#include <boost/thread/condition.hpp>
+
+#include "mongo/util/concurrency/mutex.h"
+
+namespace mongo {
+
+ /*
+ * Handles allocation of contiguous files on disk. Allocation may be
+ * requested asynchronously or synchronously.
+ * singleton
+ */
+ class FileAllocator : boost::noncopyable {
+ /*
+ * The public functions may not be called concurrently. The allocation
+ * functions may be called multiple times per file, but only the first
+ * size specified per file will be used.
+ */
+ public:
+ void start();
+
+ /**
+ * May be called if file exists. If file exists, or its allocation has
+ * been requested, size is updated to match existing file size.
+ */
+ void requestAllocation( const std::string &name, long &size );
+
+
+ /**
+ * Returns when file has been allocated. If file exists, size is
+ * updated to match existing file size.
+ */
+ void allocateAsap( const std::string &name, unsigned long long &size );
+
+ void waitUntilFinished() const;
+
+ static void ensureLength(int fd, long size); // makes sure the file behind fd is the full desired length
+
+ /** @return the singleton */
+ static FileAllocator * get();
+
+ private:
+
+ FileAllocator();
+
+ void checkFailure();
+
+ // caller must hold the _pendingMutex lock. Returns size if allocated or
+ // allocation requested, -1 otherwise.
+ long prevSize( const std::string &name ) const;
+
+ // caller must hold the _pendingMutex lock.
+ bool inProgress( const std::string &name ) const;
+
+ /** called from the worker thread */
+ static void run( FileAllocator * fa );
+
+ // generate a unique name for temporary files
+ std::string makeTempFileName( boost::filesystem::path root );
+
+ mutable mongo::mutex _pendingMutex; // taken by run() around updates to _pending, _pendingSize and _failed
+ mutable boost::condition _pendingUpdated; // notified by run() after each allocation attempt
+
+ std::list< std::string > _pending; // queued file names; run() pops the front once that file is allocated
+ mutable std::map< std::string, long > _pendingSize; // per-name entry, erased by run() once that file is allocated
+
+ // unique number for temporary files
+ static unsigned long long _uniqueNumber;
+
+ bool _failed; // set by run() when an allocation attempt throws, cleared after a successful one
+
+ static FileAllocator* _instance; // the singleton returned by get()
+
+ };
+
+} // namespace mongo
diff --git a/src/mongo/db/storage/mmap_v1/journal_latency_test_cmd.cpp b/src/mongo/db/storage/mmap_v1/journal_latency_test_cmd.cpp
index bbabd11c179..8c29741ed7e 100644
--- a/src/mongo/db/storage/mmap_v1/journal_latency_test_cmd.cpp
+++ b/src/mongo/db/storage/mmap_v1/journal_latency_test_cmd.cpp
@@ -46,10 +46,10 @@
#include "mongo/db/query/internal_plans.h"
#include "mongo/db/storage_options.h"
#include "mongo/db/storage/mmap_v1/aligned_builder.h"
+#include "mongo/db/storage/mmap_v1/logfile.h"
+#include "mongo/db/storage/paths.h"
#include "mongo/scripting/engine.h"
#include "mongo/util/background.h"
-#include "mongo/util/logfile.h"
-#include "mongo/util/paths.h"
#include "mongo/util/timer.h"
namespace mongo {
diff --git a/src/mongo/db/storage/mmap_v1/logfile.cpp b/src/mongo/db/storage/mmap_v1/logfile.cpp
new file mode 100644
index 00000000000..8aa5e32626f
--- /dev/null
+++ b/src/mongo/db/storage/mmap_v1/logfile.cpp
@@ -0,0 +1,270 @@
+// @file logfile.cpp simple file log writing / journaling
+
+/**
+* Copyright (C) 2008 10gen Inc.
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU Affero General Public License, version 3,
+* as published by the Free Software Foundation.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU Affero General Public License for more details.
+*
+* You should have received a copy of the GNU Affero General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*
+* As a special exception, the copyright holders give permission to link the
+* code of portions of this program with the OpenSSL library under certain
+* conditions as described in each individual source file and distribute
+* linked combinations including the program with the OpenSSL library. You
+* must comply with the GNU Affero General Public License in all respects
+* for all of the code used other than as permitted herein. If you modify
+* file(s) with this exception, you may extend this exception to your
+* version of the file(s), but you are not obligated to do so. If you do not
+* wish to do so, delete this exception statement from your version. If you
+* delete this exception statement from all source files in the program,
+* then also delete it in the license file.
+*/
+
+#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kControl
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/storage/mmap_v1/logfile.h"
+
+#include "mongo/db/storage/mmap_v1/mmap.h"
+#include "mongo/db/storage/paths.h"
+#include "mongo/platform/posix_fadvise.h"
+#include "mongo/util/allocator.h"
+#include "mongo/util/log.h"
+#include "mongo/util/mongoutils/str.h"
+#include "mongo/util/startup_test.h"
+#include "mongo/util/text.h"
+
+
+using namespace mongoutils;
+
+using std::endl;
+using std::string;
+
+#if defined(_WIN32)
+
+namespace mongo {
+
+    /** Windows: opens the file (creating it if absent) for unbuffered writing and
+     *  positions the file pointer at the start. uasserts 13518 when the open fails.
+     */
+    LogFile::LogFile(const std::string& name, bool readwrite) : _name(name) {
+        // Write access is always requested; read access only on demand.
+        DWORD desiredAccess = GENERIC_WRITE;
+        if( readwrite )
+            desiredAccess |= GENERIC_READ;
+
+        _fd = CreateFile(toNativeString(name.c_str()).c_str(),
+                         desiredAccess,
+                         FILE_SHARE_READ,
+                         NULL,
+                         OPEN_ALWAYS,
+                         FILE_FLAG_NO_BUFFERING,
+                         NULL);
+        if( INVALID_HANDLE_VALUE == _fd ) {
+            const DWORD lastError = GetLastError();
+            uasserted(13518, str::stream() << "couldn't open file " << name << " for writing " << errnoWithDescription(lastError));
+        }
+        SetFilePointer(_fd, 0, 0, FILE_BEGIN);
+    }
+
+    /** Windows: releases the file handle if one was successfully opened. */
+    LogFile::~LogFile() {
+        if( _fd == INVALID_HANDLE_VALUE )
+            return;
+        CloseHandle(_fd);
+    }
+
+    /** Windows: ends the file at the current file pointer; msgasserts 15871 on failure. */
+    void LogFile::truncate() {
+        verify(_fd != INVALID_HANDLE_VALUE);
+
+        const BOOL truncated = SetEndOfFile(_fd);
+        if (truncated)
+            return;
+
+        msgasserted(15871, "Couldn't truncate file: " + errnoWithDescription());
+    }
+
+    /** Windows: writes _len bytes from _buf at the given absolute file offset.
+     *  A failed WriteFile trips verify().
+     */
+    void LogFile::writeAt(unsigned long long offset, const void *_buf, size_t _len) {
+        OVERLAPPED ov;
+        memset(&ov, 0, sizeof(ov));
+        // The 64-bit position is carried in the two adjacent 32-bit OVERLAPPED fields.
+        ov.Offset = static_cast<DWORD>(offset & 0xFFFFFFFFULL);
+        ov.OffsetHigh = static_cast<DWORD>(offset >> 32);
+        BOOL ok = WriteFile(_fd, _buf, _len, 0, &ov);
+        verify(ok);
+    }
+
+    /** Windows: reads _len bytes into _buf from the given absolute file offset.
+     *  On failure the error is logged and verify() fires.
+     */
+    void LogFile::readAt(unsigned long long offset, void *_buf, size_t _len) {
+        OVERLAPPED o;
+        memset(&o,0,sizeof(o));
+        // The 64-bit position is carried in the two adjacent 32-bit OVERLAPPED fields.
+        o.Offset = static_cast<DWORD>(offset & 0xFFFFFFFFULL);
+        o.OffsetHigh = static_cast<DWORD>(offset >> 32);
+        DWORD nr;
+        BOOL ok = ReadFile(_fd, _buf, _len, &nr, &o);
+        if( !ok ) {
+            // Capture the error text before anything else can disturb the last-error value.
+            string e = errnoWithDescription();
+            // A separator before "errno:" keeps the length and the error text from running together.
+            log() << "LogFile readAt(" << offset << ") len:" << _len << " errno:" << e << endl;
+            verify(false);
+        }
+    }
+
+    /** Windows: appends _len bytes at the current file pointer, in chunks of at most 8MB.
+     *  _len must be a multiple of g_minOSPageSizeBytes.
+     *  msgasserts 13519 on Win32 error 87 (invalid parameter), uasserts 13517 on any other
+     *  write error.
+     */
+    void LogFile::synchronousAppend(const void *_buf, size_t _len) {
+        const size_t BlockSize = 8 * 1024 * 1024;
+        // A failed open leaves INVALID_HANDLE_VALUE, which is non-zero, so compare
+        // against it explicitly (as truncate() does) rather than testing for zero.
+        verify(_fd != INVALID_HANDLE_VALUE);
+        verify(_len % g_minOSPageSizeBytes == 0);
+        const char *buf = (const char *) _buf;
+        size_t left = _len;
+        while( left ) {
+            size_t toWrite = std::min(left, BlockSize);
+            DWORD written;
+            if( !WriteFile(_fd, buf, toWrite, &written, NULL) ) {
+                DWORD e = GetLastError();
+                if( e == 87 )
+                    msgasserted(13519, "error 87 appending to file - invalid parameter");
+                else
+                    uasserted(13517, str::stream() << "error appending to file " << _name << ' ' << _len << ' ' << toWrite << ' ' << errnoWithDescription(e));
+            }
+            else {
+                dassert( written == toWrite );
+            }
+            // Advance by what was actually written.
+            left -= written;
+            buf += written;
+        }
+    }
+
+}
+
+#else
+
+/// posix
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+
+#ifdef __linux__
+#include <linux/fs.h>
+#endif
+
+namespace mongo {
+
+    /** POSIX: opens the file (creating it if absent), preferring direct I/O where O_DIRECT
+     *  exists and falling back to a normal open when the direct open fails.
+     *  uasserts 13516 if the file cannot be opened at all.
+     *  @param readwrite open for reading as well as writing
+     */
+    LogFile::LogFile(const std::string& name, bool readwrite) : _name(name) {
+        int options = O_CREAT
+                    | (readwrite?O_RDWR:O_WRONLY)
+#if defined(O_DIRECT)
+                    | O_DIRECT
+#endif
+#if defined(O_NOATIME)
+                    | O_NOATIME
+#endif
+            ;
+
+        _fd = open(name.c_str(), options, S_IRUSR | S_IWUSR);
+        _blkSize = g_minOSPageSizeBytes;
+
+#if defined(O_DIRECT)
+        _direct = true;
+        if( _fd < 0 ) {
+            // Retry without direct I/O.
+            _direct = false;
+            options &= ~O_DIRECT;
+            _fd = open(name.c_str(), options, S_IRUSR | S_IWUSR);
+        }
+#else
+        _direct = false;
+#endif
+
+        // Report the failure before any other system call can overwrite errno.
+        if( _fd < 0 ) {
+            uasserted(13516, str::stream() << "couldn't open file " << name << " for writing " << errnoWithDescription());
+        }
+
+#if defined(O_DIRECT) && defined(__linux__)
+        // BLKBSZGET stores the block size through its int* argument; the return value is
+        // only a success/failure status. A failing ioctl leaves the page-size default.
+        // TODO: We need some sanity checking on tmpBlkSize even if ioctl() did not fail.
+        int tmpBlkSize = 0;
+        if (ioctl(_fd, BLKBSZGET, &tmpBlkSize) == 0 && tmpBlkSize > 0) {
+            _blkSize = static_cast<size_t>(tmpBlkSize);
+        }
+#endif
+
+        flushMyDirectory(name);
+    }
+
+    /** POSIX: closes the descriptor if it is open and marks it as closed. */
+    LogFile::~LogFile() {
+        if( !(_fd < 0) ) {
+            close(_fd);
+        }
+        _fd = -1;
+    }
+
+    /** POSIX: cuts the file off at the current position, then fsyncs it.
+     *  msgasserts 15873 if ftruncate fails.
+     */
+    void LogFile::truncate() {
+        verify(_fd >= 0);
+
+        BOOST_STATIC_ASSERT(sizeof(off_t) == 8); // we don't want overflow here
+        // A zero-distance seek from the current position just reports where we are.
+        const off_t here = lseek(_fd, 0, SEEK_CUR);
+        const int rc = ftruncate(_fd, here);
+        if (rc != 0) {
+            msgasserted(15873, "Couldn't truncate file: " + errnoWithDescription());
+        }
+
+        fsync(_fd);
+    }
+
+    /** POSIX: writes len bytes from a page-aligned buffer at the given offset, then syncs.
+     *  A short or failed write is only logged.
+     */
+    void LogFile::writeAt(unsigned long long offset, const void *buf, size_t len) {
+        verify(((size_t)buf) % g_minOSPageSizeBytes == 0); // aligned
+        const ssize_t nWritten = pwrite(_fd, buf, len, offset);
+        const bool complete = ( nWritten == (ssize_t) len );
+        if( !complete ) {
+            log() << "writeAt fails " << errnoWithDescription() << endl;
+        }
+#if defined(__linux__)
+        fdatasync(_fd);
+#else
+        fsync(_fd);
+#endif
+    }
+
+    /** POSIX: reads up to _len bytes at the given offset into a page-aligned buffer.
+     *  A pread error trips verify().
+     */
+    void LogFile::readAt(unsigned long long offset, void *_buf, size_t _len) {
+        verify(((size_t)_buf) % g_minOSPageSizeBytes == 0); // aligned
+        const ssize_t rd = pread(_fd, _buf, _len, offset);
+        verify( rd != -1 );
+    }
+
+    /** POSIX: appends len bytes from b at the current file position and syncs the file.
+     *  b must be aligned to _blkSize. Write or sync failures are fatal (fassert).
+     */
+    void LogFile::synchronousAppend(const void *b, size_t len) {
+
+        const char *cursor = static_cast<const char *>( b );
+        ssize_t remaining = static_cast<ssize_t>( len );
+
+        fassert( 16144, remaining >= 0 );
+        fassert( 16142, _fd >= 0 );
+        fassert( 16143, reinterpret_cast<size_t>( cursor ) % _blkSize == 0 ); // aligned
+
+#ifdef POSIX_FADV_DONTNEED
+        // Remember where this append starts; lseek by zero only reports the position.
+        const off_t pos = lseek(_fd, 0, SEEK_CUR);
+#endif
+
+        // write() may accept fewer bytes than asked for, so keep going until all are out.
+        while ( remaining > 0 ) {
+            const ssize_t written = write( _fd, cursor, static_cast<size_t>( remaining ) );
+            if ( written == -1 ) {
+                log() << "LogFile::synchronousAppend failed with " << remaining
+                      << " bytes unwritten out of " << len << " bytes;  b=" << b << ' '
+                      << errnoWithDescription() << std::endl;
+                fassertFailed( 13515 );
+            }
+            cursor += written;
+            remaining -= written;
+        }
+
+#if defined(__linux__)
+        const bool syncFailed = fdatasync(_fd) < 0;
+#else
+        const bool syncFailed = fsync(_fd) != 0;
+#endif
+        if( syncFailed ) {
+            log() << "error appending to file on fsync " << ' ' << errnoWithDescription();
+            fassertFailed( 13514 );
+        }
+
+#ifdef POSIX_FADV_DONTNEED
+        // Without direct I/O, advise the kernel that the range just written is no longer needed.
+        if (!_direct)
+            posix_fadvise(_fd, pos, len, POSIX_FADV_DONTNEED);
+#endif
+    }
+
+}
+
+#endif
diff --git a/src/mongo/db/storage/mmap_v1/logfile.h b/src/mongo/db/storage/mmap_v1/logfile.h
new file mode 100644
index 00000000000..278b9c162aa
--- /dev/null
+++ b/src/mongo/db/storage/mmap_v1/logfile.h
@@ -0,0 +1,77 @@
+// @file logfile.h simple file log writing / journaling
+
+/**
+* Copyright (C) 2010 10gen Inc.
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU Affero General Public License, version 3,
+* as published by the Free Software Foundation.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU Affero General Public License for more details.
+*
+* You should have received a copy of the GNU Affero General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*
+* As a special exception, the copyright holders give permission to link the
+* code of portions of this program with the OpenSSL library under certain
+* conditions as described in each individual source file and distribute
+* linked combinations including the program with the OpenSSL library. You
+* must comply with the GNU Affero General Public License in all respects
+* for all of the code used other than as permitted herein. If you modify
+* file(s) with this exception, you may extend this exception to your
+* version of the file(s), but you are not obligated to do so. If you do not
+* wish to do so, delete this exception statement from your version. If you
+* delete this exception statement from all source files in the program,
+* then also delete it in the license file.
+*/
+
+#pragma once
+
+#include <string>
+
+
+namespace mongo {
+
+    /** Simple file wrapper used for log writing / journaling. */
+    class LogFile {
+    public:
+        /** open the file, creating it if it does not exist yet.
+            throws UserAssertion if the file cannot be opened
+            @param readwrite open for reading as well as writing
+        */
+        LogFile(const std::string& name, bool readwrite = false);
+
+        /** closes */
+        ~LogFile();
+
+        /** append to file.  does not return until sync'd.  uses direct i/o when possible.
+            an i/o error raises an assertion on Windows and is fatal (fassert) on POSIX.
+            note direct i/o may have alignment requirements
+        */
+        void synchronousAppend(const void *buf, size_t len);
+
+        /** write at specified offset. buffer must be aligned. the POSIX version does not
+            return until the data has been synced. */
+        void writeAt(unsigned long long offset, const void *_buf, size_t _len);
+
+        /** read _len bytes at specified offset into _buf. */
+        void readAt(unsigned long long offset, void *_buf, size_t _len);
+
+        const std::string _name; // file name as passed to the constructor
+
+        void truncate(); // Removes extra data after current position
+
+    private:
+#if defined(_WIN32)
+        typedef HANDLE fd_type;
+#else
+        typedef int fd_type;
+#endif
+        fd_type _fd;
+        bool _direct; // are we using direct I/O
+
+        // Block size, in case of direct I/O we need to test alignment against the page size,
+        // which can be different than 4kB.
+        size_t _blkSize;
+    };
+
+}
diff --git a/src/mongo/db/storage/mmap_v1/mmap.cpp b/src/mongo/db/storage/mmap_v1/mmap.cpp
new file mode 100644
index 00000000000..e9519fc7d94
--- /dev/null
+++ b/src/mongo/db/storage/mmap_v1/mmap.cpp
@@ -0,0 +1,258 @@
+// mmap.cpp
+
+/* Copyright 2009 10gen Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects
+ * for all of the code used other than as permitted herein. If you modify
+ * file(s) with this exception, you may extend this exception to your
+ * version of the file(s), but you are not obligated to do so. If you do not
+ * wish to do so, delete this exception statement from your version. If you
+ * delete this exception statement from all source files in the program,
+ * then also delete it in the license file.
+ */
+
+#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kControl
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/storage/mmap_v1/mmap.h"
+
+#include <boost/filesystem/operations.hpp>
+
+#include "mongo/base/owned_pointer_vector.h"
+#include "mongo/util/concurrency/rwlock.h"
+#include "mongo/util/log.h"
+#include "mongo/util/map_util.h"
+#include "mongo/util/mongoutils/str.h"
+#include "mongo/util/processinfo.h"
+#include "mongo/util/progress_meter.h"
+#include "mongo/util/startup_test.h"
+
+namespace mongo {
+
+ using std::endl;
+ using std::map;
+ using std::set;
+ using std::string;
+ using std::stringstream;
+ using std::vector;
+
+    /** Sanity-checks a page size: non-zero, below 1000000, and a power of two.
+     *  Each check is a separate fassert so the failing code identifies the problem.
+     */
+    void minOSPageSizeBytesTest(size_t minOSPageSizeBytes) {
+        const size_t pageSize = minOSPageSizeBytes;
+        fassert( 16325, pageSize > 0 );
+        fassert( 16326, pageSize < 1000000 );
+        // a power of two has exactly one bit set, so clearing the lowest set bit leaves zero
+        const bool powerOfTwo = ( pageSize & (pageSize - 1) ) == 0;
+        fassert( 16327, powerOfTwo );
+    }
+
+namespace {
+ set<MongoFile*> mmfiles; // every file registered via MongoFile::created(); entries are erased by destroyed()
+ map<string,MongoFile*> pathToFile; // absolute generic path -> file, filled in by MongoFile::setFilename()
+} // namespace
+
+    /* Create a new mapped file; uasserts 13468 if the file already exists.
+       @param zero fill file with zeros when true
+       @return the mapped view as returned by map()
+    */
+    void* MemoryMappedFile::create(const std::string& filename, unsigned long long len, bool zero) {
+        uassert( 13468, string("can't create file already exists ") + filename, ! boost::filesystem::exists(filename) );
+        void* const view = map(filename.c_str(), len);
+        if( view == 0 || !zero )
+            return view;
+
+        // make sure the length survives the conversion to size_t before clearing the view
+        size_t sz = (size_t) len;
+        verify( len == sz );
+        memset(view, 0, sz);
+        return view;
+    }
+
+    /** If the file already exists, overwrite length with its on-disk size so that the
+     *  whole preexisting file gets mapped; otherwise leave length untouched.
+     */
+    /*static*/ void MemoryMappedFile::updateLength( const char *filename, unsigned long long &length ) {
+        if ( boost::filesystem::exists( filename ) ) {
+            const boost::uintmax_t onDiskSize = boost::filesystem::file_size( filename );
+            length = onDiskSize;
+        }
+    }
+
+    /** Maps an existing file at its current on-disk length.
+     *  uasserts 15922 if the length cannot be determined.
+     */
+    void* MemoryMappedFile::map(const char *filename) {
+        unsigned long long fileLength;
+        try {
+            fileLength = boost::filesystem::file_size( filename );
+        }
+        catch(boost::filesystem::filesystem_error& ex) {
+            uasserted(15922, mongoutils::str::stream() << "couldn't get file length when opening mapping " << filename << ' ' << ex.what() );
+        }
+        return map( filename , fileLength );
+    }
+    /** Same as map(filename), but forwards the given MongoFile::Options bits.
+     *  uasserts 15923 if the length cannot be determined.
+     */
+    void* MemoryMappedFile::mapWithOptions(const char *filename, int options) {
+        unsigned long long fileLength;
+        try {
+            fileLength = boost::filesystem::file_size( filename );
+        }
+        catch(boost::filesystem::filesystem_error& ex) {
+            uasserted(15923, mongoutils::str::stream() << "couldn't get file length when opening mapping " << filename << ' ' << ex.what() );
+        }
+        return map( filename , fileLength, options );
+    }
+
+ /* --- MongoFile -------------------------------------------------
+ this is the administrative stuff
+ */
+
+ RWLockRecursiveNongreedy LockMongoFilesShared::mmmutex("mmmutex",10*60*1000 /* 10 minutes */); // lock taken by LockMongoFilesShared / LockMongoFilesExclusive
+ unsigned LockMongoFilesShared::era = 99; // note this rolls over
+
+ set<MongoFile*>& MongoFile::getAllFiles() { return mmfiles; } // hands out the registry itself, not a copy
+
+    /* Unregisters this file: drops it from the path lookup table and from the set of
+       all files. The caller must already hold the files lock exclusively.
+       Subclasses call this from their destructor or when closing; calling it again
+       later is harmless, just wasted work.
+    */
+    void MongoFile::destroyed() {
+        LockMongoFilesShared::assertExclusivelyLocked();
+        pathToFile.erase( filename() );
+        mmfiles.erase(this);
+    }
+
+    /** Closes every registered file under the exclusive files lock.
+     *  If a close-all is already under way, only a warning is appended to message.
+     */
+    /*static*/
+    void MongoFile::closeAllFiles( stringstream &message ) {
+        static int closingAllFiles = 0;
+        if ( closingAllFiles != 0 ) {
+            message << "warning closingAllFiles=" << closingAllFiles << endl;
+            return;
+        }
+        ++closingAllFiles;
+
+        LockMongoFilesExclusive lk;
+
+        ProgressMeter pm(mmfiles.size(), 2, 1, "files", "File Closing Progress");
+        // walk a snapshot, because close() removes the file from mmfiles
+        set<MongoFile*> snapshot = mmfiles;
+        set<MongoFile*>::iterator it = snapshot.begin();
+        while ( it != snapshot.end() ) {
+            (*it)->close();
+            pm.hit();
+            ++it;
+        }
+        message << "closeAllFiles() finished";
+        --closingAllFiles;
+    }
+
+    /** @return the sum of length() over all registered files, taken under the shared lock. */
+    /*static*/ long long MongoFile::totalMappedLength() {
+        unsigned long long sum = 0;
+
+        LockMongoFilesShared lk;
+
+        set<MongoFile*>::iterator it = mmfiles.begin();
+        while ( it != mmfiles.end() ) {
+            sum += (*it)->length();
+            ++it;
+        }
+
+        return sum;
+    }
+
+ void nullFunc() { } // do-nothing default for the flush callbacks below
+
+ // callback notifications
+ void (*MongoFile::notifyPreFlush)() = nullFunc; // called by flushAll() before a sync flush
+ void (*MongoFile::notifyPostFlush)() = nullFunc; // called by flushAll() after a sync flush
+
+    /** Flushes all files. For a sync flush the pre/post callbacks are invoked around it.
+     *  @return the count reported by _flushAll
+     */
+    /*static*/ int MongoFile::flushAll( bool sync ) {
+        if ( !sync )
+            return _flushAll(sync);
+
+        notifyPreFlush();
+        const int flushed = _flushAll(sync);
+        notifyPostFlush();
+        return flushed;
+    }
+
+    /** Does the actual flushing for flushAll().
+     *  Async: flushes each file in place while holding the shared lock.
+     *  Sync: collects a Flushable per file under the lock, then flushes them after
+     *  the lock has been released.
+     *  @return number of registry entries visited (async) or Flushables flushed (sync)
+     */
+    /*static*/ int MongoFile::_flushAll( bool sync ) {
+        if ( ! sync ) {
+            int visited = 0;
+            LockMongoFilesShared lk;
+            for ( set<MongoFile*>::iterator it = mmfiles.begin(); it != mmfiles.end(); ++it ) {
+                // counted before the null check, so null entries are included in the total
+                ++visited;
+                MongoFile * const file = *it;
+                if ( file )
+                    file->flush( sync );
+            }
+            return visited;
+        }
+
+        // want to do it sync
+
+        // get a thread-safe Flushable object for each file first in a single lock
+        // so that we can iterate and flush without doing any locking here
+        OwnedPointerVector<Flushable> owner;
+        vector<Flushable*>& toFlush = owner.mutableVector();
+        {
+            LockMongoFilesShared lk;
+            for ( set<MongoFile*>::iterator it = mmfiles.begin(); it != mmfiles.end(); ++it ) {
+                MongoFile* const file = *it;
+                if ( file )
+                    toFlush.push_back( file->prepareFlush() );
+            }
+        }
+
+        for ( size_t idx = 0; idx < toFlush.size(); ++idx ) {
+            toFlush[idx]->flush();
+        }
+
+        return toFlush.size();
+    }
+
+    /** Registers this file in the set of all files, under the exclusive files lock. */
+    void MongoFile::created() {
+        LockMongoFilesExclusive exclusive;
+        mmfiles.insert(this);
+    }
+
+    /** Records the absolute path of this file and registers it in the path lookup table.
+     *  May only be called once per object (verified).
+     *  massert 13617 fires if another MongoFile is already registered under the same path.
+     *  @param fn file name; converted to an absolute, generic-format path
+     */
+    void MongoFile::setFilename(const std::string& fn) {
+        LockMongoFilesExclusive lk;
+        verify( _filename.empty() );
+        const std::string absolutePath = boost::filesystem::absolute(fn).generic_string();
+
+        // Check for a duplicate before touching any state. If _filename were already set
+        // when the assertion fires, this object's later destroyed() call would erase the
+        // entry that belongs to the file that legitimately owns the path.
+        map<string,MongoFile*>::const_iterator existing = pathToFile.find(absolutePath);
+        massert(13617, "MongoFile : multiple opens of same filename",
+                existing == pathToFile.end() || existing->second == 0);
+
+        _filename = absolutePath;
+        pathToFile[_filename] = this;
+    }
+
+    /** Looks up a registered file by path, using the same absolute generic form that
+     *  setFilename() stores. @return the file, or NULL if none is registered.
+     */
+    MongoFile* MongoFileFinder::findByPath(const std::string& path) const {
+        const std::string key = boost::filesystem::absolute(path).generic_string();
+        return mapFindWithDefault(pathToFile, key, static_cast<MongoFile*>(NULL));
+    }
+
+
+    /** Logs one line of memory statistics, optionally tagged with where.
+     *  @param where label for the call site; may be null
+     */
+    void printMemInfo( const char * where ) {
+        LogstreamBuilder out = log();
+        out << "mem info: ";
+        if ( where != 0 ) {
+            out << where << " ";
+        }
+
+        ProcessInfo pi;
+        if ( pi.supported() ) {
+            out << "vsize: " << pi.getVirtualMemorySize()
+                << " resident: " << pi.getResidentSize()
+                << " mapped: " << ( MemoryMappedFile::totalMappedLength() / ( 1024 * 1024 ) );
+        }
+        else {
+            out << " not supported";
+        }
+    }
+
+    /** Called when syncing data to disk fails: logs two lines, then fails fassert 17346. */
+    void dataSyncFailedHandler() {
+        log() << "error syncing data to disk, probably a disk error";
+        log() << " shutting down immediately to avoid corruption";
+
+        fassertFailed( 17346 );
+    }
+} // namespace mongo
diff --git a/src/mongo/db/storage/mmap_v1/mmap.h b/src/mongo/db/storage/mmap_v1/mmap.h
new file mode 100644
index 00000000000..7b34b21b254
--- /dev/null
+++ b/src/mongo/db/storage/mmap_v1/mmap.h
@@ -0,0 +1,262 @@
+// mmap.h
+
+/* Copyright 2009 10gen Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects
+ * for all of the code used other than as permitted herein. If you modify
+ * file(s) with this exception, you may extend this exception to your
+ * version of the file(s), but you are not obligated to do so. If you do not
+ * wish to do so, delete this exception statement from your version. If you
+ * delete this exception statement from all source files in the program,
+ * then also delete it in the license file.
+ */
+
+#pragma once
+
+#include <set>
+#include <sstream>
+#include <vector>
+
+#include "mongo/util/concurrency/rwlock.h"
+
+namespace mongo {
+
+#if !defined(_WIN32)
+ typedef int HANDLE;
+#endif
+
+ extern const size_t g_minOSPageSizeBytes;
+ void minOSPageSizeBytesTest(size_t minOSPageSizeBytes); // lame-o
+
+ // call this if syncing data fails
+ void dataSyncFailedHandler();
+
+ class MAdvise { // scoped memory-advice hint for a range; NOTE(review): implementation is not in this file
+ MONGO_DISALLOW_COPYING(MAdvise);
+ public:
+ enum Advice { Sequential=1 , Random=2 };
+ MAdvise(void *p, unsigned len, Advice a); // presumably applies advice a to len bytes starting at p -- confirm in the implementation
+ ~MAdvise(); // destructor resets the range to MADV_NORMAL
+ private:
+ void *_p;
+ unsigned _len;
+ };
+
+ // lock order: lock dbMutex before this if you lock both
+ class LockMongoFilesShared { // scoped shared lock on mmmutex
+ friend class LockMongoFilesExclusive;
+ static RWLockRecursiveNongreedy mmmutex;
+ static unsigned era; // incremented each time a LockMongoFilesExclusive is constructed
+ RWLockRecursive::Shared lk;
+ public:
+ LockMongoFilesShared() : lk(mmmutex) { }
+
+ /** era changes anytime memory maps come and go. thus you can use this as a cheap way to check
+ if nothing has changed since the last time you locked. Of course you must be shared locked
+ at the time of this call, otherwise someone could be in progress.
+
+ This is used for yielding; see PageFaultException::touch().
+ */
+ static unsigned getEra() { return era; }
+
+ static void assertExclusivelyLocked() { mmmutex.assertExclusivelyLocked(); }
+ static void assertAtLeastReadLocked() { mmmutex.assertAtLeastReadLocked(); }
+ };
+
+ class LockMongoFilesExclusive { // scoped exclusive lock on LockMongoFilesShared::mmmutex
+ RWLockRecursive::Exclusive lk;
+ public:
+ LockMongoFilesExclusive() : lk(LockMongoFilesShared::mmmutex) {
+ LockMongoFilesShared::era++; // every exclusive acquisition advances the era seen by getEra()
+ }
+ };
+
+ /* the administrative-ish stuff here: base class for files tracked in the global file registry */
+ class MongoFile {
+ MONGO_DISALLOW_COPYING(MongoFile);
+ public:
+ /** Flushable has to fail nicely if the underlying object gets killed */
+ class Flushable {
+ public:
+ virtual ~Flushable() {}
+ virtual void flush() = 0;
+ };
+
+ MongoFile() {}
+ virtual ~MongoFile() {}
+
+ enum Options {
+ SEQUENTIAL = 1, // hint - e.g. FILE_FLAG_SEQUENTIAL_SCAN on windows
+ READONLY = 2 // not contractually guaranteed, but if specified the impl has option to fault writes
+ };
+
+ /** @param fun is called for each MongoFile.
+ called from within a mutex that MongoFile uses. so be careful not to deadlock.
+ */
+ template < class F >
+ static void forEach( F fun );
+
+ /** note: you need to be in mmmutex when using this. forEach (above) handles that for you automatically.
+*/
+ static std::set<MongoFile*>& getAllFiles();
+
+ // callbacks if you need them
+ static void (*notifyPreFlush)();
+ static void (*notifyPostFlush)();
+
+ static int flushAll( bool sync ); // returns n flushed
+ static long long totalMappedLength(); // sum of length() over all registered files
+ static void closeAllFiles( std::stringstream &message ); // closes every registered file; status text is appended to message
+
+ virtual bool isDurableMappedFile() { return false; }
+
+ std::string filename() const { return _filename; }
+ void setFilename(const std::string& fn); // stores the absolute path and registers it; may only be called once
+
+ virtual uint64_t getUniqueId() const = 0;
+
+ private:
+ std::string _filename; // empty until setFilename() is called
+ static int _flushAll( bool sync ); // returns n flushed
+ protected:
+ virtual void close() = 0;
+ virtual void flush(bool sync) = 0;
+ /**
+ * returns a thread safe object that you can call flush on
+ * Flushable has to fail nicely if the underlying object gets killed
+ */
+ virtual Flushable * prepareFlush() = 0;
+
+ void created(); /* subclass must call after create */
+
+ /* subclass must call in destructor (or at close).
+ removes this from pathToFile and other maps
+ safe to call more than once, albeit might be wasted work
+ ideally called right at the close, if the close happens well before object destruction
+ */
+ void destroyed();
+
+ virtual unsigned long long length() const = 0;
+ };
+
+ /** look up a MongoFile by path. scoped mutex locking convention: the finder holds the
+ shared files lock for as long as it is alive.
+ example:
+ MongoFileFinder finder;
+ MongoFile *a = finder.findByPath("file_name_a");
+ MongoFile *b = finder.findByPath("file_name_b");
+ */
+ class MongoFileFinder {
+ MONGO_DISALLOW_COPYING(MongoFileFinder);
+ public:
+ MongoFileFinder() { }
+
+ /** @return The MongoFile object associated with the specified file name. If no file is open
+ with the specified name, returns null.
+ */
+ MongoFile* findByPath(const std::string& path) const;
+
+ private:
+ LockMongoFilesShared _lk; // shared lock taken at construction, released at destruction
+ };
+
+ class MemoryMappedFile : public MongoFile {
+ protected:
+ virtual void* viewForFlushing() { // the single view, or 0 when there are none; more than one view fails verify
+ if( views.size() == 0 )
+ return 0;
+ verify( views.size() == 1 );
+ return views[0];
+ }
+ public:
+ MemoryMappedFile();
+
+ virtual ~MemoryMappedFile() {
+ LockMongoFilesExclusive lk;
+ close();
+ }
+
+ virtual void close();
+
+ // Throws exception if file doesn't exist. (dm may2010: not sure if this is always true?)
+ void* map(const char *filename);
+
+ /** @param options see MongoFile::Options
+ */
+ void* mapWithOptions(const char *filename, int options);
+
+ /* Creates with length if DNE (does not exist), otherwise uses the existing file length.
+ NOTE(review): presumably the passed length is updated to the existing size in that case -- confirm in the implementation.
+ @param options MongoFile::Options bits
+ */
+ void* map(const char *filename, unsigned long long &length, int options = 0 );
+
+ /* Create. Must not exist.
+ @param zero fill file with zeros when true
+ */
+ void* create(const std::string& filename, unsigned long long len, bool zero);
+
+ void flush(bool sync);
+ virtual Flushable * prepareFlush();
+
+ long shortLength() const { return (long) len; } // len cast down to long
+ unsigned long long length() const { return len; }
+ HANDLE getFd() const { return fd; }
+ /** create a new view with the specified properties.
+ automatically cleaned up upon close/destruction of the MemoryMappedFile object.
+ */
+ void* createReadOnlyMap();
+ void* createPrivateMap();
+
+ virtual uint64_t getUniqueId() const { return _uniqueId; }
+
+ private:
+ static void updateLength( const char *filename, unsigned long long &length ); // sets length to the on-disk size if the file exists
+
+ HANDLE fd;
+ HANDLE maphandle;
+ std::vector<void *> views;
+ unsigned long long len;
+ const uint64_t _uniqueId;
+#ifdef _WIN32
+ // flush Mutex
+ //
+ // Protects:
+ // Prevent flush() and close() from concurrently running.
+ // It ensures close() cannot complete while flush() is running
+ // Lock Ordering:
+ // LockMongoFilesShared must be taken before _flushMutex if both are taken
+ boost::mutex _flushMutex;
+#endif
+
+ protected:
+
+ /** close the current private view and open a new replacement */
+ void* remapPrivateView(void *oldPrivateAddr);
+ };
+
+    /** Calls p once for every registered MongoFile while the shared files lock is held,
+     *  so p must not do anything that could deadlock against that lock.
+     */
+    template < class F >
+    inline void MongoFile::forEach( F p ) {
+        LockMongoFilesShared lklk;
+        const std::set<MongoFile*>& allFiles = MongoFile::getAllFiles();
+        std::set<MongoFile*>::const_iterator it = allFiles.begin();
+        while ( it != allFiles.end() ) {
+            p(*it);
+            ++it;
+        }
+    }
+
+} // namespace mongo
diff --git a/src/mongo/db/storage/mmap_v1/mmap_posix.cpp b/src/mongo/db/storage/mmap_v1/mmap_posix.cpp
new file mode 100644
index 00000000000..f7dffae468f
--- /dev/null
+++ b/src/mongo/db/storage/mmap_v1/mmap_posix.cpp
@@ -0,0 +1,324 @@
+// mmap_posix.cpp
+
+/* Copyright 2009 10gen Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects
+ * for all of the code used other than as permitted herein. If you modify
+ * file(s) with this exception, you may extend this exception to your
+ * version of the file(s), but you are not obligated to do so. If you do not
+ * wish to do so, delete this exception statement from your version. If you
+ * delete this exception statement from all source files in the program,
+ * then also delete it in the license file.
+ */
+
+#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kControl
+
+#include "mongo/platform/basic.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "mongo/platform/atomic_word.h"
+#include "mongo/db/concurrency/d_concurrency.h"
+#include "mongo/db/storage/mmap_v1/file_allocator.h"
+#include "mongo/db/storage/mmap_v1/mmap.h"
+#include "mongo/util/log.h"
+#include "mongo/util/mongoutils/str.h"
+#include "mongo/util/processinfo.h"
+#include "mongo/util/startup_test.h"
+
+using std::endl;
+using std::numeric_limits;
+using std::vector;
+
+using namespace mongoutils;
+
+namespace {
+ // File-local atomic counter, starting at 0. The MemoryMappedFile constructor in this
+ // file takes each object's _uniqueId from fetchAndAdd(1) on it.
+ mongo::AtomicUInt64 mmfNextId(0);
+}
+
+namespace mongo {
+ /**
+  * Reads the page size with sysconf(_SC_PAGESIZE), passes the value to
+  * minOSPageSizeBytesTest() and returns it.
+  */
+ static size_t fetchMinOSPageSizeBytes() {
+     size_t pageSizeBytes = sysconf( _SC_PAGESIZE );
+     minOSPageSizeBytesTest(pageSizeBytes);
+     return pageSizeBytes;
+ }
+ // Initialized once from fetchMinOSPageSizeBytes() during static initialization.
+ const size_t g_minOSPageSizeBytes = fetchMinOSPageSizeBytes();
+
+
+
+ /**
+  * Starts with no descriptor, no map handle and zero length, takes the next value of
+  * mmfNextId as this object's unique id, then calls created().
+  */
+ MemoryMappedFile::MemoryMappedFile()
+     : fd(0),
+       maphandle(0),
+       len(0),
+       _uniqueId(mmfNextId.fetchAndAdd(1)) {
+     created();
+ }
+
+ /**
+  * Unmaps every recorded view, closes the descriptor if one is open and calls destroyed().
+  * Asserts that the MongoFiles lock is held exclusively.
+  */
+ void MemoryMappedFile::close() {
+     LockMongoFilesShared::assertExclusivelyLocked();
+
+     // every view is unmapped with the length recorded for this file
+     for ( size_t idx = 0; idx < views.size(); ++idx ) {
+         munmap(views[idx], len);
+     }
+     views.clear();
+
+     // fd == 0 is this class's "not opened" sentinel
+     if ( fd != 0 ) {
+         ::close(fd);
+     }
+     fd = 0;
+
+     destroyed(); // cleans up from the master list of mmaps
+ }
+
+// Fallbacks for platforms whose headers do not define these flags: defining them as 0
+// makes OR-ing them into open()/mmap() flag arguments a no-op.
+#ifndef O_NOATIME
+#define O_NOATIME (0)
+#endif
+
+#ifndef MAP_NORESERVE
+#define MAP_NORESERVE (0)
+#endif
+
+ namespace {
+     /** Returns p with the address bits selected by (g_minOSPageSizeBytes - 1) cleared. */
+     void* _pageAlign( void* p ) {
+         const int64_t addr = reinterpret_cast<int64_t>( p );
+         return reinterpret_cast<void*>( addr & ~(g_minOSPageSizeBytes-1) );
+     }
+
+     /** Startup self-check for _pageAlign(). */
+     class PageAlignTest : public StartupTest {
+     public:
+         void run() {
+             {
+                 // one page plus a small offset must align down to exactly one page
+                 int64_t unaligned = g_minOSPageSizeBytes + 123;
+                 void* aligned = _pageAlign( reinterpret_cast<void*>( unaligned ) );
+                 invariant( g_minOSPageSizeBytes == reinterpret_cast<size_t>(aligned) );
+             }
+             {
+                 // largest page-aligned value not above numeric_limits<int>::max()
+                 int64_t pageStart = static_cast<uint64_t>( numeric_limits<int>::max() );
+                 pageStart = pageStart / g_minOSPageSizeBytes;
+                 pageStart = pageStart * g_minOSPageSizeBytes;
+
+                 // an address inside that page, not page aligned
+                 int64_t inside = pageStart + 123;
+
+                 void* aligned = _pageAlign( reinterpret_cast<void*>( inside ) );
+                 invariant( pageStart == reinterpret_cast<int64_t>(aligned) );
+             }
+
+         }
+     } pageAlignTest;
+ }
+
+#if defined(__sun)
+ // On __sun builds both the constructor and the destructor do nothing.
+ MAdvise::MAdvise(void *,unsigned, Advice) { }
+ MAdvise::~MAdvise() { }
+#else
+ /**
+  * Applies madvise() advice to the range starting at the page-aligned address at or
+  * below p; the destructor later resets the same range to MADV_NORMAL.
+  * @param p   start of the region of interest (need not be page aligned)
+  * @param len length of the region in bytes
+  * @param a   Sequential or Random
+  */
+ MAdvise::MAdvise(void *p, unsigned len, Advice a) {
+
+     _p = _pageAlign( p );
+
+     // extend the length by the distance p was moved back during alignment
+     const size_t slack = reinterpret_cast<size_t>(p) - reinterpret_cast<size_t>(_p);
+     _len = len + static_cast<unsigned>( slack );
+
+     int advice = 0;
+     if ( a == Sequential ) {
+         advice = MADV_SEQUENTIAL;
+     }
+     else if ( a == Random ) {
+         advice = MADV_RANDOM;
+     }
+
+     if ( madvise(_p,_len,advice ) != 0 ) {
+         error() << "madvise failed: " << errnoWithDescription();
+     }
+
+ }
+ /** Resets the advice on the range recorded by the constructor; the result is ignored. */
+ MAdvise::~MAdvise() {
+     madvise(_p,_len,MADV_NORMAL);
+ }
+#endif
+
+ /**
+  * Opens filename read/write and maps the whole file as a shared, writable view.
+  * The view is recorded in views so that close() unmaps it.
+  * @param filename path of the file to map
+  * @param length   requested length; passed by reference to FileAllocator::allocateAsap(),
+  *                 which may update it. Must be > 0 and equal to the file's size afterwards.
+  * @param options  MongoFile::Options bits; SEQUENTIAL adds MADV_SEQUENTIAL advice
+  * @return the mapped address, or 0 if open() or mmap() failed
+  */
+ void* MemoryMappedFile::map(const char *filename, unsigned long long &length, int options) {
+     // length may be updated by callee.
+     setFilename(filename);
+     FileAllocator::get()->allocateAsap( filename, length );
+     len = length;
+
+     massert( 10446 , str::stream() << "mmap: can't map area of size 0 file: " << filename, length > 0 );
+
+     fd = open(filename, O_RDWR | O_NOATIME);
+     if ( fd <= 0 ) {
+         // capture errno before the logging machinery gets a chance to overwrite it
+         const int openErr = errno;
+         log() << "couldn't open " << filename << ' ' << errnoWithDescription(openErr) << endl;
+         fd = 0; // our sentinel for not opened
+         return 0;
+     }
+
+     unsigned long long filelen = lseek(fd, 0, SEEK_END);
+     uassert(10447, str::stream() << "map file alloc failed, wanted: " << length << " filelen: " << filelen << ' ' << sizeof(size_t), filelen == length );
+     lseek( fd, 0, SEEK_SET );
+
+     void * view = mmap(NULL, length, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
+     if ( view == MAP_FAILED ) {
+         // capture errno first: the ENOMEM test below must see mmap()'s error, not
+         // whatever the first log statement leaves behind
+         const int mmapErr = errno;
+         error() << " mmap() failed for " << filename << " len:" << length << " " << errnoWithDescription(mmapErr) << endl;
+         if ( mmapErr == ENOMEM ) {
+             if( sizeof(void*) == 4 )
+                 error() << "mmap failed with out of memory. You are using a 32-bit build and probably need to upgrade to 64" << endl;
+             else
+                 error() << "mmap failed with out of memory. (64 bit build)" << endl;
+         }
+         // fd stays open here; close() releases it
+         return 0;
+     }
+
+
+#if defined(__sun)
+#warning madvise not supported on solaris yet
+#else
+     if ( options & SEQUENTIAL ) {
+         if ( madvise( view , length , MADV_SEQUENTIAL ) ) {
+             warning() << "map: madvise failed for " << filename << ' ' << errnoWithDescription() << endl;
+         }
+     }
+#endif
+
+     views.push_back( view );
+
+     return view;
+ }
+
+ /**
+  * Maps the already-open file as a shared, read-only view of len bytes.
+  * @return the mapped address, or 0 if mmap() failed (the failure is logged)
+  *
+  * NOTE(review): unlike createPrivateMap() this does not record the view in views, so
+  * close() will not unmap it - confirm whether callers rely on that.
+  */
+ void* MemoryMappedFile::createReadOnlyMap() {
+     void * x = mmap( /*start*/0 , len , PROT_READ , MAP_SHARED , fd , 0 );
+     if( x == MAP_FAILED ) {
+         const int err = errno; // capture before logging can overwrite it
+         if ( err == ENOMEM ) {
+             if( sizeof(void*) == 4 )
+                 error() << "mmap ro failed with out of memory. You are using a 32-bit build and probably need to upgrade to 64" << endl;
+             else
+                 error() << "mmap ro failed with out of memory. (64 bit build)" << endl;
+         }
+         else {
+             // previously every other failure returned 0 without any diagnostic
+             error() << "mmap ro failed " << errnoWithDescription(err) << endl;
+         }
+         return 0;
+     }
+     return x;
+ }
+
+ /**
+  * Maps the already-open file as a private (MAP_PRIVATE|MAP_NORESERVE), writable view of
+  * len bytes and records it in views.
+  * @return the mapped address, or 0 if mmap() failed (the failure is logged)
+  */
+ void* MemoryMappedFile::createPrivateMap() {
+     void * const privateView =
+         mmap( /*start*/0 , len , PROT_READ|PROT_WRITE , MAP_PRIVATE|MAP_NORESERVE , fd , 0 );
+
+     if( privateView != MAP_FAILED ) {
+         views.push_back(privateView);
+         return privateView;
+     }
+
+     if ( errno != ENOMEM ) {
+         error() << "mmap private failed " << errnoWithDescription() << endl;
+     }
+     else if( sizeof(void*) == 4 ) {
+         error() << "mmap private failed with out of memory. You are using a 32-bit build and probably need to upgrade to 64" << endl;
+     }
+     else {
+         error() << "mmap private failed with out of memory. (64 bit build)" << endl;
+     }
+     return 0;
+ }
+
+ /**
+  * Replaces the private view at oldPrivateAddr with a fresh private mapping of the file
+  * at the same address. Aborts the process if the remap fails.
+  * @return oldPrivateAddr
+  */
+ void* MemoryMappedFile::remapPrivateView(void *oldPrivateAddr) {
+#if defined(__sun) // SERVER-8795
+     LockMongoFilesExclusive lockMongoFiles;
+#endif
+
+     // MAP_FIXED maps directly over the old region, so no separate munmap is done
+     const int protection = PROT_READ|PROT_WRITE;
+     const int mapFlags = MAP_PRIVATE|MAP_NORESERVE|MAP_FIXED;
+     void * const remapped = mmap( oldPrivateAddr, len , protection , mapFlags , fd , 0 );
+     if( remapped == MAP_FAILED ) {
+         int err = errno;
+         error() << "13601 Couldn't remap private view: " << errnoWithDescription(err) << endl;
+         log() << "aborting" << endl;
+         printMemInfo();
+         abort();
+     }
+     verify( remapped == oldPrivateAddr );
+     return remapped;
+ }
+
+ /**
+  * Flushes the file: fsync() on the descriptor when sync is set and the platform does not
+  * prefer msync, otherwise msync() on the flushing view (MS_SYNC or MS_ASYNC per sync).
+  * Does nothing when there is no view or no open descriptor. A failed flush is logged and
+  * passed to dataSyncFailedHandler().
+  */
+ void MemoryMappedFile::flush(bool sync) {
+     if ( views.empty() || fd == 0 )
+         return;
+
+     const bool useFsync = sync && !ProcessInfo::preferMsyncOverFSync();
+
+     int rc;
+     if ( useFsync ) {
+         rc = fsync(fd);
+     }
+     else {
+         rc = msync(viewForFlushing(), len, sync ? MS_SYNC : MS_ASYNC);
+     }
+
+     if ( rc != 0 ) {
+         // the flush failed, this is very bad
+         log() << (useFsync ? "fsync failed: " : "msync failed: ") << errnoWithDescription()
+               << " file: " << filename() << endl;
+         dataSyncFailedHandler();
+     }
+ }
+
+ /**
+  * Snapshot of what is needed to flush one MemoryMappedFile later: the owning file,
+  * the view, the descriptor, the length and the file's unique id at creation time.
+  */
+ class PosixFlushable : public MemoryMappedFile::Flushable {
+ public:
+     PosixFlushable( MemoryMappedFile* theFile, void* view , HANDLE fd , long len)
+         : _theFile( theFile ), _view( view ), _fd(fd), _len(len), _id(_theFile->getUniqueId()) {
+     }
+
+     /**
+      * Synchronously flushes with msync() or fsync(), depending on
+      * ProcessInfo::preferMsyncOverFSync(). A failure is ignored when the file has been
+      * closed or destroyed in the meantime; otherwise dataSyncFailedHandler() is called.
+      */
+     void flush() {
+         if ( _view == NULL || _fd == 0 )
+             return;
+
+         if ( ProcessInfo::preferMsyncOverFSync() ?
+              msync(_view, _len, MS_SYNC ) == 0 :
+              fsync(_fd) == 0 ) {
+             return;
+         }
+
+         // Capture errno right away: taking the lock and logging below may overwrite it.
+         const int err = errno;
+
+         if ( err == EBADF ) {
+             // ok, we were unlocked, so this file was closed
+             return;
+         }
+
+         // some error, lets see if we're supposed to exist
+         LockMongoFilesShared mmfilesLock;
+         // bind by reference instead of copying the whole set; the shared lock above is
+         // held for as long as the reference is used
+         const std::set<MongoFile*>& mmfs = MongoFile::getAllFiles();
+         std::set<MongoFile*>::const_iterator it = mmfs.find(_theFile);
+         if ( (it == mmfs.end()) || ((*it)->getUniqueId() != _id) ) {
+             log() << "msync failed with: " << errnoWithDescription(err)
+                   << " but file doesn't exist anymore, so ignoring";
+             // this was deleted while we were unlocked
+             return;
+         }
+
+         // we got an error, and we still exist, so this is bad, we fail
+         log() << "msync " << errnoWithDescription(err) << endl;
+         dataSyncFailedHandler();
+     }
+
+     MemoryMappedFile* _theFile; // only looked up in the registry / dereferenced via the set entry
+     void * _view;
+     HANDLE _fd;
+     long _len;
+     const uint64_t _id;
+ };
+
+ /** Returns a newly allocated PosixFlushable built from this file's current flushing view, fd and len. */
+ MemoryMappedFile::Flushable * MemoryMappedFile::prepareFlush() {
+     void * const flushView = viewForFlushing();
+     return new PosixFlushable( this, flushView, fd, len);
+ }
+
+
+} // namespace mongo
+
diff --git a/src/mongo/db/storage/mmap_v1/mmap_v1_engine.cpp b/src/mongo/db/storage/mmap_v1/mmap_v1_engine.cpp
index f84f7810bc3..595df3616f3 100644
--- a/src/mongo/db/storage/mmap_v1/mmap_v1_engine.cpp
+++ b/src/mongo/db/storage/mmap_v1/mmap_v1_engine.cpp
@@ -37,6 +37,7 @@
#include <fstream>
#include "mongo/db/mongod_options.h"
+#include "mongo/db/storage/mmap_v1/mmap.h"
#include "mongo/db/storage/mmap_v1/data_file_sync.h"
#include "mongo/db/storage/mmap_v1/dur.h"
#include "mongo/db/storage/mmap_v1/dur_journal.h"
@@ -46,9 +47,8 @@
#include "mongo/db/storage/mmap_v1/mmap_v1_options.h"
#include "mongo/db/storage/storage_engine_lock_file.h"
#include "mongo/db/storage_options.h"
-#include "mongo/util/file_allocator.h"
+#include "mongo/db/storage/mmap_v1/file_allocator.h"
#include "mongo/util/log.h"
-#include "mongo/util/mmap.h"
namespace mongo {
diff --git a/src/mongo/db/storage/mmap_v1/mmap_v1_extent_manager.cpp b/src/mongo/db/storage/mmap_v1/mmap_v1_extent_manager.cpp
index 17a72099531..01a13ee86a9 100644
--- a/src/mongo/db/storage/mmap_v1/mmap_v1_extent_manager.cpp
+++ b/src/mongo/db/storage/mmap_v1/mmap_v1_extent_manager.cpp
@@ -43,6 +43,7 @@
#include "mongo/db/storage/mmap_v1/record.h"
#include "mongo/db/storage/mmap_v1/extent.h"
#include "mongo/db/storage/mmap_v1/extent_manager.h"
+#include "mongo/db/storage/mmap_v1/mmap.h"
#include "mongo/db/storage/mmap_v1/mmap_v1_engine.h"
#include "mongo/db/storage/mmap_v1/mmap_v1_options.h"
#include "mongo/db/storage/record_fetcher.h"
@@ -50,7 +51,6 @@
#include "mongo/util/fail_point_service.h"
#include "mongo/util/file.h"
#include "mongo/util/log.h"
-#include "mongo/util/mmap.h"
namespace mongo {
diff --git a/src/mongo/db/storage/mmap_v1/mmap_windows.cpp b/src/mongo/db/storage/mmap_v1/mmap_windows.cpp
new file mode 100644
index 00000000000..2ce46d43584
--- /dev/null
+++ b/src/mongo/db/storage/mmap_v1/mmap_windows.cpp
@@ -0,0 +1,539 @@
+// mmap_windows.cpp
+
+/* Copyright 2009 10gen Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects
+ * for all of the code used other than as permitted herein. If you modify
+ * file(s) with this exception, you may extend this exception to your
+ * version of the file(s), but you are not obligated to do so. If you do not
+ * wish to do so, delete this exception statement from your version. If you
+ * delete this exception statement from all source files in the program,
+ * then also delete it in the license file.
+ */
+
+#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kControl
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/storage/mmap_v1/mmap.h"
+
+#include "mongo/db/storage/mmap_v1/durable_mapped_file.h"
+#include "mongo/db/storage/mmap_v1/file_allocator.h"
+#include "mongo/util/log.h"
+#include "mongo/util/processinfo.h"
+#include "mongo/util/text.h"
+#include "mongo/util/timer.h"
+
+namespace mongo {
+
+ using std::endl;
+ using std::string;
+ using std::vector;
+
+ namespace {
+ // File-local atomic counter, starting at 0. The MemoryMappedFile constructor in this
+ // file takes each object's _uniqueId from fetchAndAdd(1) on it.
+ mongo::AtomicUInt64 mmfNextId(0);
+ }
+
+ /**
+  * Reads dwPageSize from GetSystemInfo(), passes the value to minOSPageSizeBytesTest()
+  * and returns it.
+  */
+ static size_t fetchMinOSPageSizeBytes() {
+     SYSTEM_INFO sysInfo;
+     GetSystemInfo(&sysInfo);
+     size_t pageSizeBytes = sysInfo.dwPageSize;
+     minOSPageSizeBytesTest(pageSizeBytes);
+     return pageSizeBytes;
+ }
+ // Initialized once from fetchMinOSPageSizeBytes() during static initialization.
+ const size_t g_minOSPageSizeBytes = fetchMinOSPageSizeBytes();
+
+ // MapViewMutex
+ //
+ // Protects:
+ // 1. Ensures all MapViewOfFile/UnMapViewOfFile operations are serialized to reduce the
+ // chance of "address in use" errors (error code 487)
+ // - These errors can still occur if the memory is used for other purposes
+ // (stack storage, heap)
+ // 2. Prevents calls to VirtualProtect while we are remapping files.
+ // Lock Ordering:
+ // - If taken, must be after previewViews._m to prevent deadlocks
+ // NOTE(review): "previewViews" is not referenced anywhere in this file; this may mean
+ // privateViews (used in remapPrivateView) - confirm.
+ mutex mapViewMutex;
+
+ // In this Windows implementation MAdvise is a no-op: both members have empty bodies.
+ MAdvise::MAdvise(void *,unsigned, Advice) { }
+ MAdvise::~MAdvise() { }
+
+ // Lowest address (256 * 1024^3 bytes) at which getNextMemoryMappedFileLocation() starts
+ // looking for space, and the address it restarts from when a scan runs off the end.
+ const unsigned long long memoryMappedFileLocationFloor = 256LL * 1024LL * 1024LL * 1024LL;
+ // Address at which the next search begins; advanced by getNextMemoryMappedFileLocation().
+ static unsigned long long _nextMemoryMappedFileLocation = memoryMappedFileLocationFloor;
+
+ // nextMemoryMappedFileLocationMutex
+ //
+ // Protects:
+ // Windows 64-bit specific allocation of virtual memory regions for
+ // placing memory mapped files in memory
+ // Lock Ordering:
+ // No restrictions
+ static SimpleMutex _nextMemoryMappedFileLocationMutex("nextMemoryMappedFileLocationMutex");
+
+ /**
+  * Rounds number up to a multiple of granularity using bit masking.
+  * The masking only yields a correct multiple when granularity is a power of two.
+  */
+ unsigned long long AlignNumber(unsigned long long number, unsigned long long granularity)
+ {
+     const unsigned long long lowBits = granularity - 1;
+     const unsigned long long bumped = number + lowBits;
+     return bumped & ~lowBits;
+ }
+
+ // Picks an address at which a view of mmfSize bytes can be placed.
+ // Returns 0 on 32-bit builds. Otherwise scans regions with VirtualQuery, starting at
+ // _nextMemoryMappedFileLocation, until it finds a free region larger than mmfSize;
+ // _nextMemoryMappedFileLocation is then advanced past the returned address by mmfSize
+ // rounded up to the allocation granularity. A VirtualQuery failure is fatal (17484),
+ // except that the first ERROR_INVALID_PARAMETER restarts the scan from the floor.
+ static void* getNextMemoryMappedFileLocation(unsigned long long mmfSize) {
+ if (4 == sizeof(void*)) {
+ return 0;
+ }
+ SimpleMutex::scoped_lock lk(_nextMemoryMappedFileLocationMutex);
+
+ // dwAllocationGranularity, fetched on first use (under the mutex above)
+ static unsigned long long granularity = 0;
+
+ if (0 == granularity) {
+ SYSTEM_INFO systemInfo;
+ GetSystemInfo(&systemInfo);
+ granularity = static_cast<unsigned long long>(systemInfo.dwAllocationGranularity);
+ }
+
+ unsigned long long thisMemoryMappedFileLocation = _nextMemoryMappedFileLocation;
+
+ // 1 until the scan has wrapped back to the floor once
+ int current_retry = 1;
+
+ while (true) {
+ MEMORY_BASIC_INFORMATION memInfo;
+
+ if (VirtualQuery(reinterpret_cast<LPCVOID>(thisMemoryMappedFileLocation),
+ &memInfo, sizeof(memInfo)) == 0) {
+ DWORD gle = GetLastError();
+
+ // If we exceed the limits of Virtual Memory
+ // - 8TB before Windows 8.1/2012 R2, 128 TB after
+ // restart scanning from our memory mapped floor once more
+ // This is a linear scan of regions, not of every VM page
+ if (gle == ERROR_INVALID_PARAMETER && current_retry == 1) {
+ thisMemoryMappedFileLocation = memoryMappedFileLocationFloor;
+ ++current_retry;
+ continue;
+ }
+
+ log() << "VirtualQuery of " << thisMemoryMappedFileLocation
+ << " failed with error " << errnoWithDescription(gle);
+ fassertFailed(17484);
+ }
+
+ // Free memory regions that we can use for memory map files
+ // 1. Marked MEM_FREE, not MEM_RESERVE
+ // 2. Marked as PAGE_NOACCESS, not anything else
+ if (memInfo.Protect == PAGE_NOACCESS &&
+ memInfo.State == MEM_FREE &&
+ memInfo.RegionSize > mmfSize)
+ break;
+
+ // not usable: continue the scan at the first address after this region
+ thisMemoryMappedFileLocation = reinterpret_cast<unsigned long long>(memInfo.BaseAddress)
+ + memInfo.RegionSize;
+ }
+
+ // the next search starts after the space handed out here
+ _nextMemoryMappedFileLocation = thisMemoryMappedFileLocation
+ + AlignNumber(mmfSize, granularity);
+
+ return reinterpret_cast<void*>(static_cast<uintptr_t>(thisMemoryMappedFileLocation));
+ }
+
+ /**
+  * Starts with no file handle, no map handle and zero length, takes the next value of
+  * mmfNextId as this object's unique id, then calls created().
+  * The initializers are listed in the members' declaration order.
+  */
+ MemoryMappedFile::MemoryMappedFile()
+     : fd(0),
+       maphandle(0),
+       len(0),
+       _uniqueId(mmfNextId.fetchAndAdd(1)) {
+
+     created();
+ }
+
+ /**
+  * Unmaps every recorded view, closes the mapping and file handles if open, and calls
+  * destroyed(). Asserts that the MongoFiles lock is held exclusively.
+  */
+ void MemoryMappedFile::close() {
+     LockMongoFilesShared::assertExclusivelyLocked();
+
+     // Prevent flush and close from concurrently running
+     boost::lock_guard<boost::mutex> flushGuard(_flushMutex);
+
+     {
+         // unmapping is serialized with all other map/unmap operations
+         boost::lock_guard<boost::mutex> mapGuard(mapViewMutex);
+
+         for (size_t idx = 0; idx < views.size(); ++idx) {
+             UnmapViewOfFile(views[idx]);
+         }
+     }
+
+     views.clear();
+
+     if ( maphandle != 0 ) {
+         CloseHandle(maphandle);
+     }
+     maphandle = 0;
+
+     if ( fd != 0 ) {
+         CloseHandle(fd);
+     }
+     fd = 0;
+
+     destroyed(); // cleans up from the master list of mmaps
+ }
+
+ // Running total of the lengths passed to MemoryMappedFile::map(), which adds to it.
+ unsigned long long mapped = 0;
+
+ /**
+  * Maps a read-only view of the whole file mapping and records it in views.
+  * A new candidate address is tried on each ERROR_INVALID_ADDRESS failure, up to four
+  * retries; any other failure, or the fifth failure, is fatal (16165).
+  * @return the address of the new view
+  */
+ void* MemoryMappedFile::createReadOnlyMap() {
+     verify( maphandle );
+
+     boost::lock_guard<boost::mutex> lk(mapViewMutex);
+
+     void* roView = NULL;
+     int failedAttempts = 0;
+
+     for (;;) {
+
+         LPVOID candidate = getNextMemoryMappedFileLocation(len);
+
+         roView = MapViewOfFileEx(
+             maphandle, // file mapping handle
+             FILE_MAP_READ, // access
+             0, 0, // file offset, high and low
+             0, // bytes to map, 0 == all
+             candidate); // address to place file
+
+         if (0 != roView) {
+             break;
+         }
+
+         DWORD dosError = GetLastError();
+         ++failedAttempts;
+
+         // If we failed to allocate a memory mapped file, try again in case we picked
+         // an address that Windows is also trying to use for some other VM allocations
+         if (dosError == ERROR_INVALID_ADDRESS && failedAttempts < 5) {
+             continue;
+         }
+
+         log() << "MapViewOfFileEx for " << filename()
+               << " at address " << candidate
+               << " failed with error " << errnoWithDescription(dosError)
+               << " (file size is " << len << ")"
+               << " in MemoryMappedFile::createReadOnlyMap"
+               << endl;
+
+         fassertFailed(16165);
+         break;
+     }
+
+     views.push_back( roView );
+     return roView;
+ }
+
+ /**
+  * Opens (creating if necessary) the file, creates a read/write file mapping of length
+  * bytes and maps a view of it, which is recorded in views.
+  * @param filenameIn path of the file; ':' characters in its last path component are
+  *                   replaced by '_' before the file is opened
+  * @param length     requested length; passed by reference to
+  *                   FileAllocator::allocateAsap() and updateLength()
+  * @param options    MongoFile::Options bits (SEQUENTIAL, READONLY are consulted here)
+  * @return the mapped address, or 0 if the file could not be opened; failure to create
+  *         the mapping or the view is fatal (16225 / 16166)
+  */
+ void* MemoryMappedFile::map(const char *filenameIn, unsigned long long &length, int options) {
+     verify( fd == 0 && len == 0 ); // can't open more than once
+     setFilename(filenameIn);
+     FileAllocator::get()->allocateAsap( filenameIn, length );
+     /* big hack here: Babble uses db names with colons. doesn't seem to work on windows. temporary perhaps. */
+     char filename[256];
+     strncpy(filename, filenameIn, 255);
+     filename[255] = 0;
+     {
+         // Walk backwards over the last path component only. The index is unsigned, so
+         // the loop counts down with "i-- > 0": the former "i >= 0" test was always true
+         // and ran off the front of the buffer when the name had no path separator
+         // (or was empty).
+         for ( size_t i = strlen( filename ); i-- > 0; ) {
+             if ( filename[i] == '/' ||
+                  filename[i] == '\\' )
+                 break;
+
+             if ( filename[i] == ':' )
+                 filename[i] = '_';
+         }
+     }
+
+     updateLength( filename, length );
+
+     {
+         DWORD createOptions = FILE_ATTRIBUTE_NORMAL;
+         if ( options & SEQUENTIAL )
+             createOptions |= FILE_FLAG_SEQUENTIAL_SCAN;
+         DWORD rw = GENERIC_READ | GENERIC_WRITE;
+         fd = CreateFileW(
+             toWideString(filename).c_str(),
+             rw, // desired access
+             FILE_SHARE_WRITE | FILE_SHARE_READ, // share mode
+             NULL, // security
+             OPEN_ALWAYS, // create disposition
+             createOptions , // flags
+             NULL); // hTempl
+         if ( fd == INVALID_HANDLE_VALUE ) {
+             DWORD dosError = GetLastError();
+             log() << "CreateFileW for " << filename
+                   << " failed with " << errnoWithDescription( dosError )
+                   << " (file size is " << length << ")"
+                   << " in MemoryMappedFile::map"
+                   << endl;
+             // restore the "not opened" sentinel: close() and flush() test fd against 0,
+             // so leaving INVALID_HANDLE_VALUE here would look like an open handle
+             fd = 0;
+             return 0;
+         }
+     }
+
+     mapped += length;
+
+     {
+         DWORD flProtect = PAGE_READWRITE; //(options & READONLY)?PAGE_READONLY:PAGE_READWRITE;
+         maphandle = CreateFileMappingW(fd, NULL, flProtect,
+                                        length >> 32 /*maxsizehigh*/,
+                                        (unsigned) length /*maxsizelow*/,
+                                        NULL/*lpName*/);
+         if ( maphandle == NULL ) {
+             DWORD dosError = GetLastError();
+             log() << "CreateFileMappingW for " << filename
+                   << " failed with " << errnoWithDescription( dosError )
+                   << " (file size is " << length << ")"
+                   << " in MemoryMappedFile::map"
+                   << endl;
+             close();
+             fassertFailed( 16225 );
+         }
+     }
+
+     void *view = 0;
+     {
+         boost::lock_guard<boost::mutex> lk(mapViewMutex);
+         DWORD access = ( options & READONLY ) ? FILE_MAP_READ : FILE_MAP_ALL_ACCESS;
+
+         int current_retry = 0;
+         while (true) {
+
+             // a fresh candidate address is picked for every attempt
+             LPVOID thisAddress = getNextMemoryMappedFileLocation(length);
+
+             view = MapViewOfFileEx(
+                 maphandle, // file mapping handle
+                 access, // access
+                 0, 0, // file offset, high and low
+                 0, // bytes to map, 0 == all
+                 thisAddress); // address to place file
+
+             if (view == 0) {
+                 DWORD dosError = GetLastError();
+
+                 ++current_retry;
+
+                 // If we failed to allocate a memory mapped file, try again in case we picked
+                 // an address that Windows is also trying to use for some other VM allocations
+                 if (dosError == ERROR_INVALID_ADDRESS && current_retry < 5) {
+                     continue;
+                 }
+
+#ifndef _WIN64
+                 // Warn user that if they are running a 32-bit app on 64-bit Windows
+                 if (dosError == ERROR_NOT_ENOUGH_MEMORY) {
+                     BOOL wow64Process;
+                     BOOL retWow64 = IsWow64Process(GetCurrentProcess(), &wow64Process);
+                     if (retWow64 && wow64Process) {
+                         log() << "This is a 32-bit MongoDB binary running on a 64-bit"
+                                  " operating system that has run out of virtual memory for"
+                                  " databases. Switch to a 64-bit build of MongoDB to open"
+                                  " the databases.";
+                     }
+                 }
+#endif
+
+                 log() << "MapViewOfFileEx for " << filename
+                       << " at address " << thisAddress
+                       << " failed with " << errnoWithDescription(dosError)
+                       << " (file size is " << length << ")"
+                       << " in MemoryMappedFile::map"
+                       << endl;
+
+                 close();
+                 fassertFailed(16166);
+             }
+
+             break;
+         }
+     }
+
+     views.push_back(view);
+     len = length;
+     return view;
+ }
+
+ // Redeclaration of the mapViewMutex defined near the top of this file.
+ extern mutex mapViewMutex;
+
+ /**
+  * Maps another view of the whole file mapping (requested with FILE_MAP_READ access) for
+  * use as the private view, and records it in views.
+  * A new candidate address is tried on each ERROR_INVALID_ADDRESS failure, up to four
+  * retries; any other failure, or the fifth failure, is fatal (16167).
+  * @return the address of the new view
+  */
+ void* MemoryMappedFile::createPrivateMap() {
+     verify( maphandle );
+
+     boost::lock_guard<boost::mutex> lk(mapViewMutex);
+
+     void* privateMapAddress = NULL;
+     int current_retry = 0;
+
+     while (true) {
+
+         // Pick the candidate inside the loop, as map() and createReadOnlyMap() do.
+         // Computing it once before the loop made every retry reuse the address that
+         // had just been rejected.
+         LPVOID thisAddress = getNextMemoryMappedFileLocation( len );
+
+         privateMapAddress = MapViewOfFileEx(
+             maphandle, // file mapping handle
+             FILE_MAP_READ, // access
+             0, 0, // file offset, high and low
+             0, // bytes to map, 0 == all
+             thisAddress); // address to place file
+
+         if (privateMapAddress == 0) {
+             DWORD dosError = GetLastError();
+
+             ++current_retry;
+
+             // If we failed to allocate a memory mapped file, try again in case we picked
+             // an address that Windows is also trying to use for some other VM allocations
+             if (dosError == ERROR_INVALID_ADDRESS && current_retry < 5) {
+                 continue;
+             }
+
+             log() << "MapViewOfFileEx for " << filename()
+                   << " failed with error " << errnoWithDescription(dosError)
+                   << " (file size is " << len << ")"
+                   << " in MemoryMappedFile::createPrivateMap"
+                   << endl;
+
+             fassertFailed(16167);
+         }
+
+         break;
+     }
+
+     views.push_back( privateMapAddress );
+     return privateMapAddress;
+ }
+
+ // Replaces the private view at oldPrivateAddr with a freshly mapped view at the same
+ // address: clears the writable bits tracked for the old range, unmaps the old view and
+ // maps the file mapping again at oldPrivateAddr. Failure to unmap is fatal (16168);
+ // ending up with any address other than oldPrivateAddr is fatal (16148).
+ void* MemoryMappedFile::remapPrivateView(void *oldPrivateAddr) {
+ LockMongoFilesExclusive lockMongoFiles;
+
+ privateViews.clearWritableBits(oldPrivateAddr, len);
+
+ // taken after LockMongoFilesExclusive; held across both the unmap and the re-map
+ boost::lock_guard<boost::mutex> lk(mapViewMutex);
+
+ if( !UnmapViewOfFile(oldPrivateAddr) ) {
+ DWORD dosError = GetLastError();
+ log() << "UnMapViewOfFile for " << filename()
+ << " failed with error " << errnoWithDescription( dosError )
+ << " in MemoryMappedFile::remapPrivateView"
+ << endl;
+ fassertFailed( 16168 );
+ }
+
+ void* newPrivateView = MapViewOfFileEx(
+ maphandle, // file mapping handle
+ FILE_MAP_READ, // access
+ 0, 0, // file offset, high and low
+ 0, // bytes to map, 0 == all
+ oldPrivateAddr ); // we want the same address we had before
+ if ( 0 == newPrivateView ) {
+ DWORD dosError = GetLastError();
+ log() << "MapViewOfFileEx for " << filename()
+ << " failed with error " << errnoWithDescription( dosError )
+ << " (file size is " << len << ")"
+ << " in MemoryMappedFile::remapPrivateView"
+ << endl;
+ }
+ // no early exit after the failure log above: the address check below is what stops
+ // the process (presumably oldPrivateAddr is never null here - confirm)
+ fassert( 16148, newPrivateView == oldPrivateAddr );
+ return newPrivateView;
+ }
+
+ // Snapshot of what is needed to flush one MemoryMappedFile later: the owning file, the
+ // view, the file handle, the file's unique id and name, and a reference to the file's
+ // flush mutex.
+ class WindowsFlushable : public MemoryMappedFile::Flushable {
+ public:
+ WindowsFlushable( MemoryMappedFile* theFile,
+ void * view,
+ HANDLE fd,
+ const uint64_t id,
+ const std::string& filename,
+ boost::mutex& flushMutex )
+ : _theFile(theFile), _view(view), _fd(fd), _id(id), _filename(filename),
+ _flushMutex(flushMutex)
+ {}
+
+ // Flushes the view with FlushViewOfFile and then the file with FlushFileBuffers.
+ // Returns without doing anything if there is no view/handle or if the file is no
+ // longer registered under the same unique id.
+ void flush() {
+ if (!_view || !_fd)
+ return;
+
+ {
+ LockMongoFilesShared mmfilesLock;
+
+ // NOTE(review): this copies the whole set while the lock is held; a const
+ // reference would likely suffice since it is only used inside this scope.
+ std::set<MongoFile*> mmfs = MongoFile::getAllFiles();
+ std::set<MongoFile*>::const_iterator it = mmfs.find(_theFile);
+ if ( it == mmfs.end() || (*it)->getUniqueId() != _id ) {
+ // this was deleted while we were unlocked
+ return;
+ }
+
+ // Hold the flush mutex to ensure the file is not closed during flush
+ // (locked here, before the MongoFiles lock is released at the end of this scope)
+ _flushMutex.lock();
+ }
+
+ // adopt the mutex locked above so that it is released on every exit path
+ boost::lock_guard<boost::mutex> lk(_flushMutex, boost::adopt_lock_t());
+
+ int loopCount = 0;
+ bool success = false;
+ bool timeout = false;
+ int dosError = ERROR_SUCCESS;
+ const int maximumTimeInSeconds = 60 * 15;
+ Timer t;
+ // keep retrying while the only failure is ERROR_LOCK_VIOLATION, for up to 15 minutes
+ while ( !success && !timeout ) {
+ ++loopCount;
+ success = FALSE != FlushViewOfFile( _view, 0 );
+ if ( !success ) {
+ dosError = GetLastError();
+ if ( dosError != ERROR_LOCK_VIOLATION ) {
+ break;
+ }
+ timeout = t.seconds() > maximumTimeInSeconds;
+ }
+ }
+ if ( success && loopCount > 1 ) {
+ log() << "FlushViewOfFile for " << _filename
+ << " succeeded after " << loopCount
+ << " attempts taking " << t.millis()
+ << "ms" << endl;
+ }
+ else if ( !success ) {
+ log() << "FlushViewOfFile for " << _filename
+ << " failed with error " << dosError
+ << " after " << loopCount
+ << " attempts taking " << t.millis()
+ << "ms" << endl;
+ // Abort here to avoid data corruption
+ fassert(16387, false);
+ }
+
+ success = FALSE != FlushFileBuffers(_fd);
+ if (!success) {
+ int err = GetLastError();
+ log() << "FlushFileBuffers failed: " << errnoWithDescription( err )
+ << " file: " << _filename << endl;
+ dataSyncFailedHandler();
+ }
+ }
+
+ MemoryMappedFile* _theFile; // this may be deleted while we are running
+ void * _view;
+ HANDLE _fd;
+ const uint64_t _id;
+ string _filename;
+ boost::mutex& _flushMutex;
+ };
+
+ /**
+  * Synchronously flushes this file through a temporary WindowsFlushable.
+  * Raises uassert 13056 when sync is false; does nothing when there are no views.
+  */
+ void MemoryMappedFile::flush(bool sync) {
+     uassert(13056, "Async flushing not supported on windows", sync);
+     if( views.empty() ) {
+         return;
+     }
+     WindowsFlushable flusher(this, viewForFlushing(), fd, _uniqueId, filename(), _flushMutex);
+     flusher.flush();
+ }
+
+ /** Returns a newly allocated WindowsFlushable built from this file's current flushing view, handle, id, name and flush mutex. */
+ MemoryMappedFile::Flushable * MemoryMappedFile::prepareFlush() {
+     MemoryMappedFile::Flushable * const flushable =
+         new WindowsFlushable(this, viewForFlushing(), fd, _uniqueId,
+                              filename(), _flushMutex);
+     return flushable;
+ }
+
+}
diff --git a/src/mongo/db/storage/mmap_v1/record_store_v1_capped.cpp b/src/mongo/db/storage/mmap_v1/record_store_v1_capped.cpp
index 76eef273815..45143deb2ec 100644
--- a/src/mongo/db/storage/mmap_v1/record_store_v1_capped.cpp
+++ b/src/mongo/db/storage/mmap_v1/record_store_v1_capped.cpp
@@ -35,10 +35,10 @@
#include "mongo/db/operation_context_impl.h"
#include "mongo/db/storage/mmap_v1/extent.h"
#include "mongo/db/storage/mmap_v1/extent_manager.h"
+#include "mongo/db/storage/mmap_v1/mmap.h"
#include "mongo/db/storage/mmap_v1/record.h"
#include "mongo/db/storage/mmap_v1/record_store_v1_capped_iterator.h"
#include "mongo/util/log.h"
-#include "mongo/util/mmap.h"
#include "mongo/util/mongoutils/str.h"
/*
diff --git a/src/mongo/db/storage/mmap_v1/repair_database.cpp b/src/mongo/db/storage/mmap_v1/repair_database.cpp
index e8d05a3d352..777f8f25b05 100644
--- a/src/mongo/db/storage/mmap_v1/repair_database.cpp
+++ b/src/mongo/db/storage/mmap_v1/repair_database.cpp
@@ -46,12 +46,12 @@
#include "mongo/db/db_raii.h"
#include "mongo/db/index/index_descriptor.h"
#include "mongo/db/storage/mmap_v1/dur.h"
+#include "mongo/db/storage/mmap_v1/mmap.h"
#include "mongo/db/storage/mmap_v1/mmap_v1_database_catalog_entry.h"
#include "mongo/db/storage/mmap_v1/mmap_v1_options.h"
#include "mongo/util/file.h"
-#include "mongo/util/file_allocator.h"
+#include "mongo/db/storage/mmap_v1/file_allocator.h"
#include "mongo/util/log.h"
-#include "mongo/util/mmap.h"
#include "mongo/util/scopeguard.h"
namespace mongo {
diff --git a/src/mongo/db/storage/paths.cpp b/src/mongo/db/storage/paths.cpp
new file mode 100644
index 00000000000..cb2913c6b06
--- /dev/null
+++ b/src/mongo/db/storage/paths.cpp
@@ -0,0 +1,113 @@
+/* Copyright 2010 10gen Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects
+ * for all of the code used other than as permitted herein. If you modify
+ * file(s) with this exception, you may extend this exception to your
+ * version of the file(s), but you are not obligated to do so. If you do not
+ * wish to do so, delete this exception statement from your version. If you
+ * delete this exception statement from all source files in the program,
+ * then also delete it in the license file.
+ */
+
+#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kStorage
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/storage/paths.h"
+
+#include "mongo/util/log.h"
+
+namespace mongo {
+
+ /**
+  * Builds a RelativePath from a full path.
+  * The result holds the part of f that follows dbp, with one leading '/' or '\' removed.
+  * If that remainder is empty a warning is logged and the full path is stored instead.
+  */
+ RelativePath RelativePath::fromFullPath(boost::filesystem::path dbp,
+                                         boost::filesystem::path f) {
+     // filesystem::path normalizes / and backslash
+     const std::string fullpath = f.string();
+     std::string tail = str::after(fullpath, dbp.string());
+
+     RelativePath result;
+     if( tail.empty() ) {
+         log() << "warning file is not under db path? " << fullpath << ' ' << dbp.string();
+         result._p = fullpath;
+         return result;
+     }
+
+     const bool hasLeadingSeparator = str::startsWith(tail, "/") || str::startsWith(tail, "\\");
+     if( hasLeadingSeparator ) {
+         tail.erase(0, 1);
+     }
+     result._p = tail;
+     return result;
+ }
+
+ /**
+  * Returns the st_dev value that stat() reports for path.
+  * Raises uasserted 13646 if stat() fails.
+  */
+ dev_t getPartition(const std::string& path){
+     struct stat info;
+     const int rc = stat(path.c_str(), &info);
+
+     if (rc != 0){
+         uasserted(13646, str::stream() << "stat() failed for file: " << path << " " << errnoWithDescription());
+     }
+
+     return info.st_dev;
+ }
+
+ // fsync()s the directory that contains `file`. Compiled to an empty function unless
+ // __linux__ is defined. Logs a warning and returns if `file` has no parent component.
+ // Failure to open the directory raises massert 13650. An fsync failure with EINVAL
+ // only produces a one-time startup warning; any other fsync failure raises massert 13651.
+ void flushMyDirectory(const boost::filesystem::path& file) {
+#ifdef __linux__ // this isn't needed elsewhere
+ // set once the unsupported-filesystem warning has been logged
+ static bool _warnedAboutFilesystem = false;
+ // if called without a fully qualified path it asserts; that makes mongoperf fail.
+ // so make a warning. need a better solution longer term.
+ // massert(13652, str::stream() << "Couldn't find parent dir for file: " << file.string(),);
+ if (!file.has_branch_path()) {
+ log() << "warning flushMyDirectory couldn't find parent dir for file: "
+ << file.string();
+ return;
+ }
+
+
+ boost::filesystem::path dir = file.branch_path(); // parent_path in new boosts
+
+ LOG(1) << "flushing directory " << dir.string();
+
+ int fd = ::open(dir.string().c_str(), O_RDONLY); // DO NOT THROW OR ASSERT BEFORE CLOSING
+ // this massert only fires when open() failed, i.e. when there is no descriptor to leak
+ massert(13650, str::stream() << "Couldn't open directory '" << dir.string()
+ << "' for flushing: " << errnoWithDescription(),
+ fd >= 0);
+ if (fsync(fd) != 0) {
+ // saved immediately so later calls cannot overwrite the fsync error
+ int e = errno;
+ if (e == EINVAL) { // indicates filesystem does not support synchronization
+ if (!_warnedAboutFilesystem) {
+ log() << "\tWARNING: This file system is not supported. For further information"
+ << " see:"
+ << startupWarningsLog;
+ log() << "\t\t\thttp://dochub.mongodb.org/core/unsupported-filesystems"
+ << startupWarningsLog;
+ log() << "\t\tPlease notify MongoDB, Inc. if an unlisted filesystem generated "
+ << "this warning." << startupWarningsLog;
+ _warnedAboutFilesystem = true;
+ }
+ }
+ else {
+ // close before asserting so the descriptor is not leaked
+ close(fd);
+ massert(13651, str::stream() << "Couldn't fsync directory '" << dir.string()
+ << "': " << errnoWithDescription(e),
+ false);
+ }
+ }
+ close(fd);
+#endif
+ }
+}
diff --git a/src/mongo/db/storage/paths.h b/src/mongo/db/storage/paths.h
new file mode 100644
index 00000000000..8286c920566
--- /dev/null
+++ b/src/mongo/db/storage/paths.h
@@ -0,0 +1,93 @@
+// @file paths.h
+// file paths and directory handling
+
+/* Copyright 2010 10gen Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects
+ * for all of the code used other than as permitted herein. If you modify
+ * file(s) with this exception, you may extend this exception to your
+ * version of the file(s), but you are not obligated to do so. If you do not
+ * wish to do so, delete this exception statement from your version. If you
+ * delete this exception statement from all source files in the program,
+ * then also delete it in the license file.
+ */
+
+#pragma once
+
+#include <boost/filesystem/path.hpp>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include "mongo/util/mongoutils/str.h"
+
+#include "mongo/db/storage_options.h"
+
+namespace mongo {
+
+ using namespace mongoutils;
+
+ /** This is very much like a boost::filesystem::path. However, we define a new type to get
+ some type checking. If you want to say "my param MUST be a relative path", use this.
+ */
+ struct RelativePath {
+ std::string _p; // the stored path text
+
+ bool empty() const { return _p.empty(); }
+
+ static RelativePath fromRelativePath(const std::string& f) { // stores 'f' as given, without validation
+ RelativePath rp;
+ rp._p = f;
+ return rp;
+ }
+
+ /**
+ * Returns path relative to 'dbpath' from a full path 'f'.
+ */
+ static RelativePath fromFullPath(boost::filesystem::path dbpath,
+ boost::filesystem::path f);
+
+ std::string toString() const { return _p; }
+
+ bool operator!=(const RelativePath& r) const { return _p != r._p; } // comparisons are plain string comparisons of _p
+ bool operator==(const RelativePath& r) const { return _p == r._p; }
+ bool operator<(const RelativePath& r) const { return _p < r._p; }
+
+ std::string asFullPath() const { // returns storageGlobalParams.dbpath joined with _p
+ boost::filesystem::path x(storageGlobalParams.dbpath);
+ x /= _p;
+ return x.string();
+ }
+
+ };
+
+ dev_t getPartition(const std::string& path);
+
+ inline bool onSamePartition(const std::string& path1, const std::string& path2){
+ dev_t dev1 = getPartition(path1); // device id (st_dev) reported by stat() for path1
+ dev_t dev2 = getPartition(path2); // device id (st_dev) reported by stat() for path2
+
+ return dev1 == dev2; // true when both paths report the same device id
+ }
+
+ void flushMyDirectory(const boost::filesystem::path& file);
+
+ boost::filesystem::path ensureParentDirCreated(const boost::filesystem::path& p);
+
+}
diff --git a/src/mongo/db/storage/storage_engine_lock_file_posix.cpp b/src/mongo/db/storage/storage_engine_lock_file_posix.cpp
index 93227bbaf9a..6897f413fbe 100644
--- a/src/mongo/db/storage/storage_engine_lock_file_posix.cpp
+++ b/src/mongo/db/storage/storage_engine_lock_file_posix.cpp
@@ -41,9 +41,9 @@
#include <unistd.h>
#include <sstream>
+#include "mongo/db/storage/paths.h"
#include "mongo/platform/process_id.h"
#include "mongo/util/log.h"
-#include "mongo/util/paths.h"
#include "mongo/util/mongoutils/str.h"
namespace mongo {