summary refs log tree commit diff
path: root/src/mongo/db/dur.h
diff options
context:
space:
mode:
author Eliot Horowitz <eliot@10gen.com> 2011-12-24 15:33:26 -0500
committer Eliot Horowitz <eliot@10gen.com> 2011-12-24 15:33:45 -0500
commit ae1ecd9c786911f9f1f0242f0f7d702b3e5dfeba (patch)
tree 92f8e1649e6f080b251ff5f1763679a72eb59b34 /src/mongo/db/dur.h
parent dfa4cd7e2cf109b072440155fabc08a93c8045a0 (diff)
download mongo-ae1ecd9c786911f9f1f0242f0f7d702b3e5dfeba.tar.gz
bulk move of code to src/ SERVER-4551
Diffstat (limited to 'src/mongo/db/dur.h')
-rw-r--r-- src/mongo/db/dur.h 209
1 files changed, 209 insertions, 0 deletions
diff --git a/src/mongo/db/dur.h b/src/mongo/db/dur.h
new file mode 100644
index 00000000000..f06ff500195
--- /dev/null
+++ b/src/mongo/db/dur.h
@@ -0,0 +1,209 @@
+// @file dur.h durability support
+
+#pragma once
+
+#include "diskloc.h"
+#include "mongommf.h"
+
+namespace mongo {
+
+ class NamespaceDetails;
+
+ void mongoAbort(const char *msg);
+ void abort(); // deliberately declared but not defined -- use mongoAbort() instead (presumably so a stray abort() call inside namespace mongo fails to link)
+
+ namespace dur {
+
+ // a smaller limit is likely better on 32 bit (only 32 bit x86 is detected here, via __i386__ / _M_IX86)
+#if defined(__i386__) || defined(_M_IX86)
+ const unsigned UncommittedBytesLimit = 50 * 1024 * 1024; // 50MB on 32 bit x86 builds
+#else
+ const unsigned UncommittedBytesLimit = 100 * 1024 * 1024; // 100MB on every other target
+#endif
+
+ /** Call during startup so the durability module can initialize.
+ Throws on a fatal error.
+ Does nothing if cmdLine.dur is false.
+ */
+ void startup();
+
+ class DurableInterface : boost::noncopyable {
+ public:
+ virtual ~DurableInterface() { log() << "ERROR warning ~DurableInterface not intended to be called" << endl; }
+
+ /** Declare that a file has been created
+ Normally writes are applied only after journaling, for safety. But here the file
+ is created first, and the journal will just replay the creation if the create didn't
+ happen because of crashing.
+ */
+ virtual void createdFile(string filename, unsigned long long len) = 0;
+
+ /** Declarations of write intent.
+
+ Use these methods to declare "I'm about to write to x and it should be logged for redo."
+
+ Failure to call writing...() is checked in _DEBUG mode by using a read only mapped view
+ (i.e., you'll segfault if the code is covered in that situation). The _DEBUG check doesn't
+ verify that your length is correct though.
+ */
+
+ /** declare intent to write to x for up to len
+ @return pointer where to write. this is modified when testIntent is true.
+ */
+ virtual void* writingPtr(void *x, unsigned len) = 0;
+
+ /** declare write intent; should already be in the write view to work correctly when testIntent is true.
+ if you aren't, use writingPtr() instead.
+ */
+ virtual void declareWriteIntent(void *x, unsigned len) = 0;
+
+ /** declare intent to write
+ @param ofs offset within buf at which we will write
+ @param len the length at ofs we will write
+ @return new buffer pointer. this is modified when testIntent is true.
+ */
+ virtual void* writingAtOffset(void *buf, unsigned ofs, unsigned len) = 0;
+
+ /** declare intent to write
+ @param ranges vector of pairs representing ranges. Each pair
+ comprises an offset from buf where a range begins, then the
+ range length.
+ @return new buffer pointer. this is modified when testIntent is true.
+ */
+ virtual void* writingRangesAtOffsets(void *buf, const vector< pair< long long, unsigned > > &ranges ) = 0;
+
+ /** Wait for acknowledgement of the next group commit.
+ @return true if --dur is on. There will be a delay.
+ @return false if --dur is off.
+ */
+ virtual bool awaitCommit() = 0;
+
+ /** Commit immediately.
+
+ Generally, you do not want to do this often, as highly granular committing may affect
+ performance.
+
+ Does not return until the commit is complete.
+
+ You must be at least read locked when you call this. Ideally, you are not write locked
+ and then read operations can occur concurrently.
+
+ @return true if --dur is on.
+ @return false if --dur is off. (in which case there is no action)
+ */
+ virtual bool commitNow() = 0;
+
+ /** Commit if enough bytes have been modified. Current threshold is 50MB
+ NOTE(review): UncommittedBytesLimit (declared in this header) is 50MB only on 32 bit x86 and 100MB otherwise -- confirm which figure applies here.
+ The idea is that long running write operations that don't yield
+ (like creating an index or update with $atomic) can call this
+ whenever the db is in a sane state and it will prevent commits
+ from growing too large.
+ @return true if committed
+ */
+ virtual bool commitIfNeeded() = 0;
+
+ /** @return true if time to commit but does NOT do a commit */
+ virtual bool aCommitIsNeeded() const = 0;
+
+ /** Declare write intent for a DiskLoc. @see DiskLoc::writing() */
+ inline DiskLoc& writingDiskLoc(DiskLoc& d) { return *((DiskLoc*) writingPtr(&d, sizeof(d))); }
+
+ /** Declare write intent for an int. Note: the const on d is cast away and a non-const reference is returned. */
+ inline int& writingInt(const int& d) { return *((int*) writingPtr((int*) &d, sizeof(d))); }
+
+ /** "assume i've already indicated write intent, let me write"
+ redeclaration is fine too, but this is faster.
+ */
+ template <typename T>
+ inline
+ T* alreadyDeclared(T *x) {
+#if defined(_TESTINTENT)
+ return (T*) MongoMMF::switchToPrivateView(x);
+#else
+ return x;
+#endif
+ }
+
+ /** declare intent to write to x for sizeof(*x) */
+ template <typename T>
+ inline
+ T* writing(T *x) {
+ return (T*) writingPtr(x, sizeof(T));
+ }
+
+ /** write something that doesn't have to be journaled, as this write is "unimportant".
+ a good example is paddingFactor.
+ can be thought of as memcpy(dst,src,len)
+ the dur implementation acquires a mutex in this method, so do not assume it is faster
+ without measuring!
+ */
+ virtual void setNoJournal(void *dst, void *src, unsigned len) = 0;
+
+ /** Commits pending changes, flushes all changes to main data
+ files, then removes the journal.
+
+ This is useful as a "barrier" to ensure that writes before this
+ call will never go through recovery and be applied to files
+ that have had changes made after this call applied.
+ */
+ virtual void syncDataAndTruncateJournal() = 0;
+
+ static DurableInterface& getDur() { return *_impl; } // the currently installed implementation (dereferences _impl)
+
+ private:
+ /** Intentionally unimplemented method.
+ It's very easy to manipulate Record::data open ended. Thus a call to writing(Record*) is suspect.
+ This will override the templated version and yield an unresolved external.
+ */
+ Record* writing(Record* r);
+ /** Intentionally unimplemented method. BtreeBuckets are allocated in buffers larger than sizeof( BtreeBucket ). */
+// BtreeBucket* writing( BtreeBucket* );
+ /** Intentionally unimplemented method. NamespaceDetails may be based on references to 'Extra' objects. */
+ NamespaceDetails* writing( NamespaceDetails* );
+
+ static DurableInterface* _impl; // NonDurableImpl at startup()
+ static void enableDurability(); // makes _impl a DurableImpl
+ static void disableDurability(); // makes _impl a NonDurableImpl
+
+ // these need to be able to enable/disable Durability
+ friend void startup();
+ friend class TempDisableDurability;
+ }; // class DurableInterface
+
+ class NonDurableImpl : public DurableInterface { // implementation installed when durability is disabled; members are private (class default), so they are reached through DurableInterface
+ void* writingPtr(void *x, unsigned len) { return x; } // hands back x unchanged
+ void* writingAtOffset(void *buf, unsigned ofs, unsigned len) { return buf; } // hands back buf unchanged
+ void* writingRangesAtOffsets(void *buf, const vector< pair< long long, unsigned > > &ranges) { return buf; } // hands back buf unchanged
+ void declareWriteIntent(void *, unsigned) { }
+ void createdFile(string filename, unsigned long long len) { }
+ bool awaitCommit() { return false; }
+ bool commitNow() { return false; }
+ bool commitIfNeeded() { return false; }
+ bool aCommitIsNeeded() const { return false; }
+ void setNoJournal(void *dst, void *src, unsigned len); // the only member without an inline body here
+ void syncDataAndTruncateJournal() {}
+ };
+
+ class DurableImpl : public DurableInterface { // implementation installed by enableDurability(); every member below is only declared here, and is private (class default)
+ void* writingPtr(void *x, unsigned len);
+ void* writingAtOffset(void *buf, unsigned ofs, unsigned len);
+ void* writingRangesAtOffsets(void *buf, const vector< pair< long long, unsigned > > &ranges);
+ void declareWriteIntent(void *, unsigned);
+ void createdFile(string filename, unsigned long long len);
+ bool awaitCommit();
+ bool commitNow();
+ bool aCommitIsNeeded() const;
+ bool commitIfNeeded();
+ void setNoJournal(void *dst, void *src, unsigned len);
+ void syncDataAndTruncateJournal();
+ };
+
+ } // namespace dur
+
+ inline dur::DurableInterface& getDur() { return dur::DurableInterface::getDur(); } // namespace-level shorthand that forwards to dur::DurableInterface::getDur()
+
+ /** declare that we are modifying a diskloc and this is a datafile write. const is cast away from this; @return the reference produced by writingDiskLoc(). */
+ inline DiskLoc& DiskLoc::writing() const { return getDur().writingDiskLoc(*const_cast< DiskLoc * >( this )); }
+
+}