// @file dur_preplogbuffer.cpp /** * Copyright (C) 2009 10gen Inc. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License, version 3, * as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . * * As a special exception, the copyright holders give permission to link the * code of portions of this program with the OpenSSL library under certain * conditions as described in each individual source file and distribute * linked combinations including the program with the OpenSSL library. You * must comply with the GNU Affero General Public License in all respects for * all of the code used other than as permitted herein. If you modify file(s) * with this exception, you may extend this exception to your version of the * file(s), but you are not obligated to do so. If you do not wish to do so, * delete this exception statement from your version. If you delete this * exception statement from all source files in the program, then also delete * it in the license file. */ /* PREPLOGBUFFER we will build an output buffer ourself and then use O_DIRECT we could be in read lock for this for very large objects write directly to redo log in situ? @see https://docs.google.com/drawings/edit?id=1TklsmZzm7ohIZkwgeK6rMvsdaR13KjtJYMsfLr175Zc */ #include "mongo/pch.h" #include "mongo/db/dur.h" #include "mongo/db/dur_commitjob.h" #include "mongo/db/dur_journal.h" #include "mongo/db/dur_journalimpl.h" #include "mongo/db/dur_stats.h" #include "mongo/server.h" #include "mongo/util/alignedbuilder.h" #include "mongo/util/mongoutils/hash.h" #include "mongo/util/mongoutils/str.h" #include "mongo/util/stacktrace.h" #include "mongo/util/timer.h" using namespace mongoutils; namespace mongo { namespace dur { extern Journal j; RelativePath local = RelativePath::fromRelativePath("local"); static DurableMappedFile* findMMF_inlock(void *ptr, size_t &ofs) { DurableMappedFile *f = privateViews.find_inlock(ptr, ofs); if( f == 0 ) { error() << "findMMF_inlock failed " << privateViews.numberOfViews_inlock() << endl; printStackTrace(); // we want a stack trace and the assert below didn't print a trace once in the real world - not sure why stringstream ss; ss << "view pointer cannot be resolved " << hex << (size_t) ptr; journalingFailure(ss.str().c_str()); // asserts, which then abends } return f; } /** put the basic write operation into the buffer (bb) to be journaled */ static void prepBasicWrite_inlock(AlignedBuilder&bb, const WriteIntent *i, RelativePath& lastDbPath) { size_t ofs = 1; DurableMappedFile *mmf = findMMF_inlock(i->start(), /*out*/ofs); if( unlikely(!mmf->willNeedRemap()) ) { // tag this mmf as needed a remap of its private view later. // usually it will already be dirty/already set, so we do the if above first // to avoid possibility of cpu cache line contention mmf->willNeedRemap() = true; } // since we have already looked up the mmf, we go ahead and remember the write view location // so we don't have to find the DurableMappedFile again later in WRITETODATAFILES() // // this was for WRITETODATAFILES_Impl2 so commented out now // /* dassert( i->w_ptr == 0 ); i->w_ptr = ((char*)mmf->view_write()) + ofs; */ JEntry e; e.len = min(i->length(), (unsigned)(mmf->length() - ofs)); //don't write past end of file verify( ofs <= 0x80000000 ); e.ofs = (unsigned) ofs; e.setFileNo( mmf->fileSuffixNo() ); if( mmf->relativePath() == local ) { e.setLocalDbContextBit(); } else if( mmf->relativePath() != lastDbPath ) { lastDbPath = mmf->relativePath(); JDbContext c; bb.appendStruct(c); bb.appendStr(lastDbPath.toString()); } bb.appendStruct(e); #if defined(_EXPERIMENTAL) i->ofsInJournalBuffer = bb.len(); #endif bb.appendBuf(i->start(), e.len); if (unlikely(e.len != (unsigned)i->length())) { log() << "journal info splitting prepBasicWrite at boundary" << endl; // This only happens if we write to the last byte in a file and // the fist byte in another file that is mapped adjacently. I // think most OSs leave at least a one page gap between // mappings, but better to be safe. WriteIntent next ((char*)i->start() + e.len, i->length() - e.len); prepBasicWrite_inlock(bb, &next, lastDbPath); } } void assertNothingSpooled(); /** basic write ops / write intents. note there is no particular order to these : if we have two writes to the same location during the group commit interval, it is likely (although not assured) that it is journaled here once. */ static void prepBasicWrites(AlignedBuilder& bb) { scoped_lock lk(privateViews._mutex()); // each time events switch to a different database we journal a JDbContext // switches will be rare as we sort by memory location first and we batch commit. RelativePath lastDbPath; assertNothingSpooled(); const vector& _intents = commitJob.getIntentsSorted(); verify( !_intents.empty() ); WriteIntent last; for( vector::const_iterator i = _intents.begin(); i != _intents.end(); i++ ) { if( i->start() < last.end() ) { // overlaps last.absorb(*i); } else { // discontinuous if( i != _intents.begin() ) prepBasicWrite_inlock(bb, &last, lastDbPath); last = *i; } } prepBasicWrite_inlock(bb, &last, lastDbPath); } static void resetLogBuffer(/*out*/JSectHeader& h, AlignedBuilder& bb) { bb.reset(); h.setSectionLen(0xffffffff); // total length, will fill in later h.seqNumber = getLastDataFileFlushTime(); h.fileId = j.curFileId(); } /** we will build an output buffer ourself and then use O_DIRECT we could be in read lock for this caller handles locking @return partially populated sectheader and _ab set */ static void _PREPLOGBUFFER(JSectHeader& h, AlignedBuilder& bb) { verify(storageGlobalParams.dur); assertLockedForCommitting(); resetLogBuffer(h, bb); // adds JSectHeader // ops other than basic writes (DurOp's) { for( vector< shared_ptr >::iterator i = commitJob.ops().begin(); i != commitJob.ops().end(); ++i ) { (*i)->serialize(bb); } } prepBasicWrites(bb); return; } void PREPLOGBUFFER(/*out*/ JSectHeader& h, AlignedBuilder& ab) { assertLockedForCommitting(); Timer t; j.assureLogFileOpen(); // so fileId is set _PREPLOGBUFFER(h, ab); stats.curr->_prepLogBufferMicros += t.micros(); } } }