1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
|
/**
* Copyright (C) 2008 10gen Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "mongo/pch.h"
#include "mongo/db/prefetch.h"
#include "mongo/db/dbhelpers.h"
#include "mongo/db/diskloc.h"
#include "mongo/db/index/catalog_hack.h"
#include "mongo/db/index.h"
#include "mongo/db/index_update.h"
#include "mongo/db/jsobj.h"
#include "mongo/db/namespace_details.h"
#include "mongo/db/repl/rs.h"
#include "mongo/db/stats/timer_stats.h"
#include "mongo/db/commands/server_status.h"
namespace mongo {
// todo / idea: the prefetcher, when it fetches _id, on an upsert, will see if the record exists. if it does not,
// at write time, we can just do an insert, which will be faster.

// The count (of batches) and time spent fetching pages before application
// -- meaning depends on the prefetch behavior: all, _id index, none, etc.

// Timer/counter for index-page prefetch work, exported through
// serverStatus as the "repl.preload.indexes" metric.
static TimerStats prefetchIndexStats;
static ServerStatusMetricField<TimerStats> displayPrefetchIndexPages(
"repl.preload.indexes",
&prefetchIndexStats );
// Timer/counter for document(record)-page prefetch work, exported through
// serverStatus as the "repl.preload.docs" metric.
static TimerStats prefetchDocStats;
static ServerStatusMetricField<TimerStats> displayPrefetchDocPages(
"repl.preload.docs",
&prefetchDocStats );
// prefetch for an oplog operation: fault in the pages that applying a
// single replicated op will need, before the write lock is taken.
void prefetchPagesForReplicatedOp(const BSONObj& op) {
    const char *opType = op.getStringField("op");

    // Select the field of the oplog entry that carries the document whose
    // pages we want resident before the write is applied.
    const char *docField;
    if (*opType == 'i' || *opType == 'd') {
        // inserts and deletes carry the document itself in "o"
        docField = "o";
    }
    else if (*opType == 'u') {
        // updates carry the match criteria in "o2"
        docField = "o2";
    }
    else {
        // prefetch ignores all other op types
        return;
    }

    BSONObj obj = op.getObjectField(docField);
    const char *ns = op.getStringField("ns");
    NamespaceDetails *nsd = nsdetails(ns);
    if (!nsd)
        return; // collection may not be opened yet

    LOG(4) << "index prefetch for op " << *opType << endl;
    DEV Lock::assertAtLeastReadLocked(ns);

    // Prefetching index pages on updates is debatable: an in-place update
    // that does not change indexed values makes this pure overhead — a lot
    // of overhead with a dozen indexes or lots of multikeys. Possible
    // variations (not all mutually exclusive):
    //   1) current behavior: full prefetch
    //   2) don't do it for updates
    //   3) don't do multikey indexes for updates
    //   4) skip on some heuristic; e.g., if it's an $inc
    //   5) if not prefetching index pages (#2), still do it when an upsert
    //      will turn into an insert: do the prefetchRecordPage first and if
    //      the document does not exist, fall back to #1.
    //
    // Also note that on deletes 'obj' does not have all the keys we would
    // want to prefetch on; prefetching the record first and doing this part
    // afterwards would achieve that.
    prefetchIndexPages(nsd, obj);

    // No record prefetch for inserts: the record does not exist yet.
    // Deletes might benefit from touching (just the first page of) the
    // record we are about to remove, but that is not done today.
    // Updates do get a record prefetch.
    if ((*opType == 'u') &&
        // skip capped collections: they typically do not have the _id
        // index that findById() would need.
        !nsd->isCapped()) {
        prefetchRecordPages(ns, obj);
    }
}
void prefetchIndexPages(NamespaceDetails *nsd, const BSONObj& obj) {
DiskLoc unusedDl; // unused
BSONObjSet unusedKeys;
ReplSetImpl::IndexPrefetchConfig prefetchConfig = theReplSet->getIndexPrefetchConfig();
// do we want prefetchConfig to be (1) as-is, (2) for update ops only, or (3) configured per op type?
// One might want PREFETCH_NONE for updates, but it's more rare that it is a bad idea for inserts.
// #3 (per op), a big issue would be "too many knobs".
switch (prefetchConfig) {
case ReplSetImpl::PREFETCH_NONE:
return;
case ReplSetImpl::PREFETCH_ID_ONLY:
{
TimerHolder timer( &prefetchIndexStats);
// on the update op case, the call to prefetchRecordPages will touch the _id index.
// thus perhaps this option isn't very useful?
int indexNo = nsd->findIdIndex();
if (indexNo == -1) return;
try {
auto_ptr<IndexDescriptor> desc(CatalogHack::getDescriptor(nsd, indexNo));
auto_ptr<IndexAccessMethod> iam(CatalogHack::getIndex(desc.get()));
iam->touch(obj);
}
catch (const DBException& e) {
LOG(2) << "ignoring exception in prefetchIndexPages(): " << e.what() << endl;
}
break;
}
case ReplSetImpl::PREFETCH_ALL:
{
// indexCount includes all indexes, including ones
// in the process of being built
int indexCount = nsd->getTotalIndexCount();
for ( int indexNo = 0; indexNo < indexCount; indexNo++ ) {
TimerHolder timer( &prefetchIndexStats);
// This will page in all index pages for the given object.
try {
auto_ptr<IndexDescriptor> desc(CatalogHack::getDescriptor(nsd, indexNo));
auto_ptr<IndexAccessMethod> iam(CatalogHack::getIndex(desc.get()));
iam->touch(obj);
}
catch (const DBException& e) {
LOG(2) << "ignoring exception in prefetchIndexPages(): " << e.what() << endl;
}
unusedKeys.clear();
}
break;
}
default:
fassertFailed(16427);
}
}
void prefetchRecordPages(const char* ns, const BSONObj& obj) {
BSONElement _id;
if( obj.getObjectID(_id) ) {
TimerHolder timer(&prefetchDocStats);
BSONObjBuilder builder;
builder.append(_id);
BSONObj result;
try {
// we can probably use Client::Context here instead of ReadContext as we
// have locked higher up the call stack already
Client::ReadContext ctx( ns );
if( Helpers::findById(cc(), ns, builder.done(), result) ) {
// do we want to use Record::touch() here? it's pretty similar.
volatile char _dummy_char = '\0';
// Touch the first word on every page in order to fault it into memory
for (int i = 0; i < result.objsize(); i += g_minOSPageSizeBytes) {
_dummy_char += *(result.objdata() + i);
}
// hit the last page, in case we missed it above
_dummy_char += *(result.objdata() + result.objsize() - 1);
}
}
catch(const DBException& e) {
LOG(2) << "ignoring exception in prefetchRecordPages(): " << e.what() << endl;
}
}
}
}
|