1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
|
/**
* Copyright (C) 2008-2014 MongoDB Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the GNU Affero General Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kReplication
#include "mongo/platform/basic.h"
#include "mongo/db/prefetch.h"
#include "mongo/db/catalog/collection.h"
#include "mongo/db/catalog/database.h"
#include "mongo/db/commands/server_status_metric.h"
#include "mongo/db/dbhelpers.h"
#include "mongo/db/index/index_access_method.h"
#include "mongo/db/jsobj.h"
#include "mongo/db/repl/bgsync.h"
#include "mongo/db/repl/replication_coordinator.h"
#include "mongo/db/repl/replication_coordinator_global.h"
#include "mongo/db/server_parameters.h"
#include "mongo/db/stats/timer_stats.h"
#include "mongo/db/storage/mmap_v1/mmap.h"
#include "mongo/util/log.h"
namespace mongo {
using std::endl;
using std::string;
namespace repl {
namespace {
// TODO / idea: the prefetcher, when it fetches _id, on an upsert, will see if the record exists.
// if it does not, at write time, we can just do an insert, which will be faster.

// Timer metrics reported under serverStatus ("repl.preload.*"): count (of batches) and time
// spent fetching pages before oplog application. What each timer actually measures depends on
// the configured prefetch behavior: all indexes, _id index only, none, etc.
TimerStats prefetchIndexStats;
ServerStatusMetricField<TimerStats> displayPrefetchIndexPages("repl.preload.indexes",
                                                              &prefetchIndexStats);
TimerStats prefetchDocStats;
ServerStatusMetricField<TimerStats> displayPrefetchDocPages("repl.preload.docs", &prefetchDocStats);
// page in pages needed for all index lookups on a given object
// Page in the index pages needed for all index lookups on 'obj', according to the node's
// index prefetch configuration. Exceptions from the storage layer are swallowed (logged at
// level 2) because prefetch is purely a performance optimization and must never abort
// oplog application.
//
// do we want prefetchConfig to be (1) as-is, (2) for update ops only, or (3) configured
// per op type? One might want PREFETCH_NONE for updates, but it's more rare that it is a
// bad idea for inserts. For #3 (per op), a big issue would be "too many knobs".
void prefetchIndexPages(OperationContext* txn,
                        Collection* collection,
                        const BackgroundSync::IndexPrefetchConfig& prefetchConfig,
                        const BSONObj& obj) {
    switch (prefetchConfig) {
        case BackgroundSync::PREFETCH_NONE:
            return;
        case BackgroundSync::PREFETCH_ID_ONLY: {
            TimerHolder timer(&prefetchIndexStats);
            // on the update op case, the call to prefetchRecordPages will touch the _id index.
            // thus perhaps this option isn't very useful?
            try {
                IndexDescriptor* desc = collection->getIndexCatalog()->findIdIndex(txn);
                if (!desc)
                    return;
                IndexAccessMethod* iam = collection->getIndexCatalog()->getIndex(desc);
                invariant(iam);
                iam->touch(txn, obj);
            } catch (const DBException& e) {
                LOG(2) << "ignoring exception in prefetchIndexPages(): " << e.what() << endl;
            }
            break;
        }
        case BackgroundSync::PREFETCH_ALL: {
            // The iterator includes all indexes, including ones
            // in the process of being built.
            IndexCatalog::IndexIterator ii =
                collection->getIndexCatalog()->getIndexIterator(txn, true);
            while (ii.more()) {
                TimerHolder timer(&prefetchIndexStats);
                // This will page in all index pages for the given object.
                try {
                    IndexDescriptor* desc = ii.next();
                    IndexAccessMethod* iam = collection->getIndexCatalog()->getIndex(desc);
                    // Was verify(); use invariant() here for consistency with the
                    // PREFETCH_ID_ONLY branch above, which performs the identical check.
                    invariant(iam);
                    iam->touch(txn, obj);
                } catch (const DBException& e) {
                    LOG(2) << "ignoring exception in prefetchIndexPages(): " << e.what() << endl;
                }
            }
            break;
        }
        default:
            // Unreachable: all IndexPrefetchConfig values are handled above.
            fassertFailed(16427);
    }
}
// page in the data pages for a record associated with an object
// Page in the data pages of the record identified by the _id of 'obj', so the upcoming
// write does not stall on page faults. Storage-layer exceptions are logged and ignored:
// prefetch is best-effort only.
void prefetchRecordPages(OperationContext* txn, Database* db, const char* ns, const BSONObj& obj) {
    BSONElement idElem;
    if (!obj.getObjectID(idElem)) {
        // Without an _id there is nothing to look up.
        return;
    }
    TimerHolder timer(&prefetchDocStats);
    BSONObjBuilder idQuery;
    idQuery.append(idElem);
    BSONObj doc;
    try {
        if (Helpers::findById(txn, db, ns, idQuery.done(), doc)) {
            // do we want to use Record::touch() here? it's pretty similar.
            // Read one byte per OS page; 'volatile' prevents the compiler from
            // discarding these otherwise-dead loads.
            volatile char _dummy_char = '\0';
            const char* data = doc.objdata();
            const int size = doc.objsize();
            for (int offset = 0; offset < size; offset += g_minOSPageSizeBytes) {
                _dummy_char += *(data + offset);
            }
            // Touch the final byte in case the stride above skipped the last page.
            _dummy_char += *(data + size - 1);
        }
    } catch (const DBException& e) {
        LOG(2) << "ignoring exception in prefetchRecordPages(): " << e.what() << endl;
    }
}
} // namespace
// prefetch for an oplog operation
// Prefetch the index and record pages an oplog operation 'op' will need, before it is
// applied. Only insert ('i'), delete ('d'), and update ('u') ops are considered.
void prefetchPagesForReplicatedOp(OperationContext* txn, Database* db, const BSONObj& op) {
    invariant(db);
    const BackgroundSync::IndexPrefetchConfig prefetchConfig =
        BackgroundSync::get()->getIndexPrefetchConfig();

    // Pick the oplog field holding the document (or query) to prefetch for.
    const char* opType = op.getStringField("op");
    const char opChar = *opType;
    const char* opField;
    if (opChar == 'i' || opChar == 'd') {
        // insert / delete: the document lives in "o".
        opField = "o";
    } else if (opChar == 'u') {
        // update: the query portion lives in "o2".
        opField = "o2";
    } else {
        // prefetch ignores other ops
        return;
    }

    BSONObj obj = op.getObjectField(opField);
    const char* ns = op.getStringField("ns");

    // This will have to change for engines other than MMAP V1, because they might not have
    // means for directly prefetching pages from the collection. For this purpose, acquire S
    // lock on the database, instead of optimizing with IS.
    Lock::CollectionLock collLock(txn->lockState(), ns, MODE_S);
    Collection* collection = db->getCollection(ns);
    if (!collection) {
        return;
    }
    LOG(4) << "index prefetch for op " << *opType << endl;

    // should we prefetch index pages on updates? if the update is in-place and doesn't change
    // indexed values, it is actually slower - a lot slower if there are a dozen indexes or
    // lots of multikeys. possible variations (not all mutually exclusive):
    // 1) current behavior: full prefetch
    // 2) don't do it for updates
    // 3) don't do multikey indexes for updates
    // 4) don't prefetchIndexPages on some heuristic; e.g., if it's an $inc.
    // 5) if not prefetching index pages (#2), we should do it if we are upsertings and it
    //    will be an insert. to do that we could do the prefetchRecordPage first and if DNE
    //    then we do #1.
    //
    // note that on deletes 'obj' does not have all the keys we would want to prefetch on.
    // a way to achieve that would be to prefetch the record first, and then afterwards do
    // this part.
    prefetchIndexPages(txn, collection, prefetchConfig, obj);

    // do not prefetch the data for inserts; it doesn't exist yet
    //
    // we should consider doing the record prefetch for the delete op case as we hit the record
    // when we delete. note if done we only want to touch the first page.
    //
    // update: do record prefetch — except for capped collections, because
    // they typically do not have an _id index for findById() to use.
    if (opChar == 'u' && !collection->isCapped()) {
        prefetchRecordPages(txn, db, ns, obj);
    }
}
class ReplIndexPrefetch : public ServerParameter {
public:
ReplIndexPrefetch() : ServerParameter(ServerParameterSet::getGlobal(), "replIndexPrefetch") {}
virtual ~ReplIndexPrefetch() {}
const char* _value() {
if (getGlobalReplicationCoordinator()->getReplicationMode() !=
ReplicationCoordinator::modeReplSet) {
return "uninitialized";
}
BackgroundSync::IndexPrefetchConfig ip = BackgroundSync::get()->getIndexPrefetchConfig();
switch (ip) {
case BackgroundSync::PREFETCH_NONE:
return "none";
case BackgroundSync::PREFETCH_ID_ONLY:
return "_id_only";
case BackgroundSync::PREFETCH_ALL:
return "all";
default:
return "invalid";
}
}
virtual void append(OperationContext* txn, BSONObjBuilder& b, const string& name) {
b.append(name, _value());
}
virtual Status set(const BSONElement& newValueElement) {
if (getGlobalReplicationCoordinator()->getReplicationMode() !=
ReplicationCoordinator::modeReplSet) {
return Status(ErrorCodes::BadValue, "replication is not enabled");
}
std::string prefetch = newValueElement.valuestrsafe();
return setFromString(prefetch);
}
virtual Status setFromString(const string& prefetch) {
log() << "changing replication index prefetch behavior to " << prefetch << endl;
BackgroundSync::IndexPrefetchConfig prefetchConfig;
if (prefetch == "none")
prefetchConfig = BackgroundSync::PREFETCH_NONE;
else if (prefetch == "_id_only")
prefetchConfig = BackgroundSync::PREFETCH_ID_ONLY;
else if (prefetch == "all")
prefetchConfig = BackgroundSync::PREFETCH_ALL;
else {
return Status(ErrorCodes::BadValue,
str::stream() << "unrecognized indexPrefetch setting: " << prefetch);
}
BackgroundSync::get()->setIndexPrefetchConfig(prefetchConfig);
return Status::OK();
}
} replIndexPrefetch;
} // namespace repl
} // namespace mongo
|