1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
|
/**
* Copyright (C) 2018-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
* as published by MongoDB, Inc.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Server Side Public License for more details.
*
* You should have received a copy of the Server Side Public License
* along with this program. If not, see
* <http://www.mongodb.com/licensing/server-side-public-license>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the Server Side Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kStorage
#include "mongo/platform/basic.h"
#include "mongo/db/db_raii.h"
#include "mongo/db/catalog/database_holder.h"
#include "mongo/db/concurrency/locker.h"
#include "mongo/db/curop.h"
#include "mongo/db/repl/replication_coordinator.h"
#include "mongo/db/s/collection_sharding_state.h"
#include "mongo/db/s/database_sharding_state.h"
#include "mongo/db/storage/snapshot_helper.h"
#include "mongo/logv2/log.h"
namespace mongo {
namespace {
const boost::optional<int> kDoNotChangeProfilingLevel = boost::none;
// TODO: SERVER-44105 remove
// If set to false, secondary reads should wait behind the PBW lock.
const auto allowSecondaryReadsDuringBatchApplication_DONT_USE =
OperationContext::declareDecoration<boost::optional<bool>>();
/**
* Performs some checks to determine whether the operation is compatible with a lock-free read.
* Multi-doc transactions are not supported, nor are operations holding an exclusive lock.
*/
bool supportsLockFreeRead(OperationContext* opCtx) {
// Lock-free reads are not supported in multi-document transactions.
// Lock-free reads are not supported under an exclusive lock (nested reads under exclusive lock
// holding operations).
// Lock-free reads are not supported if a storage txn is already open w/o the lock-free reads
// operation flag set.
return !storageGlobalParams.disableLockFreeReads && !opCtx->inMultiDocumentTransaction() &&
!opCtx->lockState()->isWriteLocked() &&
!(opCtx->recoveryUnit()->isActive() && !opCtx->isLockFreeReadsOp());
}
/**
* Type that pretends to be a Collection. It implements the minimal interface used by
* acquireCollectionAndConsistentSnapshot(). We are tricking acquireCollectionAndConsistentSnapshot
* to establish a consistent snapshot with just the catalog and not for a specific Collection.
*/
class FakeCollection {
public:
// We just need to return something that would not considered to be the oplog. A default
// constructed NamespaceString is fine.
const NamespaceString& ns() const {
return _ns;
};
// We just need to return something that compares equal with itself here.
boost::optional<Timestamp> getMinimumVisibleSnapshot() const {
return boost::none;
}
private:
NamespaceString _ns;
};
/**
* Helper function to acquire a collection and consistent snapshot without holding the RSTL or
* collection locks.
*
* GetCollectionAndEstablishReadSourceFunc is called before we open a snapshot, it needs to fetch
* the Collection from the catalog and select the read source.
*
* GetCollectionAfterSnapshotFunc is called after the snapshot is opened, it needs to fetch the
* Collection from the catalog that is used to compare consistency with the Collection returned by
* GetCollectionAndEstablishReadSourceFunc.
*
* ResetFunc is called when we failed to achieve consistency and need to retry.
*/
template <typename GetCollectionAndEstablishReadSourceFunc,
typename GetCollectionAfterSnapshotFunc,
typename ResetFunc>
auto acquireCollectionAndConsistentSnapshot(
OperationContext* opCtx,
bool isLockFreeReadSubOperation,
CollectionCatalogStasher& catalogStasher,
GetCollectionAndEstablishReadSourceFunc getCollectionAndEstablishReadSource,
GetCollectionAfterSnapshotFunc getCollectionAfterSnapshot,
ResetFunc reset) {
// Figure out what type of Collection GetCollectionAndEstablishReadSourceFunc returns. It needs
// to behave like a pointer.
using CollectionPtrT = decltype(std::declval<GetCollectionAndEstablishReadSourceFunc>()(
std::declval<OperationContext*>(),
std::declval<const CollectionCatalog&>(),
std::declval<bool>()));
CollectionPtrT collection;
catalogStasher.reset();
while (true) {
// AutoGetCollectionForReadBase can choose a read source based on the current replication
// state. Therefore we must fetch the repl state beforehand, to compare with afterwards.
long long replTerm = repl::ReplicationCoordinator::get(opCtx)->getTerm();
auto catalog = CollectionCatalog::get(opCtx);
collection =
getCollectionAndEstablishReadSource(opCtx, *catalog, isLockFreeReadSubOperation);
// A lock request does not always find a collection to lock.
if (!collection)
break;
// If this is a nested lock acquisition, then we already have a consistent stashed catalog
// and snapshot from which to read and we can skip the below logic.
if (isLockFreeReadSubOperation) {
// A consistent in-memory and on-disk state is already set up by a higher level AutoGet*
// instance. Save the catalog on this instance, to retain it against out-of-order
// AutoGet* destruction, and return early.
catalogStasher.stash(catalog);
return collection;
}
// We must open a storage snapshot consistent with the fetched in-memory Collection instance
// and chosen read source. The Collection instance and replication state after opening a
// snapshot will be compared with the previously acquired state. If either does not match,
// then this loop will retry lock acquisition and read source selection until there is a
// match.
//
// Note: getCollectionAndEstablishReadSource() may open a snapshot for PIT reads, so
// preallocateSnapshot() may be a no-op, but that is OK because the snapshot is established
// by getCollectionAndEstablishReadSource() after it fetches a Collection instance.
if (collection->ns().isOplog()) {
// Signal to the RecoveryUnit that the snapshot will be used for reading the oplog.
// Normally the snapshot is opened from a cursor that can take special action when
// reading from the oplog.
opCtx->recoveryUnit()->preallocateSnapshotForOplogRead();
} else {
opCtx->recoveryUnit()->preallocateSnapshot();
}
// The collection may have been dropped since the previous lookup, run the loop one more
// time to cleanup if newCollection is nullptr
auto newCatalog = CollectionCatalog::get(opCtx);
if (catalog == newCatalog) {
auto newCollection = getCollectionAfterSnapshot(opCtx, *catalog);
if (newCollection && catalog == newCatalog &&
collection->getMinimumVisibleSnapshot() ==
newCollection->getMinimumVisibleSnapshot() &&
replTerm == repl::ReplicationCoordinator::get(opCtx)->getTerm()) {
catalogStasher.stash(std::move(catalog));
break;
}
}
LOGV2_DEBUG(5067701,
3,
"Retrying acquiring state for lock-free read because collection, catalog or "
"replication state changed.");
reset();
opCtx->recoveryUnit()->abandonSnapshot();
}
return collection;
}
} // namespace
AutoStatsTracker::AutoStatsTracker(OperationContext* opCtx,
const NamespaceString& nss,
Top::LockType lockType,
LogMode logMode,
int dbProfilingLevel,
Date_t deadline)
: _opCtx(opCtx), _lockType(lockType), _nss(nss), _logMode(logMode) {
if (_logMode == LogMode::kUpdateTop) {
return;
}
stdx::lock_guard<Client> clientLock(*_opCtx->getClient());
CurOp::get(_opCtx)->enter_inlock(_nss.ns().c_str(), dbProfilingLevel);
}
AutoStatsTracker::~AutoStatsTracker() {
if (_logMode == LogMode::kUpdateCurOp) {
return;
}
auto curOp = CurOp::get(_opCtx);
Top::get(_opCtx->getServiceContext())
.record(_opCtx,
_nss.ns(),
curOp->getLogicalOp(),
_lockType,
durationCount<Microseconds>(curOp->elapsedTimeExcludingPauses()),
curOp->isCommand(),
curOp->getReadWriteType());
}
template <typename AutoGetCollectionType, typename EmplaceAutoCollFunc>
AutoGetCollectionForReadBase<AutoGetCollectionType, EmplaceAutoCollFunc>::
AutoGetCollectionForReadBase(OperationContext* opCtx,
const EmplaceAutoCollFunc& emplaceAutoColl,
bool isLockFreeReadSubOperation) {
// If this instance is nested and lock-free, then we do not want to adjust any setting, but we
// do need to set up the Collection reference.
if (isLockFreeReadSubOperation) {
emplaceAutoColl.emplace(_autoColl);
return;
}
// The caller was expecting to conflict with batch application before entering this function.
// i.e. the caller does not currently have a ShouldNotConflict... block in scope.
bool callerWasConflicting = opCtx->lockState()->shouldConflictWithSecondaryBatchApplication();
if (allowSecondaryReadsDuringBatchApplication_DONT_USE(opCtx).value_or(true) &&
opCtx->getServiceContext()->getStorageEngine()->supportsReadConcernSnapshot()) {
_shouldNotConflictWithSecondaryBatchApplicationBlock.emplace(opCtx->lockState());
}
emplaceAutoColl.emplace(_autoColl);
repl::ReplicationCoordinator* const replCoord = repl::ReplicationCoordinator::get(opCtx);
const auto readConcernLevel = repl::ReadConcernArgs::get(opCtx).getLevel();
// If the collection doesn't exist or disappears after releasing locks and waiting, there is no
// need to check for pending catalog changes.
while (const auto& coll = _autoColl->getCollection()) {
// Ban snapshot reads on capped collections.
uassert(ErrorCodes::SnapshotUnavailable,
"Reading from capped collections with readConcern snapshot is not supported",
!coll->isCapped() ||
readConcernLevel != repl::ReadConcernLevel::kSnapshotReadConcern);
// Disallow snapshot reads and causal consistent majority reads on config.transactions
// outside of transactions to avoid running the collection at a point-in-time in the middle
// of a secondary batch. Such reads are unsafe because config.transactions updates are
// coalesced on secondaries. Majority reads without an afterClusterTime is allowed because
// they are allowed to return arbitrarily stale data. We allow kNoTimestamp and kLastApplied
// reads because they must be from internal readers given the snapshot/majority readConcern
// (e.g. for session checkout).
const NamespaceString nss = coll->ns();
const auto afterClusterTime = repl::ReadConcernArgs::get(opCtx).getArgsAfterClusterTime();
const auto allowTransactionTableSnapshot =
repl::ReadConcernArgs::get(opCtx).allowTransactionTableSnapshot();
auto readSource = opCtx->recoveryUnit()->getTimestampReadSource();
if (nss == NamespaceString::kSessionTransactionsTableNamespace &&
readSource != RecoveryUnit::ReadSource::kNoTimestamp &&
readSource != RecoveryUnit::ReadSource::kLastApplied &&
((readConcernLevel == repl::ReadConcernLevel::kSnapshotReadConcern &&
!allowTransactionTableSnapshot) ||
(readConcernLevel == repl::ReadConcernLevel::kMajorityReadConcern &&
afterClusterTime))) {
uasserted(5557800,
"Snapshot reads and causal consistent majority reads on config.transactions "
"are not supported");
}
// During batch application on secondaries, there is a potential to read inconsistent states
// that would normally be protected by the PBWM lock. In order to serve secondary reads
// during this period, we default to not acquiring the lock (by setting
// _shouldNotConflictWithSecondaryBatchApplicationBlock). On primaries, we always read at a
// consistent time, so not taking the PBWM lock is not a problem. On secondaries, we have to
// guarantee we read at a consistent state, so we must read at the lastApplied timestamp,
// which is set after each complete batch.
// Once we have our locks, check whether or not we should override the ReadSource that was
// set before acquiring locks.
auto [newReadSource, shouldReadAtLastApplied] =
SnapshotHelper::shouldChangeReadSource(opCtx, nss);
if (newReadSource) {
opCtx->recoveryUnit()->setTimestampReadSource(*newReadSource);
readSource = *newReadSource;
}
const auto readTimestamp = opCtx->recoveryUnit()->getPointInTimeReadTimestamp(opCtx);
if (readTimestamp && afterClusterTime) {
// Readers that use afterClusterTime have already waited at a higher level for the
// all_durable time to advance to a specified optime, and they assume the read timestamp
// of the operation is at least that waited-for timestamp. For kNoOverlap, which is
// the minimum of lastApplied and all_durable, this invariant ensures that
// afterClusterTime reads do not choose a read timestamp older than the one requested.
invariant(*readTimestamp >= afterClusterTime->asTimestamp(),
str::stream() << "read timestamp " << readTimestamp->toString()
<< "was less than afterClusterTime: "
<< afterClusterTime->asTimestamp().toString());
}
// This assertion protects operations from reading inconsistent data on secondaries when
// using the default ReadSource of kNoTimestamp.
// Reading at lastApplied on secondaries is the safest behavior and is enabled for all user
// and DBDirectClient reads using 'local' and 'available' readConcerns. If an internal
// operation wishes to read without a timestamp during a batch, a ShouldNotConflict can
// suppress this fatal assertion with the following considerations:
// * The operation is not reading replicated data in a replication state where batch
// application is active OR
// * Reading inconsistent, out-of-order data is either inconsequential or required by
// the operation.
// If the caller entered this function expecting to conflict with batch application
// (i.e. no ShouldNotConflict block in scope), but they are reading without a timestamp and
// not holding the PBWM lock, then there is a possibility that this reader may
// unintentionally see inconsistent data during a batch. Certain namespaces are applied
// serially in oplog application, and therefore can be safely read without taking the PBWM
// lock or reading at a timestamp.
if (readSource == RecoveryUnit::ReadSource::kNoTimestamp && callerWasConflicting &&
!nss.mustBeAppliedInOwnOplogBatch() && shouldReadAtLastApplied) {
LOGV2_FATAL(4728700,
"Reading from replicated collection on a secondary without read timestamp "
"or PBWM lock",
"collection"_attr = nss);
}
auto minSnapshot = coll->getMinimumVisibleSnapshot();
if (!SnapshotHelper::collectionChangesConflictWithRead(minSnapshot, readTimestamp)) {
return;
}
// If we are reading at a provided timestamp earlier than the latest catalog changes,
// then we must return an error.
if (readSource == RecoveryUnit::ReadSource::kProvided) {
uasserted(ErrorCodes::SnapshotUnavailable,
str::stream()
<< "Unable to read from a snapshot due to pending collection catalog "
"changes; please retry the operation. Snapshot timestamp is "
<< readTimestamp->toString() << ". Collection minimum is "
<< minSnapshot->toString());
}
invariant(
// The kMajorityCommitted and kLastApplied read sources already read from timestamps
// that are safe with respect to concurrent secondary batch application, and are
// eligible for retrying.
readSource == RecoveryUnit::ReadSource::kMajorityCommitted ||
readSource == RecoveryUnit::ReadSource::kNoOverlap ||
readSource == RecoveryUnit::ReadSource::kLastApplied);
invariant(readConcernLevel != repl::ReadConcernLevel::kSnapshotReadConcern);
// Yield locks in order to do the blocking call below.
_autoColl = boost::none;
// If there are pending catalog changes when using a no-overlap or lastApplied read source,
// we yield to get a new read timestamp ahead of the minimum visible snapshot.
if (readSource == RecoveryUnit::ReadSource::kLastApplied ||
readSource == RecoveryUnit::ReadSource::kNoOverlap) {
invariant(readTimestamp);
LOGV2(20576,
"Tried reading at a timestamp, but future catalog changes are pending. "
"Trying again",
"readTimestamp"_attr = *readTimestamp,
"collection"_attr = nss.ns(),
"collectionMinSnapshot"_attr = *minSnapshot);
// If we are AutoGetting multiple collections, it is possible that we've already done
// some reads and locked in our snapshot. At this point, the only way out is to fail
// the operation. The client application will need to retry.
uassert(
ErrorCodes::SnapshotUnavailable,
str::stream() << "Unable to read from a snapshot due to pending collection catalog "
"changes and holding multiple collection locks; please retry the "
"operation. Snapshot timestamp is "
<< readTimestamp->toString() << ". Collection minimum is "
<< minSnapshot->toString(),
!opCtx->lockState()->isLocked());
// Abandon our snapshot. We may select a new read timestamp or ReadSource in the next
// loop iteration.
opCtx->recoveryUnit()->abandonSnapshot();
}
if (readSource == RecoveryUnit::ReadSource::kMajorityCommitted) {
replCoord->waitUntilSnapshotCommitted(opCtx, *minSnapshot);
uassertStatusOK(opCtx->recoveryUnit()->majorityCommittedSnapshotAvailable());
}
{
stdx::lock_guard<Client> lk(*opCtx->getClient());
CurOp::get(opCtx)->yielded();
}
emplaceAutoColl.emplace(_autoColl);
}
}
EmplaceAutoGetCollectionForRead::EmplaceAutoGetCollectionForRead(
OperationContext* opCtx,
const NamespaceStringOrUUID& nsOrUUID,
AutoGetCollectionViewMode viewMode,
Date_t deadline)
: _opCtx(opCtx), _nsOrUUID(nsOrUUID), _viewMode(viewMode), _deadline(deadline) {
// Multi-document transactions need MODE_IX locks, otherwise MODE_IS.
_collectionLockMode = getLockModeForQuery(opCtx, nsOrUUID.nss());
}
void EmplaceAutoGetCollectionForRead::emplace(boost::optional<AutoGetCollection>& autoColl) const {
autoColl.emplace(_opCtx, _nsOrUUID, _collectionLockMode, _viewMode, _deadline);
}
AutoGetCollectionForRead::AutoGetCollectionForRead(OperationContext* opCtx,
const NamespaceStringOrUUID& nsOrUUID,
AutoGetCollectionViewMode viewMode,
Date_t deadline)
: AutoGetCollectionForReadBase(
opCtx, EmplaceAutoGetCollectionForRead(opCtx, nsOrUUID, viewMode, deadline)) {}
AutoGetCollectionForReadLockFree::EmplaceHelper::EmplaceHelper(
OperationContext* opCtx,
CollectionCatalogStasher& catalogStasher,
const NamespaceStringOrUUID& nsOrUUID,
AutoGetCollectionViewMode viewMode,
Date_t deadline,
bool isLockFreeReadSubOperation)
: _opCtx(opCtx),
_catalogStasher(catalogStasher),
_nsOrUUID(nsOrUUID),
_viewMode(viewMode),
_deadline(deadline),
_isLockFreeReadSubOperation(isLockFreeReadSubOperation) {}
void AutoGetCollectionForReadLockFree::EmplaceHelper::emplace(
boost::optional<AutoGetCollectionLockFree>& autoColl) const {
autoColl.emplace(
_opCtx,
_nsOrUUID,
/* restoreFromYield */
[& catalogStasher = _catalogStasher, isSubOperation = _isLockFreeReadSubOperation](
std::shared_ptr<const Collection>& collection,
OperationContext* opCtx,
CollectionUUID uuid) {
// A sub-operation should never yield because it would break the consistent in-memory
// and on-disk view of the higher level operation.
invariant(!isSubOperation);
collection = acquireCollectionAndConsistentSnapshot(
opCtx,
/* isLockFreeReadSubOperation */
isSubOperation,
/* CollectionCatalogStasher */
catalogStasher,
/* GetCollectionAndEstablishReadSourceFunc */
[uuid](OperationContext* opCtx,
const CollectionCatalog& catalog,
bool isLockFreeReadSubOperation) {
// There should only ever be one helper recovering from a query yield, so it
// should never be nested.
invariant(!isLockFreeReadSubOperation);
auto coll = catalog.lookupCollectionByUUIDForRead(opCtx, uuid);
// After yielding and reacquiring locks, the preconditions that were used to
// select our ReadSource initially need to be checked again. We select a
// ReadSource based on replication state. After a query yields its locks, the
// replication state may have changed, invalidating our current choice of
// ReadSource. Using the same preconditions, change our ReadSource if necessary.
if (coll) {
auto [newReadSource, _] =
SnapshotHelper::shouldChangeReadSource(opCtx, coll->ns());
if (newReadSource) {
opCtx->recoveryUnit()->setTimestampReadSource(*newReadSource);
}
}
return coll;
},
/* GetCollectionAfterSnapshotFunc */
[uuid](OperationContext* opCtx, const CollectionCatalog& catalog) {
return catalog.lookupCollectionByUUIDForRead(opCtx, uuid);
},
/* ResetFunc */
[]() {});
},
_viewMode,
_deadline);
}
AutoGetCollectionForReadLockFree::AutoGetCollectionForReadLockFree(
OperationContext* opCtx,
const NamespaceStringOrUUID& nsOrUUID,
AutoGetCollectionViewMode viewMode,
Date_t deadline)
: _catalogStash(opCtx) {
bool isLockFreeReadSubOperation = opCtx->isLockFreeReadsOp();
// Supported lock-free reads should only ever have an open storage snapshot prior to calling
// this helper if it is a nested lock-free operation. The storage snapshot and in-memory state
// used across lock=free reads must be consistent.
invariant(supportsLockFreeRead(opCtx) &&
(!opCtx->recoveryUnit()->isActive() || isLockFreeReadSubOperation));
EmplaceHelper emplaceFunc(
opCtx, _catalogStash, nsOrUUID, viewMode, deadline, isLockFreeReadSubOperation);
acquireCollectionAndConsistentSnapshot(
opCtx,
/* isLockFreeReadSubOperation */
isLockFreeReadSubOperation,
/* CollectionCatalogStasher */
_catalogStash,
/* GetCollectionAndEstablishReadSourceFunc */
[this, &emplaceFunc](
OperationContext* opCtx, const CollectionCatalog&, bool isLockFreeReadSubOperation) {
_autoGetCollectionForReadBase.emplace(opCtx, emplaceFunc, isLockFreeReadSubOperation);
return _autoGetCollectionForReadBase->getCollection().get();
},
/* GetCollectionAfterSnapshotFunc */
[this](OperationContext* opCtx, const CollectionCatalog& catalog) {
return catalog.lookupCollectionByUUIDForRead(
opCtx, _autoGetCollectionForReadBase.get()->uuid());
},
/* ResetFunc */
[this]() { _autoGetCollectionForReadBase.reset(); });
}
AutoGetCollectionForReadMaybeLockFree::AutoGetCollectionForReadMaybeLockFree(
OperationContext* opCtx,
const NamespaceStringOrUUID& nsOrUUID,
AutoGetCollectionViewMode viewMode,
Date_t deadline) {
if (supportsLockFreeRead(opCtx)) {
_autoGetLockFree.emplace(opCtx, nsOrUUID, viewMode, deadline);
} else {
_autoGet.emplace(opCtx, nsOrUUID, viewMode, deadline);
}
}
const ViewDefinition* AutoGetCollectionForReadMaybeLockFree::getView() const {
if (_autoGet) {
return _autoGet->getView();
} else {
return _autoGetLockFree->getView();
}
}
const NamespaceString& AutoGetCollectionForReadMaybeLockFree::getNss() const {
if (_autoGet) {
return _autoGet->getNss();
} else {
return _autoGetLockFree->getNss();
}
}
const CollectionPtr& AutoGetCollectionForReadMaybeLockFree::getCollection() const {
if (_autoGet) {
return _autoGet->getCollection();
} else {
return _autoGetLockFree->getCollection();
}
}
template <typename AutoGetCollectionForReadType>
AutoGetCollectionForReadCommandBase<AutoGetCollectionForReadType>::
AutoGetCollectionForReadCommandBase(OperationContext* opCtx,
const NamespaceStringOrUUID& nsOrUUID,
AutoGetCollectionViewMode viewMode,
Date_t deadline,
AutoStatsTracker::LogMode logMode)
: _autoCollForRead(opCtx, nsOrUUID, viewMode, deadline),
_statsTracker(
opCtx,
_autoCollForRead.getNss(),
Top::LockType::ReadLocked,
logMode,
CollectionCatalog::get(opCtx)->getDatabaseProfileLevel(_autoCollForRead.getNss().db()),
deadline) {
if (!_autoCollForRead.getView()) {
auto css =
CollectionShardingState::getSharedForLockFreeReads(opCtx, _autoCollForRead.getNss());
css->checkShardVersionOrThrow(opCtx);
}
}
OldClientContext::OldClientContext(OperationContext* opCtx, const std::string& ns, bool doVersion)
: _opCtx(opCtx), _db(DatabaseHolder::get(opCtx)->getDb(opCtx, ns)) {
if (!_db) {
const auto dbName = nsToDatabaseSubstring(ns);
_db = DatabaseHolder::get(opCtx)->openDb(_opCtx, dbName, &_justCreated);
invariant(_db);
}
auto const currentOp = CurOp::get(_opCtx);
if (doVersion) {
switch (currentOp->getNetworkOp()) {
case dbGetMore: // getMore is special and should be handled elsewhere
case dbUpdate: // update & delete check shard version as part of the write executor
case dbDelete: // path, so no need to check them here as well
break;
default:
CollectionShardingState::get(_opCtx, NamespaceString(ns))
->checkShardVersionOrThrow(_opCtx);
break;
}
}
stdx::lock_guard<Client> lk(*_opCtx->getClient());
currentOp->enter_inlock(ns.c_str(),
CollectionCatalog::get(opCtx)->getDatabaseProfileLevel(_db->name()));
}
AutoGetCollectionForReadCommandMaybeLockFree::AutoGetCollectionForReadCommandMaybeLockFree(
OperationContext* opCtx,
const NamespaceStringOrUUID& nsOrUUID,
AutoGetCollectionViewMode viewMode,
Date_t deadline,
AutoStatsTracker::LogMode logMode) {
if (supportsLockFreeRead(opCtx)) {
_autoGetLockFree.emplace(opCtx, nsOrUUID, viewMode, deadline, logMode);
} else {
_autoGet.emplace(opCtx, nsOrUUID, viewMode, deadline, logMode);
}
}
const CollectionPtr& AutoGetCollectionForReadCommandMaybeLockFree::getCollection() const {
if (_autoGet) {
return _autoGet->getCollection();
} else {
return _autoGetLockFree->getCollection();
}
}
const ViewDefinition* AutoGetCollectionForReadCommandMaybeLockFree::getView() const {
if (_autoGet) {
return _autoGet->getView();
} else {
return _autoGetLockFree->getView();
}
}
const NamespaceString& AutoGetCollectionForReadCommandMaybeLockFree::getNss() const {
if (_autoGet) {
return _autoGet->getNss();
} else {
return _autoGetLockFree->getNss();
}
}
AutoReadLockFree::AutoReadLockFree(OperationContext* opCtx, Date_t deadline)
: _catalogStash(opCtx),
_lockFreeReadsBlock(opCtx),
_globalLock(
opCtx, MODE_IS, deadline, Lock::InterruptBehavior::kThrow, true /* skipRSTLLock */) {
// The catalog will be stashed inside the CollectionCatalogStasher.
FakeCollection fakeColl;
acquireCollectionAndConsistentSnapshot(
opCtx,
/* isLockFreeReadSubOperation */
false,
/* CollectionCatalogStasher */
_catalogStash,
/* GetCollectionAndEstablishReadSourceFunc */
[&](OperationContext* opCtx, const CollectionCatalog&, bool) { return &fakeColl; },
/* GetCollectionAfterSnapshotFunc */
[&](OperationContext* opCtx, const CollectionCatalog& catalog) { return &fakeColl; },
/* ResetFunc */
[]() {});
}
AutoGetDbForReadLockFree::AutoGetDbForReadLockFree(OperationContext* opCtx,
StringData dbName,
Date_t deadline)
: _catalogStash(opCtx),
_lockFreeReadsBlock(opCtx),
_globalLock(
opCtx, MODE_IS, deadline, Lock::InterruptBehavior::kThrow, true /* skipRSTLLock */) {
// The catalog will be stashed inside the CollectionCatalogStasher.
FakeCollection fakeColl;
acquireCollectionAndConsistentSnapshot(
opCtx,
/* isLockFreeReadSubOperation */
false,
/* CollectionCatalogStasher */
_catalogStash,
/* GetCollectionAndEstablishReadSourceFunc */
[&](OperationContext* opCtx, const CollectionCatalog&, bool) {
// Check that the sharding database version matches our read.
// Note: this must always be checked, regardless of whether the collection exists, so
// that the dbVersion of this node or the caller gets updated quickly in case either is
// stale.
auto dss = DatabaseShardingState::getSharedForLockFreeReads(opCtx, dbName);
auto dssLock = DatabaseShardingState::DSSLock::lockShared(opCtx, dss.get());
dss->checkDbVersion(opCtx, dssLock);
return &fakeColl;
},
/* GetCollectionAfterSnapshotFunc */
[&](OperationContext* opCtx, const CollectionCatalog& catalog) { return &fakeColl; },
/* ResetFunc */
[]() {});
}
AutoGetDbForReadMaybeLockFree::AutoGetDbForReadMaybeLockFree(OperationContext* opCtx,
StringData dbName,
Date_t deadline) {
if (supportsLockFreeRead(opCtx)) {
_autoGetLockFree.emplace(opCtx, dbName, deadline);
} else {
_autoGet.emplace(opCtx, dbName, MODE_IS, deadline);
}
}
OldClientContext::~OldClientContext() {
// If in an interrupt, don't record any stats.
// It is possible to have no lock after saving the lock state and being interrupted while
// waiting to restore.
if (_opCtx->getKillStatus() != ErrorCodes::OK)
return;
invariant(_opCtx->lockState()->isLocked());
auto currentOp = CurOp::get(_opCtx);
Top::get(_opCtx->getClient()->getServiceContext())
.record(_opCtx,
currentOp->getNS(),
currentOp->getLogicalOp(),
_opCtx->lockState()->isWriteLocked() ? Top::LockType::WriteLocked
: Top::LockType::ReadLocked,
_timer.micros(),
currentOp->isCommand(),
currentOp->getReadWriteType());
}
LockMode getLockModeForQuery(OperationContext* opCtx, const boost::optional<NamespaceString>& nss) {
invariant(opCtx);
// Use IX locks for multi-statement transactions; otherwise, use IS locks.
if (opCtx->inMultiDocumentTransaction()) {
uassert(51071,
"Cannot query system.views within a transaction",
!nss || !nss->isSystemDotViews());
return MODE_IX;
}
return MODE_IS;
}
BlockSecondaryReadsDuringBatchApplication_DONT_USE::
BlockSecondaryReadsDuringBatchApplication_DONT_USE(OperationContext* opCtx)
: _opCtx(opCtx) {
auto allowSecondaryReads = &allowSecondaryReadsDuringBatchApplication_DONT_USE(opCtx);
allowSecondaryReads->swap(_originalSettings);
*allowSecondaryReads = false;
}
BlockSecondaryReadsDuringBatchApplication_DONT_USE::
~BlockSecondaryReadsDuringBatchApplication_DONT_USE() {
auto allowSecondaryReads = &allowSecondaryReadsDuringBatchApplication_DONT_USE(_opCtx);
allowSecondaryReads->swap(_originalSettings);
}
template class AutoGetCollectionForReadBase<AutoGetCollection, EmplaceAutoGetCollectionForRead>;
template class AutoGetCollectionForReadCommandBase<AutoGetCollectionForRead>;
template class AutoGetCollectionForReadBase<AutoGetCollectionLockFree,
AutoGetCollectionForReadLockFree::EmplaceHelper>;
template class AutoGetCollectionForReadCommandBase<AutoGetCollectionForReadLockFree>;
} // namespace mongo
|