summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndres Freund <andres@anarazel.de>2023-03-07 21:36:46 -0800
committerAndres Freund <andres@anarazel.de>2023-03-07 21:52:32 -0800
commitbe504a3e974d75be6f95c8f9b7367126034f2d12 (patch)
tree16ecb8ea2125aede8bde37a5650ce3641d0d9eaf
parenta4e003338d1832981354f5f89d9d5858439fd669 (diff)
downloadpostgresql-be504a3e974d75be6f95c8f9b7367126034f2d12.tar.gz
Fix corruption due to vacuum_defer_cleanup_age underflowing 64bit xids
When vacuum_defer_cleanup_age is bigger than the current xid, including the epoch, the subtraction of vacuum_defer_cleanup_age would lead to a wrapped around xid. While that normally is not a problem, the subsequent conversion to a 64bit xid results in a 64bit-xid very far into the future. As that xid is used as a horizon to detect whether rows versions are old enough to be removed, that allows removal of rows that are still visible (i.e. corruption). If vacuum_defer_cleanup_age was never changed from the default, there is no chance of this bug occurring. This bug was introduced in dc7420c2c92. A lesser version of it exists in 12-13, introduced by fb5344c969a, affecting only GiST. The 12-13 version of the issue can, in rare cases, lead to pages in a gist index getting recycled too early, potentially causing index entries to be found multiple times. The fix is fairly simple - don't allow vacuum_defer_cleanup_age to retreat further than FirstNormalTransactionId. Patches to make similar bugs easier to find, by adding asserts to the 64bit xid infrastructure, have been proposed, but are not suitable for backpatching. Currently there are no tests for vacuum_defer_cleanup_age. A patch introducing infrastructure to make writing a test easier has been posted to the list. Reported-by: Michail Nikolaev <michail.nikolaev@gmail.com> Reviewed-by: Matthias van de Meent <boekewurm+postgres@gmail.com> Author: Andres Freund <andres@anarazel.de> Discussion: https://postgr.es/m/20230108002923.cyoser3ttmt63bfn@awork3.anarazel.de Backpatch: 12-, but impact/fix is smaller for 12-13
-rw-r--r--src/backend/storage/ipc/procarray.c85
1 files changed, 73 insertions, 12 deletions
diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c
index a7071b2fce..a9fb97460d 100644
--- a/src/backend/storage/ipc/procarray.c
+++ b/src/backend/storage/ipc/procarray.c
@@ -367,6 +367,9 @@ static inline void ProcArrayEndTransactionInternal(PGPROC *proc, TransactionId l
static void ProcArrayGroupClearXid(PGPROC *proc, TransactionId latestXid);
static void MaintainLatestCompletedXid(TransactionId latestXid);
static void MaintainLatestCompletedXidRecovery(TransactionId latestXid);
+static void TransactionIdRetreatSafely(TransactionId *xid,
+ int retreat_by,
+ FullTransactionId rel);
static inline FullTransactionId FullXidRelativeTo(FullTransactionId rel,
TransactionId xid);
@@ -1888,17 +1891,35 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h)
* so guc.c should limit it to no more than the xidStopLimit threshold
* in varsup.c. Also note that we intentionally don't apply
* vacuum_defer_cleanup_age on standby servers.
+ *
+ * Need to use TransactionIdRetreatSafely() instead of open-coding the
+ * subtraction, to prevent creating an xid before
+ * FirstNormalTransactionId.
*/
- h->oldest_considered_running =
- TransactionIdRetreatedBy(h->oldest_considered_running,
- vacuum_defer_cleanup_age);
- h->shared_oldest_nonremovable =
- TransactionIdRetreatedBy(h->shared_oldest_nonremovable,
- vacuum_defer_cleanup_age);
- h->data_oldest_nonremovable =
- TransactionIdRetreatedBy(h->data_oldest_nonremovable,
- vacuum_defer_cleanup_age);
- /* defer doesn't apply to temp relations */
+ Assert(TransactionIdPrecedesOrEquals(h->oldest_considered_running,
+ h->shared_oldest_nonremovable));
+ Assert(TransactionIdPrecedesOrEquals(h->shared_oldest_nonremovable,
+ h->data_oldest_nonremovable));
+
+ if (vacuum_defer_cleanup_age > 0)
+ {
+ TransactionIdRetreatSafely(&h->oldest_considered_running,
+ vacuum_defer_cleanup_age,
+ h->latest_completed);
+ TransactionIdRetreatSafely(&h->shared_oldest_nonremovable,
+ vacuum_defer_cleanup_age,
+ h->latest_completed);
+ TransactionIdRetreatSafely(&h->data_oldest_nonremovable,
+ vacuum_defer_cleanup_age,
+ h->latest_completed);
+ /* defer doesn't apply to temp relations */
+
+
+ Assert(TransactionIdPrecedesOrEquals(h->oldest_considered_running,
+ h->shared_oldest_nonremovable));
+ Assert(TransactionIdPrecedesOrEquals(h->shared_oldest_nonremovable,
+ h->data_oldest_nonremovable));
+ }
}
/*
@@ -2470,8 +2491,10 @@ GetSnapshotData(Snapshot snapshot)
oldestfxid = FullXidRelativeTo(latest_completed, oldestxid);
/* apply vacuum_defer_cleanup_age */
- def_vis_xid_data =
- TransactionIdRetreatedBy(xmin, vacuum_defer_cleanup_age);
+ def_vis_xid_data = xmin;
+ TransactionIdRetreatSafely(&def_vis_xid_data,
+ vacuum_defer_cleanup_age,
+ oldestfxid);
/* Check whether there's a replication slot requiring an older xmin. */
def_vis_xid_data =
@@ -4296,6 +4319,44 @@ GlobalVisCheckRemovableXid(Relation rel, TransactionId xid)
}
/*
+ * Safely retract *xid by retreat_by, store the result in *xid.
+ *
+ * Need to be careful to prevent *xid from retreating below
+ * FirstNormalTransactionId during epoch 0. This is important to prevent
+ * generating xids that cannot be converted to a FullTransactionId without
+ * wrapping around.
+ *
+ * If retreat_by would lead to a too old xid, FirstNormalTransactionId is
+ * returned instead.
+ */
+static void
+TransactionIdRetreatSafely(TransactionId *xid, int retreat_by, FullTransactionId rel)
+{
+ TransactionId original_xid = *xid;
+ FullTransactionId fxid;
+ uint64 fxid_i;
+
+ Assert(TransactionIdIsNormal(original_xid));
+ Assert(retreat_by >= 0); /* relevant GUCs are stored as ints */
+ AssertTransactionIdInAllowableRange(original_xid);
+
+ if (retreat_by == 0)
+ return;
+
+ fxid = FullXidRelativeTo(rel, original_xid);
+ fxid_i = U64FromFullTransactionId(fxid);
+
+ if ((fxid_i - FirstNormalTransactionId) <= retreat_by)
+ *xid = FirstNormalTransactionId;
+ else
+ {
+ *xid = TransactionIdRetreatedBy(original_xid, retreat_by);
+ Assert(TransactionIdIsNormal(*xid));
+ Assert(NormalTransactionIdPrecedes(*xid, original_xid));
+ }
+}
+
+/*
* Convert a 32 bit transaction id into 64 bit transaction id, by assuming it
* is within MaxTransactionId / 2 of XidFromFullTransactionId(rel).
*