summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Michael Cahill <michael.cahill@mongodb.com> 2015-10-07 15:55:26 +1100
committer: Michael Cahill <michael.cahill@mongodb.com> 2015-10-07 15:55:26 +1100
commit: 6def405726ba8060f968dac388cc8eb07e09a242 (patch)
tree: 3f259b46284d67daf901bcae6b716de2a5f64a81
parent: 0d74bc686a3a616bfa40e3272a0a04e6958c3892 (diff)
download: mongo-6def405726ba8060f968dac388cc8eb07e09a242.tar.gz
WT-2157 If we give up trying to split a page, make sure it is written by the next checkpoint.
-rw-r--r-- src/btree/bt_sync.c 3
-rw-r--r-- src/reconcile/rec_write.c 37
2 files changed, 12 insertions, 28 deletions
diff --git a/src/btree/bt_sync.c b/src/btree/bt_sync.c
index 247bdef65c8..237d900c3d1 100644
--- a/src/btree/bt_sync.c
+++ b/src/btree/bt_sync.c
@@ -140,8 +140,7 @@ __sync_file(WT_SESSION_IMPL *session, int syncop)
*/
if (!WT_PAGE_IS_INTERNAL(page) &&
F_ISSET(txn, WT_TXN_HAS_SNAPSHOT) &&
- WT_TXNID_LT(txn->snap_max, mod->first_dirty_txn) &&
- mod->rec_result != WT_PM_REC_REWRITE) {
+ WT_TXNID_LT(txn->snap_max, mod->first_dirty_txn)) {
__wt_page_modify_set(session, page);
continue;
}
diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c
index f2c32a434bf..40917bebf56 100644
--- a/src/reconcile/rec_write.c
+++ b/src/reconcile/rec_write.c
@@ -44,7 +44,6 @@ typedef struct {
* Track maximum transaction ID seen and first unwritten transaction ID.
*/
uint64_t max_txn;
- uint64_t first_dirty_txn;
/*
* When we can't mark the page clean (for example, checkpoint found some
@@ -292,7 +291,7 @@ typedef struct {
} WT_RECONCILE;
static void __rec_bnd_cleanup(WT_SESSION_IMPL *, WT_RECONCILE *, bool);
-static void __rec_cell_build_addr(
+static void __rec_cell_build_addr(WT_SESSION_IMPL *,
WT_RECONCILE *, const void *, size_t, u_int, uint64_t);
static int __rec_cell_build_int_key(WT_SESSION_IMPL *,
WT_RECONCILE *, const void *, size_t, bool *);
@@ -538,11 +537,6 @@ __rec_write_status(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
*/
if (r->leave_dirty) {
/*
- * Update the page's first unwritten transaction ID.
- */
- mod->first_dirty_txn = r->first_dirty_txn;
-
- /*
* The page remains dirty.
*
* Any checkpoint call cleared the tree's modified flag before
@@ -880,12 +874,6 @@ __rec_write_init(WT_SESSION_IMPL *session,
r->cache_write_lookaside = r->cache_write_restore = false;
- /*
- * Running transactions may update the page after we write it, so
- * this is the highest ID we can be confident we will see.
- */
- r->first_dirty_txn = conn->txn_global.last_running;
-
return (0);
}
@@ -1083,17 +1071,11 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
if ((txnid = upd->txnid) == WT_TXN_ABORTED)
continue;
- /*
- * Track the largest/smallest transaction IDs on the list and
- * the smallest not-globally-visible transaction on the page.
- */
+ /* Track the largest/smallest transaction IDs on the list. */
if (WT_TXNID_LT(max_txn, txnid))
max_txn = txnid;
if (WT_TXNID_LT(txnid, min_txn))
min_txn = txnid;
- if (WT_TXNID_LT(txnid, r->first_dirty_txn) &&
- !__wt_txn_visible_all(session, txnid))
- r->first_dirty_txn = txnid;
/*
* Find the first update we can use.
@@ -3837,7 +3819,8 @@ __rec_col_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
val->cell_len = 0;
val->len = val->buf.size;
} else
- __rec_cell_build_addr(r, addr->addr, addr->size,
+ __rec_cell_build_addr(session, r,
+ addr->addr, addr->size,
__rec_vtype(addr), ref->key.recno);
WT_CHILD_RELEASE_ERR(session, hazard, ref);
@@ -3883,7 +3866,7 @@ __rec_col_merge(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
/* Build the value cell. */
addr = &multi->addr;
- __rec_cell_build_addr(r,
+ __rec_cell_build_addr(session, r,
addr->addr, addr->size, __rec_vtype(addr), r->recno);
/* Boundary: split or write the page. */
@@ -4708,7 +4691,7 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
vtype = state == WT_CHILD_PROXY ?
WT_CELL_ADDR_DEL : (u_int)vpack->raw;
}
- __rec_cell_build_addr(r, p, size, vtype, WT_RECNO_OOB);
+ __rec_cell_build_addr(session, r, p, size, vtype, WT_RECNO_OOB);
WT_CHILD_RELEASE_ERR(session, hazard, ref);
/*
@@ -4794,8 +4777,8 @@ __rec_row_merge(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
r->cell_zero = false;
addr = &multi->addr;
- __rec_cell_build_addr(
- r, addr->addr, addr->size, __rec_vtype(addr), WT_RECNO_OOB);
+ __rec_cell_build_addr(session, r,
+ addr->addr, addr->size, __rec_vtype(addr), WT_RECNO_OOB);
/* Boundary: split or write the page. */
if (key->len + val->len > r->space_avail)
@@ -5863,13 +5846,15 @@ __rec_cell_build_leaf_key(WT_SESSION_IMPL *session,
* on the page.
*/
static void
-__rec_cell_build_addr(WT_RECONCILE *r,
+__rec_cell_build_addr(WT_SESSION_IMPL *session, WT_RECONCILE *r,
const void *addr, size_t size, u_int cell_type, uint64_t recno)
{
WT_KV *val;
val = &r->v;
+ WT_ASSERT(session, size != 0 || cell_type == WT_CELL_ADDR_DEL);
+
/*
* We don't check the address size because we can't store an address on
* an overflow page: if the address won't fit, the overflow page's