summaryrefslogtreecommitdiff
path: root/src/third_party/wiredtiger/src/reconcile/rec_write.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/third_party/wiredtiger/src/reconcile/rec_write.c')
-rw-r--r--src/third_party/wiredtiger/src/reconcile/rec_write.c117
1 files changed, 70 insertions, 47 deletions
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c
index 866eab349b7..a63d3eab361 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_write.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c
@@ -295,9 +295,10 @@ static int __rec_cell_build_int_key(WT_SESSION_IMPL *,
static int __rec_cell_build_leaf_key(WT_SESSION_IMPL *,
WT_RECONCILE *, const void *, size_t, bool *);
static int __rec_cell_build_ovfl(WT_SESSION_IMPL *, WT_RECONCILE *,
- WT_KV *, uint8_t, wt_timestamp_t, wt_timestamp_t, uint64_t);
+ WT_KV *, uint8_t, wt_timestamp_t *, wt_timestamp_t *, uint64_t);
static int __rec_cell_build_val(WT_SESSION_IMPL *, WT_RECONCILE *,
- const void *, size_t, wt_timestamp_t, wt_timestamp_t, uint64_t);
+ const void *, size_t, wt_timestamp_t *, wt_timestamp_t *,
+ uint64_t);
static void __rec_cleanup(WT_SESSION_IMPL *, WT_RECONCILE *);
static int __rec_col_fix(WT_SESSION_IMPL *, WT_RECONCILE *, WT_REF *);
static int __rec_col_fix_slvg(WT_SESSION_IMPL *,
@@ -1933,7 +1934,8 @@ __rec_image_copy(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_KV *kv)
*/
static int
__rec_dict_replace(WT_SESSION_IMPL *session, WT_RECONCILE *r,
- wt_timestamp_t start_ts, wt_timestamp_t stop_ts, uint64_t rle, WT_KV *val)
+ wt_timestamp_t *start_tsp, wt_timestamp_t *stop_tsp,
+ uint64_t rle, WT_KV *val)
{
WT_DICTIONARY *dp;
uint64_t offset;
@@ -1976,7 +1978,7 @@ __rec_dict_replace(WT_SESSION_IMPL *session, WT_RECONCILE *r,
offset = (uint64_t)WT_PTRDIFF(r->first_free,
(uint8_t *)r->cur_ptr->image.mem + dp->offset);
val->len = val->cell_len = __wt_cell_pack_copy(
- session, &val->cell, start_ts, stop_ts, rle, offset);
+ session, &val->cell, start_tsp, stop_tsp, rle, offset);
val->buf.data = NULL;
val->buf.size = 0;
}
@@ -3473,18 +3475,21 @@ __wt_bulk_insert_row(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk)
WT_CURSOR *cursor;
WT_KV *key, *val;
WT_RECONCILE *r;
+ wt_timestamp_t start_ts, stop_ts;
bool ovfl_key;
r = cbulk->reconcile;
btree = S2BT(session);
cursor = &cbulk->cbt.iface;
+ start_ts = WT_TS_NONE;
+ stop_ts = WT_TS_MAX;
key = &r->k;
val = &r->v;
WT_RET(__rec_cell_build_leaf_key(session, r, /* Build key cell */
cursor->key.data, cursor->key.size, &ovfl_key));
WT_RET(__rec_cell_build_val(session, r, /* Build value cell */
- cursor->value.data, cursor->value.size, WT_TS_NONE, WT_TS_MAX, 0));
+ cursor->value.data, cursor->value.size, &start_ts, &stop_ts, 0));
/* Boundary: split or write the page. */
if (WT_CROSSING_SPLIT_BND(r, key->len + val->len)) {
@@ -3511,10 +3516,10 @@ __wt_bulk_insert_row(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk)
r->all_empty_value = false;
if (btree->dictionary)
WT_RET(__rec_dict_replace(
- session, r, WT_TS_NONE, WT_TS_MAX, 0, val));
+ session, r, &start_ts, &stop_ts, 0, val));
__rec_image_copy(session, r, val);
}
- __rec_addr_ts_update(r, WT_TS_NONE, WT_TS_NONE, WT_TS_MAX);
+ __rec_addr_ts_update(r, start_ts, start_ts, stop_ts);
/* Update compression state. */
__rec_key_state_update(r, ovfl_key);
@@ -3632,14 +3637,17 @@ __wt_bulk_insert_var(
WT_BTREE *btree;
WT_KV *val;
WT_RECONCILE *r;
+ wt_timestamp_t start_ts, stop_ts;
r = cbulk->reconcile;
btree = S2BT(session);
+ start_ts = WT_TS_NONE;
+ stop_ts = WT_TS_MAX;
val = &r->v;
if (deleted) {
val->cell_len = __wt_cell_pack_del(
- session, &val->cell, WT_TS_NONE, WT_TS_MAX, cbulk->rle);
+ session, &val->cell, &start_ts, &stop_ts, cbulk->rle);
val->buf.data = NULL;
val->buf.size = 0;
val->len = val->cell_len;
@@ -3651,7 +3659,7 @@ __wt_bulk_insert_var(
*/
WT_RET(__rec_cell_build_val(session,
r, cbulk->last.data, cbulk->last.size,
- WT_TS_NONE, WT_TS_MAX, cbulk->rle));
+ &start_ts, &stop_ts, cbulk->rle));
/* Boundary: split or write the page. */
if (WT_CROSSING_SPLIT_BND(r, val->len))
@@ -3660,9 +3668,9 @@ __wt_bulk_insert_var(
/* Copy the value onto the page. */
if (btree->dictionary)
WT_RET(__rec_dict_replace(
- session, r, WT_TS_NONE, WT_TS_MAX, cbulk->rle, val));
+ session, r, &start_ts, &stop_ts, cbulk->rle, val));
__rec_image_copy(session, r, val);
- __rec_addr_ts_update(r, WT_TS_NONE, WT_TS_NONE, WT_TS_MAX);
+ __rec_addr_ts_update(r, start_ts, start_ts, stop_ts);
/* Update the starting record number in case we split. */
r->recno += cbulk->rle;
@@ -4090,19 +4098,19 @@ __rec_col_var_helper(WT_SESSION_IMPL *session, WT_RECONCILE *r,
if (deleted) {
val->cell_len = __wt_cell_pack_del(
- session, &val->cell, start_ts, stop_ts, rle);
+ session, &val->cell, &start_ts, &stop_ts, rle);
val->buf.data = NULL;
val->buf.size = 0;
val->len = val->cell_len;
} else if (overflow_type) {
val->cell_len = __wt_cell_pack_ovfl(session, &val->cell,
- WT_CELL_VALUE_OVFL, start_ts, stop_ts, rle, value->size);
+ WT_CELL_VALUE_OVFL, &start_ts, &stop_ts, rle, value->size);
val->buf.data = value->data;
val->buf.size = value->size;
val->len = val->cell_len + value->size;
} else
WT_RET(__rec_cell_build_val(session,
- r, value->data, value->size, start_ts, stop_ts, rle));
+ r, value->data, value->size, &start_ts, &stop_ts, rle));
/* Boundary: split or write the page. */
if (__rec_need_split(r, val->len))
@@ -4111,7 +4119,7 @@ __rec_col_var_helper(WT_SESSION_IMPL *session, WT_RECONCILE *r,
/* Copy the value onto the page. */
if (!deleted && !overflow_type && btree->dictionary)
WT_RET(__rec_dict_replace(
- session, r, start_ts, stop_ts, rle, val));
+ session, r, &start_ts, &stop_ts, rle, val));
__rec_image_copy(session, r, val);
__rec_addr_ts_update(r, start_ts, start_ts, stop_ts);
@@ -4986,6 +4994,27 @@ __rec_row_merge(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
}
/*
+ * __rec_row_zero_len --
+ * Return if a zero-length item can be written.
+ */
+static bool
+__rec_row_zero_len(WT_SESSION_IMPL *session,
+ uint64_t txnid, wt_timestamp_t start_ts, wt_timestamp_t stop_ts)
+{
+ /* Before timestamps were stored on pages, it was always possible. */
+ if (!__wt_process.page_version_ts)
+ return (true);
+
+ /*
+ * The item must be globally visible because we're not writing anything
+ * on the page.
+ */
+ return ((start_ts == WT_TS_NONE ||
+ __wt_txn_visible_all(session, txnid, start_ts)) &&
+ stop_ts == WT_TS_MAX);
+}
+
+/*
* __rec_row_leaf --
* Reconcile a row-store leaf page.
*/
@@ -5110,8 +5139,8 @@ __rec_row_leaf(WT_SESSION_IMPL *session,
p = tmpval->data;
size = tmpval->size;
}
- WT_ERR(__rec_cell_build_val(
- session, r, p, size, start_ts, stop_ts, 0));
+ WT_ERR(__rec_cell_build_val(session,
+ r, p, size, &start_ts, &stop_ts, 0));
dictionary = true;
} else if (vpack->raw == WT_CELL_VALUE_OVFL_RM) {
/*
@@ -5157,7 +5186,7 @@ __rec_row_leaf(WT_SESSION_IMPL *session,
*/
WT_ERR(__rec_cell_build_val(session, r,
"ovfl-unused", strlen("ovfl-unused"),
- start_ts, stop_ts, 0));
+ &start_ts, &stop_ts, 0));
} else {
val->buf.data = vpack->cell;
val->buf.size = __wt_cell_total_len(vpack);
@@ -5185,14 +5214,14 @@ __rec_row_leaf(WT_SESSION_IMPL *session,
WT_ERR(__rec_cell_build_val(session, r,
cbt->iface.value.data,
cbt->iface.value.size,
- start_ts, stop_ts, 0));
+ &start_ts, &stop_ts, 0));
dictionary = true;
break;
case WT_UPDATE_STANDARD:
/* Take the value from the update. */
WT_ERR(__rec_cell_build_val(session, r,
upd->data, upd->size,
- start_ts, stop_ts, 0));
+ &start_ts, &stop_ts, 0));
dictionary = true;
break;
case WT_UPDATE_TOMBSTONE:
@@ -5338,20 +5367,16 @@ build:
session, r, key->len + val->len));
}
- /*
- * Copy the key/value pair onto the page. Zero-length items must
- * be globally visible as we're writing nothing to the page.
- */
+ /* Copy the key/value pair onto the page. */
__rec_image_copy(session, r, key);
if (val->len == 0 &&
- (!__wt_process.page_version_ts ||
- __wt_txn_visible_all(session, txnid, stop_ts)))
+ __rec_row_zero_len(session, txnid, start_ts, stop_ts))
r->any_empty_value = true;
else {
r->all_empty_value = false;
if (dictionary && btree->dictionary)
WT_ERR(__rec_dict_replace(
- session, r, start_ts, stop_ts, 0, val));
+ session, r, &start_ts, &stop_ts, 0, val));
__rec_image_copy(session, r, val);
}
__rec_addr_ts_update(r, start_ts, start_ts, stop_ts);
@@ -5438,12 +5463,12 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins)
session, cbt, upd, F_ISSET(r, WT_REC_VISIBLE_ALL)));
WT_RET(__rec_cell_build_val(session, r,
cbt->iface.value.data, cbt->iface.value.size,
- start_ts, stop_ts, 0));
+ &start_ts, &stop_ts, 0));
break;
case WT_UPDATE_STANDARD:
/* Take the value from the update. */
WT_RET(__rec_cell_build_val(session, r,
- upd->data, upd->size, start_ts, stop_ts, 0));
+ upd->data, upd->size, &start_ts, &stop_ts, 0));
break;
case WT_UPDATE_TOMBSTONE:
continue;
@@ -5472,20 +5497,16 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins)
session, r, key->len + val->len));
}
- /*
- * Copy the key/value pair onto the page. Zero-length items must
- * be globally visible as we're writing nothing to the page.
- */
+ /* Copy the key/value pair onto the page. */
__rec_image_copy(session, r, key);
if (val->len == 0 &&
- (!__wt_process.page_version_ts &&
- __wt_txn_visible_all(session, txnid, stop_ts)))
+ __rec_row_zero_len(session, txnid, start_ts, stop_ts))
r->any_empty_value = true;
else {
r->all_empty_value = false;
if (btree->dictionary)
WT_RET(__rec_dict_replace(
- session, r, start_ts, stop_ts, 0, val));
+ session, r, &start_ts, &stop_ts, 0, val));
__rec_image_copy(session, r, val);
}
__rec_addr_ts_update(r, start_ts, start_ts, stop_ts);
@@ -5909,12 +5930,13 @@ __rec_cell_build_int_key(WT_SESSION_IMPL *session,
{
WT_BTREE *btree;
WT_KV *key;
+ wt_timestamp_t start_ts, stop_ts;
*is_ovflp = false;
btree = S2BT(session);
-
key = &r->k;
+ start_ts = stop_ts = WT_TS_NONE; /* Keys aren't timestamped. */
/* Copy the bytes into the "current" and key buffers. */
WT_RET(__wt_buf_set(session, r->cur, data, size));
@@ -5926,7 +5948,7 @@ __rec_cell_build_int_key(WT_SESSION_IMPL *session,
*is_ovflp = true;
return (__rec_cell_build_ovfl(session, r,
- key, WT_CELL_KEY_OVFL, WT_TS_NONE, WT_TS_NONE, 0));
+ key, WT_CELL_KEY_OVFL, &start_ts, &stop_ts, 0));
}
key->cell_len = __wt_cell_pack_int_key(&key->cell, key->buf.size);
@@ -5946,6 +5968,7 @@ __rec_cell_build_leaf_key(WT_SESSION_IMPL *session,
{
WT_BTREE *btree;
WT_KV *key;
+ wt_timestamp_t start_ts, stop_ts;
size_t pfx_max;
const uint8_t *a, *b;
uint8_t pfx;
@@ -5953,7 +5976,7 @@ __rec_cell_build_leaf_key(WT_SESSION_IMPL *session,
*is_ovflp = false;
btree = S2BT(session);
-
+ start_ts = stop_ts = WT_TS_NONE; /* Keys aren't timestamped. */
key = &r->k;
pfx = 0;
@@ -6026,7 +6049,7 @@ __rec_cell_build_leaf_key(WT_SESSION_IMPL *session,
*is_ovflp = true;
return (__rec_cell_build_ovfl(session, r, key,
- WT_CELL_KEY_OVFL, WT_TS_NONE, WT_TS_NONE, 0));
+ WT_CELL_KEY_OVFL, &start_ts, &stop_ts, 0));
}
return (
__rec_cell_build_leaf_key(session, r, NULL, 0, is_ovflp));
@@ -6104,7 +6127,7 @@ __rec_cell_build_addr(WT_SESSION_IMPL *session,
static int
__rec_cell_build_val(WT_SESSION_IMPL *session, WT_RECONCILE *r,
const void *data, size_t size,
- wt_timestamp_t start_ts, wt_timestamp_t stop_ts, uint64_t rle)
+ wt_timestamp_t *start_tsp, wt_timestamp_t *stop_tsp, uint64_t rle)
{
WT_BTREE *btree;
WT_KV *val;
@@ -6133,11 +6156,11 @@ __rec_cell_build_val(WT_SESSION_IMPL *session, WT_RECONCILE *r,
WT_STAT_DATA_INCR(session, rec_overflow_value);
return (__rec_cell_build_ovfl(session, r,
- val, WT_CELL_VALUE_OVFL, start_ts, stop_ts, rle));
+ val, WT_CELL_VALUE_OVFL, start_tsp, stop_tsp, rle));
}
}
- val->cell_len = __wt_cell_pack_data(
- session, &val->cell, start_ts, stop_ts, rle, val->buf.size);
+ val->cell_len = __wt_cell_pack_value(
+ session, &val->cell, start_tsp, stop_tsp, rle, val->buf.size);
val->len = val->cell_len + val->buf.size;
return (0);
@@ -6150,7 +6173,7 @@ __rec_cell_build_val(WT_SESSION_IMPL *session, WT_RECONCILE *r,
static int
__rec_cell_build_ovfl(WT_SESSION_IMPL *session,
WT_RECONCILE *r, WT_KV *kv, uint8_t type,
- wt_timestamp_t start_ts, wt_timestamp_t stop_ts, uint64_t rle)
+ wt_timestamp_t *start_tsp, wt_timestamp_t *stop_tsp, uint64_t rle)
{
WT_BM *bm;
WT_BTREE *btree;
@@ -6210,7 +6233,7 @@ __rec_cell_build_ovfl(WT_SESSION_IMPL *session,
/* Build the cell and return. */
kv->cell_len = __wt_cell_pack_ovfl(
- session, &kv->cell, type, start_ts, stop_ts, rle, kv->buf.size);
+ session, &kv->cell, type, start_tsp, stop_tsp, rle, kv->buf.size);
kv->len = kv->cell_len + kv->buf.size;
err: __wt_scr_free(session, &tmp);
@@ -6373,7 +6396,7 @@ __rec_dictionary_lookup(
hash = __wt_hash_fnv64(val->buf.data, val->buf.size);
for (dp = __rec_dictionary_skip_search(r->dictionary_head, hash);
dp != NULL && dp->hash == hash; dp = dp->next[0]) {
- WT_RET(__wt_cell_pack_data_match(
+ WT_RET(__wt_cell_pack_value_match(
(WT_CELL *)((uint8_t *)r->cur_ptr->image.mem + dp->offset),
&val->cell, val->buf.data, &match));
if (match) {