diff options
Diffstat (limited to 'src/third_party/wiredtiger/src/reconcile/rec_write.c')
-rw-r--r-- | src/third_party/wiredtiger/src/reconcile/rec_write.c | 117 |
1 files changed, 70 insertions, 47 deletions
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c index 866eab349b7..a63d3eab361 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_write.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c @@ -295,9 +295,10 @@ static int __rec_cell_build_int_key(WT_SESSION_IMPL *, static int __rec_cell_build_leaf_key(WT_SESSION_IMPL *, WT_RECONCILE *, const void *, size_t, bool *); static int __rec_cell_build_ovfl(WT_SESSION_IMPL *, WT_RECONCILE *, - WT_KV *, uint8_t, wt_timestamp_t, wt_timestamp_t, uint64_t); + WT_KV *, uint8_t, wt_timestamp_t *, wt_timestamp_t *, uint64_t); static int __rec_cell_build_val(WT_SESSION_IMPL *, WT_RECONCILE *, - const void *, size_t, wt_timestamp_t, wt_timestamp_t, uint64_t); + const void *, size_t, wt_timestamp_t *, wt_timestamp_t *, + uint64_t); static void __rec_cleanup(WT_SESSION_IMPL *, WT_RECONCILE *); static int __rec_col_fix(WT_SESSION_IMPL *, WT_RECONCILE *, WT_REF *); static int __rec_col_fix_slvg(WT_SESSION_IMPL *, @@ -1933,7 +1934,8 @@ __rec_image_copy(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_KV *kv) */ static int __rec_dict_replace(WT_SESSION_IMPL *session, WT_RECONCILE *r, - wt_timestamp_t start_ts, wt_timestamp_t stop_ts, uint64_t rle, WT_KV *val) + wt_timestamp_t *start_tsp, wt_timestamp_t *stop_tsp, + uint64_t rle, WT_KV *val) { WT_DICTIONARY *dp; uint64_t offset; @@ -1976,7 +1978,7 @@ __rec_dict_replace(WT_SESSION_IMPL *session, WT_RECONCILE *r, offset = (uint64_t)WT_PTRDIFF(r->first_free, (uint8_t *)r->cur_ptr->image.mem + dp->offset); val->len = val->cell_len = __wt_cell_pack_copy( - session, &val->cell, start_ts, stop_ts, rle, offset); + session, &val->cell, start_tsp, stop_tsp, rle, offset); val->buf.data = NULL; val->buf.size = 0; } @@ -3473,18 +3475,21 @@ __wt_bulk_insert_row(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) WT_CURSOR *cursor; WT_KV *key, *val; WT_RECONCILE *r; + wt_timestamp_t start_ts, stop_ts; bool ovfl_key; r = cbulk->reconcile; btree = S2BT(session); cursor = &cbulk->cbt.iface; + start_ts = WT_TS_NONE; + stop_ts = WT_TS_MAX; key = &r->k; val = &r->v; WT_RET(__rec_cell_build_leaf_key(session, r, /* Build key cell */ cursor->key.data, cursor->key.size, &ovfl_key)); WT_RET(__rec_cell_build_val(session, r, /* Build value cell */ - cursor->value.data, cursor->value.size, WT_TS_NONE, WT_TS_MAX, 0)); + cursor->value.data, cursor->value.size, &start_ts, &stop_ts, 0)); /* Boundary: split or write the page. */ if (WT_CROSSING_SPLIT_BND(r, key->len + val->len)) { @@ -3511,10 +3516,10 @@ __wt_bulk_insert_row(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) r->all_empty_value = false; if (btree->dictionary) WT_RET(__rec_dict_replace( - session, r, WT_TS_NONE, WT_TS_MAX, 0, val)); + session, r, &start_ts, &stop_ts, 0, val)); __rec_image_copy(session, r, val); } - __rec_addr_ts_update(r, WT_TS_NONE, WT_TS_NONE, WT_TS_MAX); + __rec_addr_ts_update(r, start_ts, start_ts, stop_ts); /* Update compression state. */ __rec_key_state_update(r, ovfl_key); @@ -3632,14 +3637,17 @@ __wt_bulk_insert_var( WT_BTREE *btree; WT_KV *val; WT_RECONCILE *r; + wt_timestamp_t start_ts, stop_ts; r = cbulk->reconcile; btree = S2BT(session); + start_ts = WT_TS_NONE; + stop_ts = WT_TS_MAX; val = &r->v; if (deleted) { val->cell_len = __wt_cell_pack_del( - session, &val->cell, WT_TS_NONE, WT_TS_MAX, cbulk->rle); + session, &val->cell, &start_ts, &stop_ts, cbulk->rle); val->buf.data = NULL; val->buf.size = 0; val->len = val->cell_len; @@ -3651,7 +3659,7 @@ __wt_bulk_insert_var( */ WT_RET(__rec_cell_build_val(session, r, cbulk->last.data, cbulk->last.size, - WT_TS_NONE, WT_TS_MAX, cbulk->rle)); + &start_ts, &stop_ts, cbulk->rle)); /* Boundary: split or write the page. */ if (WT_CROSSING_SPLIT_BND(r, val->len)) @@ -3660,9 +3668,9 @@ __wt_bulk_insert_var( /* Copy the value onto the page. */ if (btree->dictionary) WT_RET(__rec_dict_replace( - session, r, WT_TS_NONE, WT_TS_MAX, cbulk->rle, val)); + session, r, &start_ts, &stop_ts, cbulk->rle, val)); __rec_image_copy(session, r, val); - __rec_addr_ts_update(r, WT_TS_NONE, WT_TS_NONE, WT_TS_MAX); + __rec_addr_ts_update(r, start_ts, start_ts, stop_ts); /* Update the starting record number in case we split. */ r->recno += cbulk->rle; @@ -4090,19 +4098,19 @@ __rec_col_var_helper(WT_SESSION_IMPL *session, WT_RECONCILE *r, if (deleted) { val->cell_len = __wt_cell_pack_del( - session, &val->cell, start_ts, stop_ts, rle); + session, &val->cell, &start_ts, &stop_ts, rle); val->buf.data = NULL; val->buf.size = 0; val->len = val->cell_len; } else if (overflow_type) { val->cell_len = __wt_cell_pack_ovfl(session, &val->cell, - WT_CELL_VALUE_OVFL, start_ts, stop_ts, rle, value->size); + WT_CELL_VALUE_OVFL, &start_ts, &stop_ts, rle, value->size); val->buf.data = value->data; val->buf.size = value->size; val->len = val->cell_len + value->size; } else WT_RET(__rec_cell_build_val(session, - r, value->data, value->size, start_ts, stop_ts, rle)); + r, value->data, value->size, &start_ts, &stop_ts, rle)); /* Boundary: split or write the page. */ if (__rec_need_split(r, val->len)) @@ -4111,7 +4119,7 @@ __rec_col_var_helper(WT_SESSION_IMPL *session, WT_RECONCILE *r, /* Copy the value onto the page. */ if (!deleted && !overflow_type && btree->dictionary) WT_RET(__rec_dict_replace( - session, r, start_ts, stop_ts, rle, val)); + session, r, &start_ts, &stop_ts, rle, val)); __rec_image_copy(session, r, val); __rec_addr_ts_update(r, start_ts, start_ts, stop_ts); @@ -4986,6 +4994,27 @@ __rec_row_merge(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) } /* + * __rec_row_zero_len -- + * Return if a zero-length item can be written. + */ +static bool +__rec_row_zero_len(WT_SESSION_IMPL *session, + uint64_t txnid, wt_timestamp_t start_ts, wt_timestamp_t stop_ts) +{ + /* Before timestamps were stored on pages, it was always possible. */ + if (!__wt_process.page_version_ts) + return (true); + + /* + * The item must be globally visible because we're not writing anything + * on the page. + */ + return ((start_ts == WT_TS_NONE || + __wt_txn_visible_all(session, txnid, start_ts)) && + stop_ts == WT_TS_MAX); +} + +/* * __rec_row_leaf -- * Reconcile a row-store leaf page. */ @@ -5110,8 +5139,8 @@ __rec_row_leaf(WT_SESSION_IMPL *session, p = tmpval->data; size = tmpval->size; } - WT_ERR(__rec_cell_build_val( - session, r, p, size, start_ts, stop_ts, 0)); + WT_ERR(__rec_cell_build_val(session, + r, p, size, &start_ts, &stop_ts, 0)); dictionary = true; } else if (vpack->raw == WT_CELL_VALUE_OVFL_RM) { /* @@ -5157,7 +5186,7 @@ __rec_row_leaf(WT_SESSION_IMPL *session, */ WT_ERR(__rec_cell_build_val(session, r, "ovfl-unused", strlen("ovfl-unused"), - start_ts, stop_ts, 0)); + &start_ts, &stop_ts, 0)); } else { val->buf.data = vpack->cell; val->buf.size = __wt_cell_total_len(vpack); @@ -5185,14 +5214,14 @@ __rec_row_leaf(WT_SESSION_IMPL *session, WT_ERR(__rec_cell_build_val(session, r, cbt->iface.value.data, cbt->iface.value.size, - start_ts, stop_ts, 0)); + &start_ts, &stop_ts, 0)); dictionary = true; break; case WT_UPDATE_STANDARD: /* Take the value from the update. */ WT_ERR(__rec_cell_build_val(session, r, upd->data, upd->size, - start_ts, stop_ts, 0)); + &start_ts, &stop_ts, 0)); dictionary = true; break; case WT_UPDATE_TOMBSTONE: @@ -5338,20 +5367,16 @@ build: session, r, key->len + val->len)); } - /* - * Copy the key/value pair onto the page. Zero-length items must - * be globally visible as we're writing nothing to the page. - */ + /* Copy the key/value pair onto the page. */ __rec_image_copy(session, r, key); if (val->len == 0 && - (!__wt_process.page_version_ts || - __wt_txn_visible_all(session, txnid, stop_ts))) + __rec_row_zero_len(session, txnid, start_ts, stop_ts)) r->any_empty_value = true; else { r->all_empty_value = false; if (dictionary && btree->dictionary) WT_ERR(__rec_dict_replace( - session, r, start_ts, stop_ts, 0, val)); + session, r, &start_ts, &stop_ts, 0, val)); __rec_image_copy(session, r, val); } __rec_addr_ts_update(r, start_ts, start_ts, stop_ts); @@ -5438,12 +5463,12 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins) session, cbt, upd, F_ISSET(r, WT_REC_VISIBLE_ALL))); WT_RET(__rec_cell_build_val(session, r, cbt->iface.value.data, cbt->iface.value.size, - start_ts, stop_ts, 0)); + &start_ts, &stop_ts, 0)); break; case WT_UPDATE_STANDARD: /* Take the value from the update. */ WT_RET(__rec_cell_build_val(session, r, - upd->data, upd->size, start_ts, stop_ts, 0)); + upd->data, upd->size, &start_ts, &stop_ts, 0)); break; case WT_UPDATE_TOMBSTONE: continue; @@ -5472,20 +5497,16 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins) session, r, key->len + val->len)); } - /* - * Copy the key/value pair onto the page. Zero-length items must - * be globally visible as we're writing nothing to the page. - */ + /* Copy the key/value pair onto the page. */ __rec_image_copy(session, r, key); if (val->len == 0 && - (!__wt_process.page_version_ts && - __wt_txn_visible_all(session, txnid, stop_ts))) + __rec_row_zero_len(session, txnid, start_ts, stop_ts)) r->any_empty_value = true; else { r->all_empty_value = false; if (btree->dictionary) WT_RET(__rec_dict_replace( - session, r, start_ts, stop_ts, 0, val)); + session, r, &start_ts, &stop_ts, 0, val)); __rec_image_copy(session, r, val); } __rec_addr_ts_update(r, start_ts, start_ts, stop_ts); @@ -5909,12 +5930,13 @@ __rec_cell_build_int_key(WT_SESSION_IMPL *session, { WT_BTREE *btree; WT_KV *key; + wt_timestamp_t start_ts, stop_ts; *is_ovflp = false; btree = S2BT(session); - key = &r->k; + start_ts = stop_ts = WT_TS_NONE; /* Keys aren't timestamped. */ /* Copy the bytes into the "current" and key buffers. */ WT_RET(__wt_buf_set(session, r->cur, data, size)); @@ -5926,7 +5948,7 @@ __rec_cell_build_int_key(WT_SESSION_IMPL *session, *is_ovflp = true; return (__rec_cell_build_ovfl(session, r, - key, WT_CELL_KEY_OVFL, WT_TS_NONE, WT_TS_NONE, 0)); + key, WT_CELL_KEY_OVFL, &start_ts, &stop_ts, 0)); } key->cell_len = __wt_cell_pack_int_key(&key->cell, key->buf.size); @@ -5946,6 +5968,7 @@ __rec_cell_build_leaf_key(WT_SESSION_IMPL *session, { WT_BTREE *btree; WT_KV *key; + wt_timestamp_t start_ts, stop_ts; size_t pfx_max; const uint8_t *a, *b; uint8_t pfx; @@ -5953,7 +5976,7 @@ __rec_cell_build_leaf_key(WT_SESSION_IMPL *session, *is_ovflp = false; btree = S2BT(session); - + start_ts = stop_ts = WT_TS_NONE; /* Keys aren't timestamped. */ key = &r->k; pfx = 0; @@ -6026,7 +6049,7 @@ __rec_cell_build_leaf_key(WT_SESSION_IMPL *session, *is_ovflp = true; return (__rec_cell_build_ovfl(session, r, key, - WT_CELL_KEY_OVFL, WT_TS_NONE, WT_TS_NONE, 0)); + WT_CELL_KEY_OVFL, &start_ts, &stop_ts, 0)); } return ( __rec_cell_build_leaf_key(session, r, NULL, 0, is_ovflp)); @@ -6104,7 +6127,7 @@ __rec_cell_build_addr(WT_SESSION_IMPL *session, static int __rec_cell_build_val(WT_SESSION_IMPL *session, WT_RECONCILE *r, const void *data, size_t size, - wt_timestamp_t start_ts, wt_timestamp_t stop_ts, uint64_t rle) + wt_timestamp_t *start_tsp, wt_timestamp_t *stop_tsp, uint64_t rle) { WT_BTREE *btree; WT_KV *val; @@ -6133,11 +6156,11 @@ __rec_cell_build_val(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_STAT_DATA_INCR(session, rec_overflow_value); return (__rec_cell_build_ovfl(session, r, - val, WT_CELL_VALUE_OVFL, start_ts, stop_ts, rle)); + val, WT_CELL_VALUE_OVFL, start_tsp, stop_tsp, rle)); } } - val->cell_len = __wt_cell_pack_data( - session, &val->cell, start_ts, stop_ts, rle, val->buf.size); + val->cell_len = __wt_cell_pack_value( + session, &val->cell, start_tsp, stop_tsp, rle, val->buf.size); val->len = val->cell_len + val->buf.size; return (0); @@ -6150,7 +6173,7 @@ __rec_cell_build_val(WT_SESSION_IMPL *session, WT_RECONCILE *r, static int __rec_cell_build_ovfl(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_KV *kv, uint8_t type, - wt_timestamp_t start_ts, wt_timestamp_t stop_ts, uint64_t rle) + wt_timestamp_t *start_tsp, wt_timestamp_t *stop_tsp, uint64_t rle) { WT_BM *bm; WT_BTREE *btree; @@ -6210,7 +6233,7 @@ __rec_cell_build_ovfl(WT_SESSION_IMPL *session, /* Build the cell and return. */ kv->cell_len = __wt_cell_pack_ovfl( - session, &kv->cell, type, start_ts, stop_ts, rle, kv->buf.size); + session, &kv->cell, type, start_tsp, stop_tsp, rle, kv->buf.size); kv->len = kv->cell_len + kv->buf.size; err: __wt_scr_free(session, &tmp); @@ -6373,7 +6396,7 @@ __rec_dictionary_lookup( hash = __wt_hash_fnv64(val->buf.data, val->buf.size); for (dp = __rec_dictionary_skip_search(r->dictionary_head, hash); dp != NULL && dp->hash == hash; dp = dp->next[0]) { - WT_RET(__wt_cell_pack_data_match( + WT_RET(__wt_cell_pack_value_match( (WT_CELL *)((uint8_t *)r->cur_ptr->image.mem + dp->offset), &val->cell, val->buf.data, &match)); if (match) { |