diff options
author | Michael Cahill <michael.cahill@mongodb.com> | 2015-11-02 11:55:14 +1100 |
---|---|---|
committer | Michael Cahill <michael.cahill@mongodb.com> | 2015-11-02 11:55:14 +1100 |
commit | fb6ebe75207c3221314ed318595489a838ef1db0 (patch) | |
tree | 6b9b210b15f9b9685b9a5dd707001297127ee1d3 /src/third_party/wiredtiger/src/reconcile | |
parent | 4fbfa13ec0f819080a35ed8b528a030797e483a6 (diff) | |
download | mongo-fb6ebe75207c3221314ed318595489a838ef1db0.tar.gz |
Import wiredtiger-wiredtiger-mongodb-3.2.0-rc1-194-g0dc3f20.tar.gz from wiredtiger branch mongodb-3.2
Diffstat (limited to 'src/third_party/wiredtiger/src/reconcile')
-rw-r--r-- | src/third_party/wiredtiger/src/reconcile/rec_write.c | 270 |
1 files changed, 139 insertions, 131 deletions
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c index 40917bebf56..965f798e820 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_write.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c @@ -25,7 +25,7 @@ typedef struct { WT_PAGE *page; uint32_t flags; /* Caller's configuration */ - WT_ITEM dsk; /* Temporary disk-image buffer */ + WT_ITEM disk_image; /* Temporary disk-image buffer */ /* * Track start/stop write generation to decide if all changes to the @@ -40,9 +40,7 @@ typedef struct { uint64_t orig_btree_checkpoint_gen; uint64_t orig_txn_checkpoint_gen; - /* - * Track maximum transaction ID seen and first unwritten transaction ID. - */ + /* Track the page's maximum transaction ID. */ uint64_t max_txn; /* @@ -161,7 +159,7 @@ typedef struct { WT_ADDR addr; /* Split's written location */ uint32_t size; /* Split's size */ uint32_t cksum; /* Split's checksum */ - void *dsk; /* Split's disk image */ + void *disk_image; /* Split's disk image */ /* * Saved update list, supporting the WT_EVICT_UPDATE_RESTORE and @@ -450,12 +448,16 @@ __wt_reconcile(WT_SESSION_IMPL *session, } /* - * Clean up the boundary structures: some workloads result in millions - * of these structures, and if associated with some random session that - * got roped into doing forced eviction, they won't be discarded for the - * life of the session. + * Clean up reconciliation resources: some workloads have millions of + * boundary structures, and if associated with an application session + * pulled into doing forced eviction, they won't be discarded for the + * life of the session (or until session.reset is called). Discard all + * of the reconciliation resources if an application thread, not doing + * a checkpoint. */ - __rec_bnd_cleanup(session, r, false); + __rec_bnd_cleanup(session, r, + F_ISSET(session, WT_SESSION_INTERNAL) || + WT_SESSION_IS_CHECKPOINT(session) ? false : true); WT_RET(ret); @@ -619,7 +621,6 @@ __rec_root_write(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t flags) switch (mod->rec_result) { case WT_PM_REC_EMPTY: /* Page is empty */ case WT_PM_REC_REPLACE: /* 1-for-1 page swap */ - case WT_PM_REC_REWRITE: /* Rewrite */ return (0); case WT_PM_REC_MULTIBLOCK: /* Multiple blocks */ break; @@ -647,6 +648,12 @@ __rec_root_write(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t flags) WT_INTL_INDEX_GET(session, next, pindex); for (i = 0; i < mod->mod_multi_entries; ++i) { + /* + * There's special error handling required when re-instantiating + * pages in memory; it's not needed here, asserted for safety. + */ + WT_ASSERT(session, mod->mod_multi[i].supd == NULL); + WT_ERR(__wt_multi_to_ref(session, next, &mod->mod_multi[i], &pindex->index[i], NULL)); pindex->index[i]->home = next; @@ -751,7 +758,7 @@ __rec_write_init(WT_SESSION_IMPL *session, r->last = &r->_last; /* Disk buffers need to be aligned for writing. */ - F_SET(&r->dsk, WT_ITEM_ALIGNED); + F_SET(&r->disk_image, WT_ITEM_ALIGNED); } /* Reconciliation is not re-entrant, make sure that doesn't happen. */ @@ -809,6 +816,9 @@ __rec_write_init(WT_SESSION_IMPL *session, } r->flags = flags; + /* Track the page's maximum transaction ID. */ + r->max_txn = WT_TXN_NONE; + /* Track if the page can be marked clean. */ r->leave_dirty = false; @@ -890,7 +900,7 @@ __rec_destroy(WT_SESSION_IMPL *session, void *reconcilep) return; *(WT_RECONCILE **)reconcilep = NULL; - __wt_buf_free(session, &r->dsk); + __wt_buf_free(session, &r->disk_image); __wt_free(session, r->raw_entries); __wt_free(session, r->raw_offsets); @@ -945,14 +955,15 @@ __rec_bnd_cleanup(WT_SESSION_IMPL *session, WT_RECONCILE *r, bool destroy) * * During some big-page evictions we have seen boundary arrays that have * millions of elements. That should not be a normal event, but if the - * memory is associated with a random session, it won't be discarded - * until the session is closed. If there are more than 10,000 boundary - * structure elements, destroy the boundary array and we'll start over. + * memory is associated with a random application session, it won't be + * discarded until the session is closed or reset. If there are more + * than 10,000 boundary structure elements, discard the boundary array + * entirely and start over next time. */ if (destroy || r->bnd_entries > 10 * 1000) { for (bnd = r->bnd, i = 0; i < r->bnd_entries; ++bnd, ++i) { __wt_free(session, bnd->addr.addr); - __wt_free(session, bnd->dsk); + __wt_free(session, bnd->disk_image); __wt_free(session, bnd->supd); __wt_buf_free(session, &bnd->key); } @@ -973,7 +984,7 @@ __rec_bnd_cleanup(WT_SESSION_IMPL *session, WT_RECONCILE *r, bool destroy) ++last_used; for (bnd = r->bnd, i = 0; i < last_used; ++bnd, ++i) { __wt_free(session, bnd->addr.addr); - __wt_free(session, bnd->dsk); + __wt_free(session, bnd->disk_image); __wt_free(session, bnd->supd); } } @@ -1436,7 +1447,7 @@ __rec_child_deleted(WT_SESSION_IMPL *session, * If there are deleted child pages we can't discard immediately, keep * the page dirty so they are eventually freed. */ - r->leave_dirty = 1; + r->leave_dirty = true; /* * If the original page cannot be freed, we need to keep a slot on the @@ -1631,8 +1642,8 @@ __rec_incr(WT_SESSION_IMPL *session, WT_RECONCILE *r, uint32_t v, size_t size) * for overflow in diagnostic mode. */ WT_ASSERT(session, r->space_avail >= size); - WT_ASSERT(session, - WT_BLOCK_FITS(r->first_free, size, r->dsk.mem, r->dsk.memsize)); + WT_ASSERT(session, WT_BLOCK_FITS( + r->first_free, size, r->disk_image.mem, r->disk_image.memsize)); r->entries += v; r->space_avail -= size; @@ -1854,7 +1865,7 @@ __rec_split_bnd_init(WT_SESSION_IMPL *session, WT_BOUNDARY *bnd) WT_CLEAR(bnd->addr); bnd->size = 0; bnd->cksum = 0; - __wt_free(session, bnd->dsk); + __wt_free(session, bnd->disk_image); __wt_free(session, bnd->supd); bnd->supd_next = 0; @@ -1967,14 +1978,14 @@ __rec_split_init(WT_SESSION_IMPL *session, */ corrected_page_size = r->page_size; WT_RET(bm->write_size(bm, session, &corrected_page_size)); - WT_RET(__wt_buf_init(session, &r->dsk, corrected_page_size)); + WT_RET(__wt_buf_init(session, &r->disk_image, corrected_page_size)); /* * Clear the disk page's header and block-manager space, set the page * type (the type doesn't change, and setting it later would require * additional code in a few different places). */ - dsk = r->dsk.mem; + dsk = r->disk_image.mem; memset(dsk, 0, WT_PAGE_HEADER_BYTE_SIZE(btree)); dsk->type = page->type; @@ -2253,11 +2264,11 @@ __rec_split_grow(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t add_len) btree = S2BT(session); bm = btree->bm; - len = WT_PTRDIFF(r->first_free, r->dsk.mem); + len = WT_PTRDIFF(r->first_free, r->disk_image.mem); corrected_page_size = len + add_len; WT_RET(bm->write_size(bm, session, &corrected_page_size)); - WT_RET(__wt_buf_grow(session, &r->dsk, corrected_page_size)); - r->first_free = (uint8_t *)r->dsk.mem + len; + WT_RET(__wt_buf_grow(session, &r->disk_image, corrected_page_size)); + r->first_free = (uint8_t *)r->disk_image.mem + len; WT_ASSERT(session, corrected_page_size >= len); r->space_avail = corrected_page_size - len; WT_ASSERT(session, r->space_avail >= add_len); @@ -2278,7 +2289,7 @@ __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) size_t inuse; btree = S2BT(session); - dsk = r->dsk.mem; + dsk = r->disk_image.mem; /* * We should never split during salvage, and we're about to drop core @@ -2410,8 +2421,10 @@ __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) /* Finalize the header information and write the page. */ dsk->recno = last->recno; dsk->u.entries = r->entries; - dsk->mem_size = r->dsk.size = WT_PTRDIFF32(r->first_free, dsk); - WT_RET(__rec_split_write(session, r, last, &r->dsk, false)); + dsk->mem_size = + r->disk_image.size = WT_PTRDIFF32(r->first_free, dsk); + WT_RET( + __rec_split_write(session, r, last, &r->disk_image, false)); /* * Set the caller's entry count and buffer information for the @@ -2475,7 +2488,7 @@ __rec_split_raw_worker(WT_SESSION_IMPL *session, unpack = &_unpack; compressor = btree->compressor; dst = &r->raw_destination; - dsk = r->dsk.mem; + dsk = r->disk_image.mem; WT_RET(__rec_split_bnd_grow(session, r)); last = &r->bnd[r->bnd_next]; @@ -2751,7 +2764,7 @@ no_slots: r->first_free = dsk_start + len; r->space_avail += r->raw_offsets[result_slots]; WT_ASSERT(session, r->first_free + r->space_avail <= - (uint8_t *)r->dsk.mem + r->dsk.memsize); + (uint8_t *)r->disk_image.mem + r->disk_image.memsize); /* * Set the key for the next block (before writing the block, a @@ -2788,14 +2801,15 @@ no_slots: WT_STAT_FAST_DATA_INCR(session, compress_raw_fail); dsk->recno = last->recno; - dsk->mem_size = r->dsk.size = WT_PTRDIFF32(r->first_free, dsk); + dsk->mem_size = + r->disk_image.size = WT_PTRDIFF32(r->first_free, dsk); dsk->u.entries = r->entries; r->entries = 0; r->first_free = WT_PAGE_HEADER_BYTE(btree, dsk); r->space_avail = r->page_size - WT_PAGE_HEADER_BYTE_SIZE(btree); - write_ref = &r->dsk; + write_ref = &r->disk_image; last->already_compressed = false; } else { /* @@ -2823,7 +2837,7 @@ no_slots: last_block && __rec_is_checkpoint(session, r, last)) { if (write_ref == dst) WT_RET(__wt_buf_set( - session, &r->dsk, dst->mem, dst->size)); + session, &r->disk_image, dst->mem, dst->size)); } else WT_RET( __rec_split_write(session, r, last, write_ref, last_block)); @@ -2966,14 +2980,14 @@ __rec_split_finish_std(WT_SESSION_IMPL *session, WT_RECONCILE *r) bnd->entries = r->entries; /* Finalize the header information. */ - dsk = r->dsk.mem; + dsk = r->disk_image.mem; dsk->recno = bnd->recno; dsk->u.entries = r->entries; - dsk->mem_size = r->dsk.size = WT_PTRDIFF32(r->first_free, dsk); + dsk->mem_size = r->disk_image.size = WT_PTRDIFF32(r->first_free, dsk); /* If this is a checkpoint, we're done, otherwise write the page. */ return (__rec_is_checkpoint(session, r, bnd) ? - 0 : __rec_split_write(session, r, bnd, &r->dsk, true)); + 0 : __rec_split_write(session, r, bnd, &r->disk_image, true)); } /* @@ -3023,9 +3037,9 @@ __rec_split_fixup(WT_SESSION_IMPL *session, WT_RECONCILE *r) * WT_PAGE_HEADER header onto the scratch buffer, most of the header * information remains unchanged between the pages. */ - WT_RET(__wt_scr_alloc(session, r->dsk.memsize, &tmp)); + WT_RET(__wt_scr_alloc(session, r->disk_image.memsize, &tmp)); dsk = tmp->mem; - memcpy(dsk, r->dsk.mem, WT_PAGE_HEADER_SIZE); + memcpy(dsk, r->disk_image.mem, WT_PAGE_HEADER_SIZE); /* * For each split chunk we've created, update the disk image and copy @@ -3035,7 +3049,8 @@ __rec_split_fixup(WT_SESSION_IMPL *session, WT_RECONCILE *r) for (i = 0, bnd = r->bnd; i < r->bnd_next; ++i, ++bnd) { /* Copy the page contents to the temporary buffer. */ len = (bnd + 1)->offset - bnd->offset; - memcpy(dsk_start, (uint8_t *)r->dsk.mem + bnd->offset, len); + memcpy(dsk_start, + (uint8_t *)r->disk_image.mem + bnd->offset, len); /* Finalize the header information and write the page. */ dsk->recno = bnd->recno; @@ -3060,12 +3075,12 @@ __rec_split_fixup(WT_SESSION_IMPL *session, WT_RECONCILE *r) * chunk, including header, because if there was room for that large a * remnant, we wouldn't have switched from accumulating to a page end. */ - p = (uint8_t *)r->dsk.mem + bnd->offset; + p = (uint8_t *)r->disk_image.mem + bnd->offset; len = WT_PTRDIFF(r->first_free, p); if (len >= r->split_size - WT_PAGE_HEADER_BYTE_SIZE(btree)) WT_PANIC_ERR(session, EINVAL, "Reconciliation remnant too large for the split buffer"); - dsk = r->dsk.mem; + dsk = r->disk_image.mem; dsk_start = WT_PAGE_HEADER_BYTE(btree, dsk); (void)memmove(dsk_start, p, len); @@ -3208,13 +3223,17 @@ supd_check_complete: } /* - * If using the save/restore eviction path and we had to skip updates in - * order to build this disk image, we can't actually write it. Instead, - * we will re-instantiate the page using the disk image and the list of - * updates we skipped. + * If configured for an in-memory database, or using the save/restore + * eviction path and we had to skip updates in order to build this disk + * image, we can't actually write it. Instead, we will re-instantiate + * the page using the disk image and any list of updates we skipped. */ - if (F_ISSET(r, WT_EVICT_UPDATE_RESTORE) && bnd->supd != NULL) { - r->cache_write_restore = true; + if (F_ISSET(r, WT_EVICT_IN_MEMORY) || + (F_ISSET(r, WT_EVICT_UPDATE_RESTORE) && bnd->supd != NULL)) { + + /* Statistics tracking that we used update/restore. */ + if (F_ISSET(r, WT_EVICT_UPDATE_RESTORE) && bnd->supd != NULL) + r->cache_write_restore = true; /* * If the buffer is compressed (raw compression was configured), @@ -3228,10 +3247,10 @@ supd_check_complete: */ if (bnd->already_compressed) WT_ERR(__rec_raw_decompress( - session, buf->data, buf->size, &bnd->dsk)); + session, buf->data, buf->size, &bnd->disk_image)); else { WT_ERR(__wt_strndup( - session, buf->data, buf->size, &bnd->dsk)); + session, buf->data, buf->size, &bnd->disk_image)); WT_ASSERT(session, __wt_verify_dsk_image(session, "[evict split]", buf->data, buf->size, true) == 0); } @@ -3784,8 +3803,6 @@ __rec_col_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) case WT_PM_REC_REPLACE: addr = &child->modify->mod_replace; break; - case WT_PM_REC_REWRITE: - break; WT_ILLEGAL_VALUE_ERR(session); } break; @@ -5281,7 +5298,7 @@ __rec_split_discard(WT_SESSION_IMPL *session, WT_PAGE *page) __wt_free(session, multi->key.ikey); break; } - if (multi->supd == NULL) { + if (multi->disk_image == NULL) { if (multi->addr.reuse) multi->addr.addr = NULL; else { @@ -5291,7 +5308,7 @@ __rec_split_discard(WT_SESSION_IMPL *session, WT_PAGE *page) } } else { __wt_free(session, multi->supd); - __wt_free(session, multi->supd_dsk); + __wt_free(session, multi->disk_image); } } __wt_free(session, mod->mod_multi); @@ -5319,6 +5336,44 @@ __rec_split_discard(WT_SESSION_IMPL *session, WT_PAGE *page) } /* + * __rec_split_dump_keys -- + * Dump out the split keys in verbose mode. + */ +static int +__rec_split_dump_keys(WT_SESSION_IMPL *session, WT_PAGE *page, WT_RECONCILE *r) +{ + WT_BOUNDARY *bnd; + WT_DECL_ITEM(tkey); + WT_DECL_RET; + uint32_t i; + + if (page->type == WT_PAGE_ROW_INT || page->type == WT_PAGE_ROW_LEAF) + WT_RET(__wt_scr_alloc(session, 0, &tkey)); + WT_ERR(__wt_verbose( + session, WT_VERB_SPLIT, "split: %" PRIu32 " pages", r->bnd_next)); + for (bnd = r->bnd, i = 0; i < r->bnd_next; ++bnd, ++i) + switch (page->type) { + case WT_PAGE_ROW_INT: + case WT_PAGE_ROW_LEAF: + WT_ERR(__wt_buf_set_printable( + session, tkey, bnd->key.data, bnd->key.size)); + WT_ERR(__wt_verbose(session, WT_VERB_SPLIT, + "starting key %.*s", + (int)tkey->size, (const char *)tkey->data)); + break; + case WT_PAGE_COL_FIX: + case WT_PAGE_COL_INT: + case WT_PAGE_COL_VAR: + WT_ERR(__wt_verbose(session, WT_VERB_SPLIT, + "starting recno %" PRIu64, bnd->recno)); + break; + WT_ILLEGAL_VALUE_ERR(session); + } +err: __wt_scr_free(session, &tkey); + return (ret); +} + +/* * __rec_write_wrapup -- * Finish the reconciliation. */ @@ -5328,7 +5383,6 @@ __rec_write_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) WT_BM *bm; WT_BOUNDARY *bnd; WT_BTREE *btree; - WT_MULTI *multi; WT_PAGE_MODIFY *mod; WT_REF *ref; size_t addr_size; @@ -5376,7 +5430,6 @@ __rec_write_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) case WT_PM_REC_EMPTY: /* Page deleted */ break; case WT_PM_REC_MULTIBLOCK: /* Multiple blocks */ - case WT_PM_REC_REWRITE: /* Rewrite */ /* * Discard the multiple replacement blocks. */ @@ -5442,24 +5495,14 @@ __rec_write_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) bnd = &r->bnd[0]; /* - * If we're saving/restoring changes for this page, there's - * nothing to write. Allocate, then initialize the array of - * replacement blocks. + * If saving/restoring changes for this page and there's only + * one block, there's nothing to write. This is an in-memory + * configuration or a special case of forced eviction: set up + * a single block as if to split, then use that disk image to + * rewrite the page in memory. */ - if (F_ISSET(r, WT_EVICT_UPDATE_RESTORE) && bnd->supd != NULL) { - WT_RET(__wt_calloc_def( - session, r->bnd_next, &mod->mod_multi)); - multi = mod->mod_multi; - multi->supd = bnd->supd; - multi->supd_entries = bnd->supd_next; - bnd->supd = NULL; - multi->supd_dsk = bnd->dsk; - bnd->dsk = NULL; - mod->mod_multi_entries = 1; - - mod->rec_result = WT_PM_REC_REWRITE; - break; - } + if (bnd->disk_image != NULL) + goto split; /* * If this is a root page, then we don't have an address and we @@ -5467,7 +5510,7 @@ __rec_write_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) * we were about to write the buffer so we know what to do here. */ if (bnd->addr.addr == NULL) - WT_RET(__wt_bt_write(session, &r->dsk, + WT_RET(__wt_bt_write(session, &r->disk_image, NULL, NULL, true, bnd->already_compressed)); else { mod->mod_replace = bnd->addr; @@ -5495,49 +5538,18 @@ __rec_write_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) WT_ILLEGAL_VALUE(session); } - /* Display the actual split keys. */ - if (WT_VERBOSE_ISSET(session, WT_VERB_SPLIT)) { - WT_DECL_ITEM(tkey); - WT_DECL_RET; - uint32_t i; - - if (page->type == WT_PAGE_ROW_INT || - page->type == WT_PAGE_ROW_LEAF) - WT_RET(__wt_scr_alloc(session, 0, &tkey)); - for (bnd = r->bnd, i = 0; i < r->bnd_next; ++bnd, ++i) - switch (page->type) { - case WT_PAGE_ROW_INT: - case WT_PAGE_ROW_LEAF: - WT_ERR(__wt_buf_set_printable( - session, tkey, - bnd->key.data, bnd->key.size)); - WT_ERR(__wt_verbose( - session, WT_VERB_SPLIT, - "split: starting key " - "%.*s", - (int)tkey->size, - (const char *)tkey->data)); - break; - case WT_PAGE_COL_FIX: - case WT_PAGE_COL_INT: - case WT_PAGE_COL_VAR: - WT_ERR(__wt_verbose( - session, WT_VERB_SPLIT, - "split: starting recno %" PRIu64, - bnd->recno)); - break; - WT_ILLEGAL_VALUE_ERR(session); - } -err: __wt_scr_free(session, &tkey); - WT_RET(ret); - } + /* Optionally display the actual split keys in verbose mode. */ + if (WT_VERBOSE_ISSET(session, WT_VERB_SPLIT)) + WT_RET(__rec_split_dump_keys(session, page, r)); + + /* Track the largest set of page-splits. */ if (r->bnd_next > r->bnd_next_max) { r->bnd_next_max = r->bnd_next; WT_STAT_FAST_DATA_SET( session, rec_multiblock_max, r->bnd_next_max); } - switch (page->type) { +split: switch (page->type) { case WT_PAGE_ROW_INT: case WT_PAGE_ROW_LEAF: WT_RET(__rec_split_row(session, r, page)); @@ -5575,14 +5587,10 @@ __rec_write_wrapup_err(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) * information (otherwise we might think the backing block is being * reused on a subsequent reconciliation where we want to free it). */ - switch (mod->rec_result) { - case WT_PM_REC_MULTIBLOCK: - case WT_PM_REC_REWRITE: + if (mod->rec_result == WT_PM_REC_MULTIBLOCK) for (multi = mod->mod_multi, i = 0; i < mod->mod_multi_entries; ++multi, ++i) multi->addr.reuse = 0; - break; - } /* * On error, discard blocks we've written, they're unreferenced by the @@ -5641,18 +5649,18 @@ __rec_split_row(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) WT_RET(__wt_row_ikey_alloc(session, 0, bnd->key.data, bnd->key.size, &multi->key.ikey)); - if (F_ISSET(r, WT_EVICT_UPDATE_RESTORE) && bnd->supd != NULL) { - multi->supd = bnd->supd; - multi->supd_entries = bnd->supd_next; - bnd->supd = NULL; - multi->supd_dsk = bnd->dsk; - bnd->dsk = NULL; - } else { + if (bnd->disk_image == NULL) { multi->addr = bnd->addr; multi->addr.reuse = 0; multi->size = bnd->size; multi->cksum = bnd->cksum; bnd->addr.addr = NULL; + } else { + multi->supd = bnd->supd; + multi->supd_entries = bnd->supd_next; + bnd->supd = NULL; + multi->disk_image = bnd->disk_image; + bnd->disk_image = NULL; } } mod->mod_multi_entries = r->bnd_next; @@ -5681,18 +5689,18 @@ __rec_split_col(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) bnd = r->bnd, i = 0; i < r->bnd_next; ++multi, ++bnd, ++i) { multi->key.recno = bnd->recno; - if (F_ISSET(r, WT_EVICT_UPDATE_RESTORE) && bnd->supd != NULL) { - multi->supd = bnd->supd; - multi->supd_entries = bnd->supd_next; - bnd->supd = NULL; - multi->supd_dsk = bnd->dsk; - bnd->dsk = NULL; - } else { + if (bnd->disk_image == NULL) { multi->addr = bnd->addr; multi->addr.reuse = 0; multi->size = bnd->size; multi->cksum = bnd->cksum; bnd->addr.addr = NULL; + } else { + multi->supd = bnd->supd; + multi->supd_entries = bnd->supd_next; + bnd->supd = NULL; + multi->disk_image = bnd->disk_image; + bnd->disk_image = NULL; } } mod->mod_multi_entries = r->bnd_next; |