diff options
author | Keith Bostic <keith@wiredtiger.com> | 2012-10-24 20:01:10 +0000 |
---|---|---|
committer | Keith Bostic <keith@wiredtiger.com> | 2012-10-24 20:01:10 +0000 |
commit | 448ac2fd58f312861c55508dc0976138831d0775 (patch) | |
tree | 926cbe8fc4174d12d86f28e18260ace4ca203826 | |
parent | 5da25a5291b2db1103b4c358b12ee386b0604a24 (diff) | |
download | mongo-448ac2fd58f312861c55508dc0976138831d0775.tar.gz |
Instead of entering a fake key cell after the last cell on the page just
in case the page ends with a key cell which has no value, use the end of
the page to detect that case. This simplifies the changes required for
the new compression API. Reference #324.
-rw-r--r-- | src/btree/bt_page.c | 8 | ||||
-rw-r--r-- | src/btree/rec_write.c | 39 | ||||
-rw-r--r-- | src/btree/row_key.c | 24 | ||||
-rw-r--r-- | src/docs/upgrading.dox | 11 | ||||
-rw-r--r-- | src/include/cell.i | 28 |
5 files changed, 39 insertions, 71 deletions
diff --git a/src/btree/bt_page.c b/src/btree/bt_page.c index 2631ee29d39..5a0c17dcf5f 100644 --- a/src/btree/bt_page.c +++ b/src/btree/bt_page.c @@ -515,6 +515,14 @@ __inmem_row_leaf(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *inmem_sizep) } } + /* + * We use the fact that cells exactly fill a page to detect the case of + * a row-store leaf page where the last cell is a key (that is, there's + * no subsequent value cell). Assert that to be true, the bug would be + * difficult to find/diagnose in the field. + */ + WT_ASSERT(session, cell == (WT_CELL *)((uint8_t *)dsk + dsk->size)); + WT_RET((__wt_calloc_def(session, (size_t)nindx, &page->u.row.d))); if (inmem_sizep != NULL) *inmem_sizep += nindx * sizeof(*page->u.row.d); diff --git a/src/btree/rec_write.c b/src/btree/rec_write.c index e13184b389d..a23afca13d9 100644 --- a/src/btree/rec_write.c +++ b/src/btree/rec_write.c @@ -1191,7 +1191,6 @@ static int __rec_split_write(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_BOUNDARY *bnd, WT_ITEM *buf, int checkpoint) { - WT_CELL *cell; WT_PAGE_HEADER *dsk; uint32_t size; uint8_t addr[WT_BTREE_MAX_ADDR_COOKIE]; @@ -1199,27 +1198,6 @@ __rec_split_write(WT_SESSION_IMPL *session, dsk = buf->mem; /* - * We always write an additional byte on row-store leaf pages after the - * key value pairs. The reason is that zero-length value items are not - * written on the page and they're detected by finding two adjacent key - * cells. If the last value item on a page is zero length, we need a - * key cell after it on the page to detect it. The row-store leaf page - * reconciliation code made sure we had a spare byte in the buffer, now - * write a trailing zero-length key cell. This isn't a valid key cell, - * but since it's not referenced by the entries on the page, no code but - * the code reading after the key cell, to find the key value, will ever - * see it. - */ -#define WT_TRAILING_KEY_CELL (sizeof(uint8_t)) - if (dsk->type == WT_PAGE_ROW_LEAF) { - WT_ASSERT_RET(session, buf->size < buf->memsize); - - cell = (WT_CELL *)&(((uint8_t *)buf->data)[buf->size]); - __wt_cell_pack_key_empty(cell); - ++buf->size; - } - - /* * Write the chunk and save the location information. There is one big * question: if this is a checkpoint, we're going to have to wrap up * our tracking information (freeing blocks we no longer need) before we @@ -1441,11 +1419,8 @@ __wt_rec_row_bulk_insert(WT_CURSOR_BULK *cbulk) /* * Boundary, split or write the page. - * - * We write a trailing key cell on the page after the K/V pairs - * (see WT_TRAILING_KEY_CELL for more information). */ - while (key->len + val->len + WT_TRAILING_KEY_CELL > r->space_avail) { + while (key->len + val->len > r->space_avail) { /* Split the page. */ WT_RET(__rec_split(session, r)); @@ -2999,12 +2974,8 @@ __rec_row_leaf(WT_SESSION_IMPL *session, /* * Boundary, split or write the page. - * - * We write a trailing key cell on the page after the K/V pairs - * (see WT_TRAILING_KEY_CELL for more information). */ - while (key->len + - val->len + WT_TRAILING_KEY_CELL > r->space_avail) { + while (key->len + val->len > r->space_avail) { /* * In one path above, we copied the key from the page * rather than building the actual key. In that case, @@ -3086,12 +3057,8 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins) /* * Boundary, split or write the page. - * - * We write a trailing key cell on the page after the K/V pairs - * (see WT_TRAILING_KEY_CELL for more information). */ - while (key->len + - val->len + WT_TRAILING_KEY_CELL > r->space_avail) { + while (key->len + val->len > r->space_avail) { WT_RET(__rec_split(session, r)); /* diff --git a/src/btree/row_key.c b/src/btree/row_key.c index bf2326a19e4..f6dc88aa5af 100644 --- a/src/btree/row_key.c +++ b/src/btree/row_key.c @@ -342,16 +342,17 @@ WT_CELL * __wt_row_value(WT_PAGE *page, WT_ROW *rip) { WT_CELL *cell; + WT_CELL_UNPACK unpack; u_int type; cell = WT_ROW_KEY_COPY(rip); /* * Key copied. * - * Now, cell either references a WT_IKEY structure that has a value-cell - * offset, or references the on-page key WT_CELL, and we can walk past - * that to find the value WT_CELL. Both can be processed regardless of - * what other threads are doing. + * Cell now either references a WT_IKEY structure with a cell offset, + * or references the on-page key WT_CELL. Both can be processed + * regardless of what other threads are doing. If it's the former, + * use it to get the latter. */ if (__wt_off_page(page, cell)) cell = WT_PAGE_REF_OFFSET(page, ((WT_IKEY *)cell)->cell_offset); @@ -359,10 +360,19 @@ __wt_row_value(WT_PAGE *page, WT_ROW *rip) /* * Row-store leaf pages may have a single data cell between each key, or * keys may be adjacent (when the data cell is empty). Move to the next - * key. The page reconciliation code guarantees there is always a key - * cell after an empty data cell, so this is safe. + * cell and check its type. + * + * One special case: if the last key on a page is a key without a value, + * don't walk off the end of the page: the size of the underlying disk + * image is exact, which means the end of the last cell on the page plus + * the length of the cell should be the byte immediately after the page + * disk image. */ - cell = __wt_cell_next(cell); + __wt_cell_unpack(cell, &unpack); + cell = (WT_CELL *)((uint8_t *)cell + __wt_cell_total_len(&unpack)); + if (__wt_off_page(page, cell)) + return (NULL); + type = __wt_cell_type(cell); return (type == WT_CELL_KEY || type == WT_CELL_KEY_OVFL ? NULL : cell); } diff --git a/src/docs/upgrading.dox b/src/docs/upgrading.dox index ed2a6927fe5..65894cb032f 100644 --- a/src/docs/upgrading.dox +++ b/src/docs/upgrading.dox @@ -1,5 +1,16 @@ /*! @page upgrading Upgrading WiredTiger applications +@section version_135 Upgrading to Version 1.3.5 +<dl> + +<dt>Version 1.3.5 file format changes</dt> +<dd> +The underlying file formats changed in the 1.3.5 release; tables and files +should be dumped and re-loaded into a new database. +</dd> + +</dl> +<hr> @section version_13 Upgrading to Version 1.3 <dl> diff --git a/src/include/cell.i b/src/include/cell.i index 888a367801d..7a919d5ebe3 100644 --- a/src/include/cell.i +++ b/src/include/cell.i @@ -330,21 +330,6 @@ __wt_cell_pack_key(WT_CELL *cell, uint8_t prefix, uint32_t size) } /* - * __wt_cell_pack_key_empty -- - * Write an empty key cell. - */ -static inline void -__wt_cell_pack_key_empty(WT_CELL *cell) -{ - /* - * At the end of a row-store leaf page we have to write an empty key to - * act as a marker in case the last value on the page is zero-length. - * See the caller of this function for details. - */ - cell->__chunk[0] = WT_CELL_KEY; -} - -/* * __wt_cell_pack_ovfl -- * Pack an overflow cell. */ @@ -629,19 +614,6 @@ __wt_cell_unpack(WT_CELL *cell, WT_CELL_UNPACK *unpack) } /* - * __wt_cell_next -- - * Return the next WT_CELL on the page. - */ -static inline WT_CELL * -__wt_cell_next(WT_CELL *cell) -{ - WT_CELL_UNPACK unpack; - - __wt_cell_unpack(cell, &unpack); - return ((WT_CELL *)((uint8_t *)cell + unpack.__len)); -} - -/* * __wt_cell_unpack_ref -- * Set a buffer to reference the data from an unpacked cell. */ |