diff options
author | Keith Bostic <keith@wiredtiger.com> | 2014-06-16 09:40:20 -0400 |
---|---|---|
committer | Keith Bostic <keith@wiredtiger.com> | 2014-06-16 09:40:20 -0400 |
commit | 9524e17a7e5471aa75cbfc484b50ad0b131639d9 (patch) | |
tree | f4983159ece1be83880f9c6d1bded2c183976760 | |
parent | 414595e57a31626ad0e3db9d40578f8f5a115c84 (diff) | |
download | mongo-9524e17a7e5471aa75cbfc484b50ad0b131639d9.tar.gz |
Add a new field to the WT_CURSOR_BTREE structure so we can pass back a
reference to the modify function's allocated update structure; this avoids
a potential data copy when changing the user's cursor to point to internal
data. Closes #1070.
-rw-r--r-- | src/btree/bt_cursor.c | 19 | ||||
-rw-r--r-- | src/btree/col_modify.c | 6 | ||||
-rw-r--r-- | src/btree/row_modify.c | 6 | ||||
-rw-r--r-- | src/include/cursor.h | 6 |
4 files changed, 28 insertions, 9 deletions
diff --git a/src/btree/bt_cursor.c b/src/btree/bt_cursor.c index d942c12f421..da2ef171414 100644 --- a/src/btree/bt_cursor.c +++ b/src/btree/bt_cursor.c @@ -681,15 +681,16 @@ retry: WT_RET(__cursor_func_init(cbt, 1)); err: if (ret == WT_RESTART) goto retry; - /* If successful, point the cursor at internal copies of the data. */ - if (ret == 0) { - if (!WT_DATA_IN_ITEM(&cursor->key)) - WT_TRET(__wt_buf_set(session, &cursor->key, - cursor->key.data, cursor->key.size)); - if (!WT_DATA_IN_ITEM(&cursor->value)) - WT_TRET(__wt_buf_set(session, &cursor->value, - cursor->value.data, cursor->value.size)); - } + /* + * If successful, point the cursor at internal copies of the data. We + * could shuffle memory in the cursor so the key/value pair are in local + * buffer memory, but that's a data copy. We don't want to do another + * search (and we might get a different update structure if we race). + * To make this work, we add a field to the btree cursor to pass back a + * pointer to the modify function's allocated update structure. + */ + if (ret == 0) + WT_TRET(__wt_kv_return(session, cbt, cbt->modify_update)); if (ret != 0) WT_TRET(__cursor_error_resolve(cbt)); diff --git a/src/btree/col_modify.c b/src/btree/col_modify.c index 41ce37a0130..e2e3adbd714 100644 --- a/src/btree/col_modify.c +++ b/src/btree/col_modify.c @@ -86,6 +86,9 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_ERR(__wt_txn_modify(session, upd)); logged = 1; + /* Avoid a data copy in WT_CURSOR.update. */ + cbt->modify_update = upd; + /* * Point the new WT_UPDATE item to the next element in the list. * If we get it right, the serialization function lock acts as @@ -135,6 +138,9 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, __wt_update_alloc(session, value, &upd, &upd_size)); WT_ERR(__wt_txn_modify(session, upd)); logged = 1; + + /* Avoid a data copy in WT_CURSOR.update. 
*/ + cbt->modify_update = upd; } else upd_size = sizeof(WT_UPDATE) + upd->size; ins->upd = upd; diff --git a/src/btree/row_modify.c b/src/btree/row_modify.c index c088fabe53a..03772e317b4 100644 --- a/src/btree/row_modify.c +++ b/src/btree/row_modify.c @@ -66,6 +66,9 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, __wt_update_alloc(session, value, &upd, &upd_size)); WT_ERR(__wt_txn_modify(session, upd)); logged = 1; + + /* Avoid WT_CURSOR.update data copy. */ + cbt->modify_update = upd; } else { upd_size = sizeof(WT_UPDATE) + upd->size; /* @@ -132,6 +135,9 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, __wt_update_alloc(session, value, &upd, &upd_size)); WT_ERR(__wt_txn_modify(session, upd)); logged = 1; + + /* Avoid WT_CURSOR.update data copy. */ + cbt->modify_update = upd; } else upd_size = sizeof(WT_UPDATE) + upd->size; ins->upd = upd; diff --git a/src/include/cursor.h b/src/include/cursor.h index 7d59bd88f39..90314f0af61 100644 --- a/src/include/cursor.h +++ b/src/include/cursor.h @@ -158,6 +158,12 @@ struct __wt_cursor_btree { WT_ITEM tmp; /* + * The update structure allocated by the row- and column-store modify + * functions, used to avoid a data copy in the WT_CURSOR.update call. + */ + WT_UPDATE *modify_update; + + /* * Fixed-length column-store items are a single byte, and it's simpler * and cheaper to allocate the space for it now than keep checking to * see if we need to grow the buffer. |