summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKeith Bostic <keith@wiredtiger.com>2014-06-16 09:40:20 -0400
committerKeith Bostic <keith@wiredtiger.com>2014-06-16 09:40:20 -0400
commit9524e17a7e5471aa75cbfc484b50ad0b131639d9 (patch)
treef4983159ece1be83880f9c6d1bded2c183976760
parent414595e57a31626ad0e3db9d40578f8f5a115c84 (diff)
downloadmongo-9524e17a7e5471aa75cbfc484b50ad0b131639d9.tar.gz
Add a new field to the WT_CURSOR_BTREE structure so we can pass back a
reference to the modify function's allocated update structure, avoiding a potential data copy when changing the user's cursor to point to internal data. Closes #1070.
-rw-r--r--src/btree/bt_cursor.c19
-rw-r--r--src/btree/col_modify.c6
-rw-r--r--src/btree/row_modify.c6
-rw-r--r--src/include/cursor.h6
4 files changed, 28 insertions, 9 deletions
diff --git a/src/btree/bt_cursor.c b/src/btree/bt_cursor.c
index d942c12f421..da2ef171414 100644
--- a/src/btree/bt_cursor.c
+++ b/src/btree/bt_cursor.c
@@ -681,15 +681,16 @@ retry: WT_RET(__cursor_func_init(cbt, 1));
err: if (ret == WT_RESTART)
goto retry;
- /* If successful, point the cursor at internal copies of the data. */
- if (ret == 0) {
- if (!WT_DATA_IN_ITEM(&cursor->key))
- WT_TRET(__wt_buf_set(session, &cursor->key,
- cursor->key.data, cursor->key.size));
- if (!WT_DATA_IN_ITEM(&cursor->value))
- WT_TRET(__wt_buf_set(session, &cursor->value,
- cursor->value.data, cursor->value.size));
- }
+ /*
+ * If successful, point the cursor at internal copies of the data. We
+ * could shuffle memory in the cursor so the key/value pair are in local
+ * buffer memory, but that's a data copy. We don't want to do another
+ * search (and we might get a different update structure if we race).
+ * To make this work, we add a field to the btree cursor to pass back a
+ * pointer to the modify function's allocated update structure.
+ */
+ if (ret == 0)
+ WT_TRET(__wt_kv_return(session, cbt, cbt->modify_update));
if (ret != 0)
WT_TRET(__cursor_error_resolve(cbt));
diff --git a/src/btree/col_modify.c b/src/btree/col_modify.c
index 41ce37a0130..e2e3adbd714 100644
--- a/src/btree/col_modify.c
+++ b/src/btree/col_modify.c
@@ -86,6 +86,9 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
WT_ERR(__wt_txn_modify(session, upd));
logged = 1;
+ /* Avoid a data copy in WT_CURSOR.update. */
+ cbt->modify_update = upd;
+
/*
* Point the new WT_UPDATE item to the next element in the list.
* If we get it right, the serialization function lock acts as
@@ -135,6 +138,9 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
__wt_update_alloc(session, value, &upd, &upd_size));
WT_ERR(__wt_txn_modify(session, upd));
logged = 1;
+
+ /* Avoid a data copy in WT_CURSOR.update. */
+ cbt->modify_update = upd;
} else
upd_size = sizeof(WT_UPDATE) + upd->size;
ins->upd = upd;
diff --git a/src/btree/row_modify.c b/src/btree/row_modify.c
index c088fabe53a..03772e317b4 100644
--- a/src/btree/row_modify.c
+++ b/src/btree/row_modify.c
@@ -66,6 +66,9 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
__wt_update_alloc(session, value, &upd, &upd_size));
WT_ERR(__wt_txn_modify(session, upd));
logged = 1;
+
+ /* Avoid WT_CURSOR.update data copy. */
+ cbt->modify_update = upd;
} else {
upd_size = sizeof(WT_UPDATE) + upd->size;
/*
@@ -132,6 +135,9 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
__wt_update_alloc(session, value, &upd, &upd_size));
WT_ERR(__wt_txn_modify(session, upd));
logged = 1;
+
+ /* Avoid WT_CURSOR.update data copy. */
+ cbt->modify_update = upd;
} else
upd_size = sizeof(WT_UPDATE) + upd->size;
ins->upd = upd;
diff --git a/src/include/cursor.h b/src/include/cursor.h
index 7d59bd88f39..90314f0af61 100644
--- a/src/include/cursor.h
+++ b/src/include/cursor.h
@@ -158,6 +158,12 @@ struct __wt_cursor_btree {
WT_ITEM tmp;
/*
+ * The update structure allocated by the row- and column-store modify
+ * functions, used to avoid a data copy in the WT_CURSOR.update call.
+ */
+ WT_UPDATE *modify_update;
+
+ /*
* Fixed-length column-store items are a single byte, and it's simpler
* and cheaper to allocate the space for it now than keep checking to
* see if we need to grow the buffer.