summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Luke Chen <luke.chen@mongodb.com> 2022-01-11 16:22:29 +1100
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2022-01-11 05:52:02 +0000
commit: 9e51dd0d383ed86b08540418dee372db6cfb5dfe (patch)
tree: 62d56798ee274cdce3a01279758f8d2c48f8e7d2
parent: 34ebad09dcc990cb025cd05e2795a1f218af6db1 (diff)
download: mongo-9e51dd0d383ed86b08540418dee372db6cfb5dfe.tar.gz
Import wiredtiger: e5af17e9111138938f8d3ac0e928321a35cae91b from branch mongodb-5.2
ref: 9347af9cb5..e5af17e911
for: 5.2.0-rc5
WT-8198: Switch the bulk load cursor to a scratch buffer
-rw-r--r-- src/third_party/wiredtiger/import.data | 2
-rw-r--r-- src/third_party/wiredtiger/src/cursor/cur_bulk.c | 25
-rw-r--r-- src/third_party/wiredtiger/src/include/cursor.h | 2
-rw-r--r-- src/third_party/wiredtiger/src/reconcile/rec_col.c | 4
4 files changed, 22 insertions, 11 deletions
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index e4df97c78ba..07f4920db37 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -2,5 +2,5 @@
"vendor": "wiredtiger",
"github": "wiredtiger/wiredtiger.git",
"branch": "mongodb-5.2",
- "commit": "9347af9cb5271855cbea7719f1b57bfe94621912"
+ "commit": "e5af17e9111138938f8d3ac0e928321a35cae91b"
}
diff --git a/src/third_party/wiredtiger/src/cursor/cur_bulk.c b/src/third_party/wiredtiger/src/cursor/cur_bulk.c
index 5672cbbf1c7..6ad5e98da72 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_bulk.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_bulk.c
@@ -149,8 +149,8 @@ __curbulk_insert_var(WT_CURSOR *cursor)
* If not the first insert and the key space is sequential, compare the current value
* against the last value; if the same, just increment the RLE count.
*/
- if (recno == cbulk->recno + 1 && cbulk->last.size == cursor->value.size &&
- memcmp(cbulk->last.data, cursor->value.data, cursor->value.size) == 0) {
+ if (recno == cbulk->recno + 1 && cbulk->last->size == cursor->value.size &&
+ memcmp(cbulk->last->data, cursor->value.data, cursor->value.size) == 0) {
++cbulk->rle;
++cbulk->recno;
goto duplicate;
@@ -173,7 +173,7 @@ __curbulk_insert_var(WT_CURSOR *cursor)
cbulk->recno = recno;
/* Save a copy of the value for the next comparison. */
- ret = __wt_buf_set(session, &cbulk->last, cursor->value.data, cursor->value.size);
+ ret = __wt_buf_set(session, cbulk->last, cursor->value.data, cursor->value.size);
duplicate:
err:
@@ -203,7 +203,7 @@ __bulk_row_keycmp_err(WT_CURSOR_BULK *cbulk)
"bulk-load presented with out-of-order keys: %s compares smaller than previously inserted "
"key %s",
__wt_buf_set_printable(session, cursor->key.data, cursor->key.size, false, a),
- __wt_buf_set_printable(session, cbulk->last.data, cbulk->last.size, false, b));
+ __wt_buf_set_printable(session, cbulk->last->data, cbulk->last->size, false, b));
err:
__wt_scr_free(session, &a);
@@ -242,14 +242,14 @@ __curbulk_insert_row(WT_CURSOR *cursor)
* application doesn't accidentally corrupt the table.
*/
if (!cbulk->first_insert) {
- WT_ERR(__wt_compare(session, btree->collator, &cursor->key, &cbulk->last, &cmp));
+ WT_ERR(__wt_compare(session, btree->collator, &cursor->key, cbulk->last, &cmp));
if (cmp <= 0)
WT_ERR(__bulk_row_keycmp_err(cbulk));
} else
cbulk->first_insert = false;
/* Save a copy of the key for the next comparison. */
- WT_ERR(__wt_buf_set(session, &cbulk->last, cursor->key.data, cursor->key.size));
+ WT_ERR(__wt_buf_set(session, cbulk->last, cursor->key.data, cursor->key.size));
ret = __wt_bulk_insert_row(session, cbulk);
@@ -326,6 +326,17 @@ __wt_curbulk_init(
if (bitmap)
F_SET(cursor, WT_CURSTD_RAW);
+ /*
+ * The bulk last buffer is used to detect out-of-order keys in row-store to avoid corruption,
+ * and to detect duplicate values in variable-length column-store, where we increment the RLE
+ * instead of storing another value. In variable-length column-store, if the first two values we
+ * load are zero-length, the first one will set the last buffer's data field to NULL, and the
+ * second will cause us to call the underlying memory comparison function with a NULL pointer,
+ * which triggers run-time analyzers. Give the buffer some memory to avoid the problem (h/t to
+ * C99 typos).
+ */
+ WT_RET(__wt_scr_alloc(session, 100, &cbulk->last));
+
return (__wt_bulk_init(session, cbulk));
}
@@ -340,6 +351,6 @@ __wt_curbulk_close(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk)
ret = __wt_bulk_wrapup(session, cbulk);
- __wt_buf_free(session, &cbulk->last);
+ __wt_scr_free(session, &cbulk->last);
return (ret);
}
diff --git a/src/third_party/wiredtiger/src/include/cursor.h b/src/third_party/wiredtiger/src/include/cursor.h
index 85797573e23..0bfbb66fe11 100644
--- a/src/third_party/wiredtiger/src/include/cursor.h
+++ b/src/third_party/wiredtiger/src/include/cursor.h
@@ -246,7 +246,7 @@ struct __wt_cursor_bulk {
* row-store compares keys during bulk load to avoid corruption.
*/
bool first_insert; /* First insert */
- WT_ITEM last; /* Last key/value inserted */
+ WT_ITEM *last; /* Last key/value inserted */
/*
* Additional column-store bulk load support.
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_col.c b/src/third_party/wiredtiger/src/reconcile/rec_col.c
index 1b71533546e..0f0b40e0dde 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_col.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_col.c
@@ -142,8 +142,8 @@ __wt_bulk_insert_var(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bool delet
* Store the bulk cursor's last buffer, not the current value, we're tracking duplicates,
* which means we want the previous value seen, not the current value.
*/
- WT_RET(
- __wt_rec_cell_build_val(session, r, cbulk->last.data, cbulk->last.size, &tw, cbulk->rle));
+ WT_RET(__wt_rec_cell_build_val(
+ session, r, cbulk->last->data, cbulk->last->size, &tw, cbulk->rle));
/* Boundary: split or write the page. */
if (WT_CROSSING_SPLIT_BND(r, val->len))