From 9e51dd0d383ed86b08540418dee372db6cfb5dfe Mon Sep 17 00:00:00 2001 From: Luke Chen Date: Tue, 11 Jan 2022 16:22:29 +1100 Subject: Import wiredtiger: e5af17e9111138938f8d3ac0e928321a35cae91b from branch mongodb-5.2 ref: 9347af9cb5..e5af17e911 for: 5.2.0-rc5 WT-8198 Switch the bulk load cursor to a scratch buffer --- src/third_party/wiredtiger/import.data | 2 +- src/third_party/wiredtiger/src/cursor/cur_bulk.c | 25 ++++++++++++++++------ src/third_party/wiredtiger/src/include/cursor.h | 2 +- src/third_party/wiredtiger/src/reconcile/rec_col.c | 4 ++-- 4 files changed, 22 insertions(+), 11 deletions(-) diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data index e4df97c78ba..07f4920db37 100644 --- a/src/third_party/wiredtiger/import.data +++ b/src/third_party/wiredtiger/import.data @@ -2,5 +2,5 @@ "vendor": "wiredtiger", "github": "wiredtiger/wiredtiger.git", "branch": "mongodb-5.2", - "commit": "9347af9cb5271855cbea7719f1b57bfe94621912" + "commit": "e5af17e9111138938f8d3ac0e928321a35cae91b" } diff --git a/src/third_party/wiredtiger/src/cursor/cur_bulk.c b/src/third_party/wiredtiger/src/cursor/cur_bulk.c index 5672cbbf1c7..6ad5e98da72 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_bulk.c +++ b/src/third_party/wiredtiger/src/cursor/cur_bulk.c @@ -149,8 +149,8 @@ __curbulk_insert_var(WT_CURSOR *cursor) * If not the first insert and the key space is sequential, compare the current value * against the last value; if the same, just increment the RLE count. */ - if (recno == cbulk->recno + 1 && cbulk->last.size == cursor->value.size && - memcmp(cbulk->last.data, cursor->value.data, cursor->value.size) == 0) { + if (recno == cbulk->recno + 1 && cbulk->last->size == cursor->value.size && + memcmp(cbulk->last->data, cursor->value.data, cursor->value.size) == 0) { ++cbulk->rle; ++cbulk->recno; goto duplicate; @@ -173,7 +173,7 @@ __curbulk_insert_var(WT_CURSOR *cursor) cbulk->recno = recno; /* Save a copy of the value for the next comparison. */ - ret = __wt_buf_set(session, &cbulk->last, cursor->value.data, cursor->value.size); + ret = __wt_buf_set(session, cbulk->last, cursor->value.data, cursor->value.size); duplicate: err: @@ -203,7 +203,7 @@ __bulk_row_keycmp_err(WT_CURSOR_BULK *cbulk) "bulk-load presented with out-of-order keys: %s compares smaller than previously inserted " "key %s", __wt_buf_set_printable(session, cursor->key.data, cursor->key.size, false, a), - __wt_buf_set_printable(session, cbulk->last.data, cbulk->last.size, false, b)); + __wt_buf_set_printable(session, cbulk->last->data, cbulk->last->size, false, b)); err: __wt_scr_free(session, &a); @@ -242,14 +242,14 @@ __curbulk_insert_row(WT_CURSOR *cursor) * application doesn't accidentally corrupt the table. */ if (!cbulk->first_insert) { - WT_ERR(__wt_compare(session, btree->collator, &cursor->key, &cbulk->last, &cmp)); + WT_ERR(__wt_compare(session, btree->collator, &cursor->key, cbulk->last, &cmp)); if (cmp <= 0) WT_ERR(__bulk_row_keycmp_err(cbulk)); } else cbulk->first_insert = false; /* Save a copy of the key for the next comparison. */ - WT_ERR(__wt_buf_set(session, &cbulk->last, cursor->key.data, cursor->key.size)); + WT_ERR(__wt_buf_set(session, cbulk->last, cursor->key.data, cursor->key.size)); ret = __wt_bulk_insert_row(session, cbulk); @@ -326,6 +326,17 @@ __wt_curbulk_init( if (bitmap) F_SET(cursor, WT_CURSTD_RAW); + /* + * The bulk last buffer is used to detect out-of-order keys in row-store to avoid corruption, + * and to detect duplicate values in variable-length column-store, where we increment the RLE + * instead of storing another value. In variable-length column-store, if the first two values we + * load are zero-length, the first one will set the last buffer's data field to NULL, and the + * second will cause us to call the underlying memory comparison function with a NULL pointer, + * which triggers run-time analyzers. Give the buffer some memory to avoid the problem (h/t to + * C99 typos). + */ + WT_RET(__wt_scr_alloc(session, 100, &cbulk->last)); + return (__wt_bulk_init(session, cbulk)); } @@ -340,6 +351,6 @@ __wt_curbulk_close(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) ret = __wt_bulk_wrapup(session, cbulk); - __wt_buf_free(session, &cbulk->last); + __wt_scr_free(session, &cbulk->last); return (ret); } diff --git a/src/third_party/wiredtiger/src/include/cursor.h b/src/third_party/wiredtiger/src/include/cursor.h index 85797573e23..0bfbb66fe11 100644 --- a/src/third_party/wiredtiger/src/include/cursor.h +++ b/src/third_party/wiredtiger/src/include/cursor.h @@ -246,7 +246,7 @@ struct __wt_cursor_bulk { * row-store compares keys during bulk load to avoid corruption. */ bool first_insert; /* First insert */ - WT_ITEM last; /* Last key/value inserted */ + WT_ITEM *last; /* Last key/value inserted */ /* * Additional column-store bulk load support. diff --git a/src/third_party/wiredtiger/src/reconcile/rec_col.c b/src/third_party/wiredtiger/src/reconcile/rec_col.c index 1b71533546e..0f0b40e0dde 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_col.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_col.c @@ -142,8 +142,8 @@ __wt_bulk_insert_var(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bool delet * Store the bulk cursor's last buffer, not the current value, we're tracking duplicates, * which means we want the previous value seen, not the current value. */ - WT_RET( - __wt_rec_cell_build_val(session, r, cbulk->last.data, cbulk->last.size, &tw, cbulk->rle)); + WT_RET(__wt_rec_cell_build_val( + session, r, cbulk->last->data, cbulk->last->size, &tw, cbulk->rle)); /* Boundary: split or write the page. */ if (WT_CROSSING_SPLIT_BND(r, val->len)) -- cgit v1.2.1