diff options
Diffstat (limited to 'src/third_party/wiredtiger/src/btree/col_modify.c')
-rw-r--r-- | src/third_party/wiredtiger/src/btree/col_modify.c | 223 |
1 files changed, 223 insertions, 0 deletions
diff --git a/src/third_party/wiredtiger/src/btree/col_modify.c b/src/third_party/wiredtiger/src/btree/col_modify.c new file mode 100644 index 00000000000..3a4a2a2987d --- /dev/null +++ b/src/third_party/wiredtiger/src/btree/col_modify.c @@ -0,0 +1,223 @@ +/*- + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +static int __col_insert_alloc( + WT_SESSION_IMPL *, uint64_t, u_int, WT_INSERT **, size_t *); + +/* + * __wt_col_modify -- + * Column-store delete, insert, and update. + */ +int +__wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, + uint64_t recno, WT_ITEM *value, WT_UPDATE *upd, int is_remove) +{ + WT_BTREE *btree; + WT_DECL_RET; + WT_INSERT *ins; + WT_INSERT_HEAD *ins_head, **ins_headp; + WT_ITEM _value; + WT_PAGE *page; + WT_UPDATE *old_upd; + size_t ins_size, upd_size; + u_int i, skipdepth; + int append, logged; + + btree = cbt->btree; + ins = NULL; + page = cbt->ref->page; + append = logged = 0; + + /* This code expects a remove to have a NULL value. */ + if (is_remove) { + if (btree->type == BTREE_COL_FIX) { + value = &_value; + value->data = ""; + value->size = 1; + } else + value = NULL; + } else { + /* + * There's some chance the application specified a record past + * the last record on the page. If that's the case, and we're + * inserting a new WT_INSERT/WT_UPDATE pair, it goes on the + * append list, not the update list. In addition, a recno of + * 0 implies an append operation, we're allocating a new row. + */ + if (recno == 0 || + recno > (btree->type == BTREE_COL_VAR ? + __col_var_last_recno(page) : __col_fix_last_recno(page))) + append = 1; + } + + /* If we don't yet have a modify structure, we'll need one. */ + WT_RET(__wt_page_modify_init(session, page)); + + /* + * Delete, insert or update a column-store entry. + * + * If modifying a previously modified record, create a new WT_UPDATE + * entry and have a serialized function link it into an existing + * WT_INSERT entry's WT_UPDATE list. + * + * Else, allocate an insert array as necessary, build a WT_INSERT and + * WT_UPDATE structure pair, and call a serialized function to insert + * the WT_INSERT structure. + */ + if (cbt->compare == 0 && cbt->ins != NULL) { + /* + * If we are restoring updates that couldn't be evicted, the + * key must not exist on the new page. + */ + WT_ASSERT(session, upd == NULL); + + /* Make sure the update can proceed. */ + WT_ERR(__wt_txn_update_check( + session, old_upd = cbt->ins->upd)); + + /* Allocate a WT_UPDATE structure and transaction ID. */ + WT_ERR(__wt_update_alloc(session, value, &upd, &upd_size)); + WT_ERR(__wt_txn_modify(session, upd)); + logged = 1; + + /* Avoid a data copy in WT_CURSOR.update. */ + cbt->modify_update = upd; + + /* + * Point the new WT_UPDATE item to the next element in the list. + * If we get it right, the serialization function lock acts as + * our memory barrier to flush this write. + */ + upd->next = old_upd; + + /* Serialize the update. */ + WT_ERR(__wt_update_serial( + session, page, &cbt->ins->upd, &upd, upd_size)); + } else { + /* Allocate the append/update list reference as necessary. */ + if (append) { + WT_PAGE_ALLOC_AND_SWAP(session, + page, page->modify->mod_append, ins_headp, 1); + ins_headp = &page->modify->mod_append[0]; + } else if (page->type == WT_PAGE_COL_FIX) { + WT_PAGE_ALLOC_AND_SWAP(session, + page, page->modify->mod_update, ins_headp, 1); + ins_headp = &page->modify->mod_update[0]; + } else { + WT_PAGE_ALLOC_AND_SWAP(session, + page, page->modify->mod_update, ins_headp, + page->pg_var_entries); + ins_headp = &page->modify->mod_update[cbt->slot]; + } + + /* Allocate the WT_INSERT_HEAD structure as necessary. */ + WT_PAGE_ALLOC_AND_SWAP(session, page, *ins_headp, ins_head, 1); + ins_head = *ins_headp; + + /* Choose a skiplist depth for this insert. */ + skipdepth = __wt_skip_choose_depth(session); + + /* + * Allocate a WT_INSERT/WT_UPDATE pair and transaction ID, and + * update the cursor to reference it (the WT_INSERT_HEAD might + * be allocated, the WT_INSERT was allocated). + */ + WT_ERR(__col_insert_alloc( + session, recno, skipdepth, &ins, &ins_size)); + cbt->ins_head = ins_head; + cbt->ins = ins; + + if (upd == NULL) { + WT_ERR( + __wt_update_alloc(session, value, &upd, &upd_size)); + WT_ERR(__wt_txn_modify(session, upd)); + logged = 1; + + /* Avoid a data copy in WT_CURSOR.update. */ + cbt->modify_update = upd; + } else + upd_size = sizeof(WT_UPDATE) + upd->size; + ins->upd = upd; + ins_size += upd_size; + + /* + * If there was no insert list during the search, or there was + * no search because the record number has not been allocated + * yet, the cursor's information cannot be correct, search + * couldn't have initialized it. + * + * Otherwise, point the new WT_INSERT item's skiplist to the + * next elements in the insert list (which we will check are + * still valid inside the serialization function). + * + * The serial mutex acts as our memory barrier to flush these + * writes before inserting them into the list. + */ + if (WT_SKIP_FIRST(ins_head) == NULL || recno == 0) + for (i = 0; i < skipdepth; i++) { + cbt->ins_stack[i] = &ins_head->head[i]; + ins->next[i] = cbt->next_stack[i] = NULL; + } + else + for (i = 0; i < skipdepth; i++) + ins->next[i] = cbt->next_stack[i]; + + /* Append or insert the WT_INSERT structure. */ + if (append) + WT_ERR(__wt_col_append_serial( + session, page, cbt->ins_head, cbt->ins_stack, + &ins, ins_size, &cbt->recno, skipdepth)); + else + WT_ERR(__wt_insert_serial( + session, page, cbt->ins_head, cbt->ins_stack, + &ins, ins_size, skipdepth)); + } + + /* If the update was successful, add it to the in-memory log. */ + if (logged) + WT_ERR(__wt_txn_log_op(session, cbt)); + + if (0) { +err: /* + * Remove the update from the current transaction, so we don't + * try to modify it on rollback. + */ + if (logged) + __wt_txn_unmodify(session); + __wt_free(session, ins); + __wt_free(session, upd); + } + + return (ret); +} + +/* + * __col_insert_alloc -- + * Column-store insert: allocate a WT_INSERT structure and fill it in. + */ +static int +__col_insert_alloc(WT_SESSION_IMPL *session, + uint64_t recno, u_int skipdepth, WT_INSERT **insp, size_t *ins_sizep) +{ + WT_INSERT *ins; + size_t ins_size; + + /* + * Allocate the WT_INSERT structure and skiplist pointers, then copy + * the record number into place. + */ + ins_size = sizeof(WT_INSERT) + skipdepth * sizeof(WT_INSERT *); + WT_RET(__wt_calloc(session, 1, ins_size, &ins)); + + WT_INSERT_RECNO(ins) = recno; + + *insp = ins; + *ins_sizep = ins_size; + return (0); +} |