diff options
author | Don Anderson <dda@ddanderson.com> | 2012-03-15 14:29:22 -0400 |
---|---|---|
committer | Don Anderson <dda@ddanderson.com> | 2012-03-15 14:29:22 -0400 |
commit | b9b00694d3b7e4fcaf59fdf8e5196bcd0040b581 (patch) | |
tree | 0e43cb64a2d788cbec61843f1d4a739081ee1344 | |
parent | c0f8a06d6a77ea64f24b28b59c62c866c9982ae0 (diff) | |
parent | 9e67b397abc3f7931aa708327011c0acf8e61f60 (diff) | |
download | mongo-b9b00694d3b7e4fcaf59fdf8e5196bcd0040b581.tar.gz |
Merge branch 'master' of https://github.com/wiredtiger/wiredtiger
-rw-r--r-- | dist/s_copyright | 4 | ||||
-rw-r--r-- | src/btree/bt_curprev.c | 20 | ||||
-rw-r--r-- | src/btree/bt_evict.c | 119 | ||||
-rw-r--r-- | src/btree/bt_page.c | 58 | ||||
-rw-r--r-- | src/btree/bt_read.c | 2 | ||||
-rw-r--r-- | src/btree/rec_evict.c | 38 | ||||
-rw-r--r-- | src/include/cache.i | 6 | ||||
-rw-r--r-- | test/format/Makefile.am | 3 | ||||
-rw-r--r-- | test/format/README | 14 | ||||
-rw-r--r-- | test/format/bdb.c | 118 | ||||
-rw-r--r-- | test/format/config.c | 4 | ||||
-rw-r--r-- | test/format/format.h | 70 | ||||
-rw-r--r-- | test/format/s_dumpcmp.in | 47 | ||||
-rw-r--r-- | test/format/t.c | 131 | ||||
-rw-r--r-- | test/format/util.c | 152 | ||||
-rw-r--r-- | test/format/wts.c | 277 | ||||
-rw-r--r-- | test/format/wts_bulk.c | 157 | ||||
-rw-r--r-- | test/format/wts_ops.c | 591 |
18 files changed, 916 insertions, 895 deletions
diff --git a/dist/s_copyright b/dist/s_copyright index 05ea9a4e6be..afca1458c46 100644 --- a/dist/s_copyright +++ b/dist/s_copyright @@ -72,8 +72,8 @@ l="LICENSE COPYING" # Search for files, ignoring test/3rdparty. for i in `cd .. && - find bench build_posix dist docs lang src test \ - -name '*.[chi]' -o -name '*.cxx' -o -name '*.java' -o -name '*.py' | + find [a-z]* -name '*.[chi]' \ + -o -name '*.cxx' -o -name '*.java' -o -name '*.py' | sed -e '/test\/3rdparty\//d' -e 's/^\.\///'`; do check $i done diff --git a/src/btree/bt_curprev.c b/src/btree/bt_curprev.c index 865fcf2ee44..39d0aa8aed3 100644 --- a/src/btree/bt_curprev.c +++ b/src/btree/bt_curprev.c @@ -16,12 +16,14 @@ * search item's next array). * * Helper macros to go from a stack pointer at level i, pointing into a next - * array, to insert node containing that next array. + * array, back to the insert node containing that next array. */ +#undef PREV_ITEM #define PREV_ITEM(ins_head, insp, i) \ (((insp) == &(ins_head)->head[i] || (insp) == NULL) ? NULL : \ (WT_INSERT *)((char *)((insp) - (i)) - offsetof(WT_INSERT, next))) +#undef PREV_INS #define PREV_INS(cbt, i) \ PREV_ITEM((cbt)->ins_head, (cbt)->ins_stack[(i)], (i)) @@ -39,11 +41,12 @@ __cursor_skip_prev(WT_CURSOR_BTREE *cbt) session = (WT_SESSION_IMPL *)cbt->iface.session; +restart: /* * If the search stack does not point at the current item, fill it in * with a search. */ - if ((current = cbt->ins) != PREV_INS(cbt, 0)) { + while ((current = cbt->ins) != PREV_INS(cbt, 0)) { if (cbt->btree->type == BTREE_ROW) { key.data = WT_INSERT_KEY(current); key.size = WT_INSERT_KEY_SIZE(current); @@ -52,10 +55,6 @@ __cursor_skip_prev(WT_CURSOR_BTREE *cbt) } else cbt->ins = __col_insert_search(cbt->ins_head, cbt->ins_stack, WT_INSERT_RECNO(current)); - - /* Check that we found the expected item. 
*/ - WT_ASSERT(session, cbt->ins == current); - WT_ASSERT(session, PREV_INS(cbt, 0) == current); } /* @@ -91,7 +90,14 @@ __cursor_skip_prev(WT_CURSOR_BTREE *cbt) /* Walk any remaining levels until just before the current node. */ while (i >= 0) { - WT_ASSERT(session, ins != NULL); + /* + * If we get to the end of a list without finding the current + * item, we must have raced with an insert. Restart the search. + */ + if (ins == NULL) { + cbt->ins_stack[0] = NULL; + goto restart; + } if (ins->next[i] != current) /* Stay at this level */ ins = ins->next[i]; else { /* Drop down a level */ diff --git a/src/btree/bt_evict.c b/src/btree/bt_evict.c index b6970cd3202..fe2bd27da2f 100644 --- a/src/btree/bt_evict.c +++ b/src/btree/bt_evict.c @@ -22,9 +22,9 @@ static int __evict_worker(WT_SESSION_IMPL *); * Tuning constants: I hesitate to call this tuning, but we want to review some * number of pages from each file's in-memory tree for each page we evict. */ -#define WT_EVICT_GROUP 10 /* Evict N pages at a time */ -#define WT_EVICT_WALK_PER_TABLE 20 /* Pages to visit per file */ -#define WT_EVICT_WALK_BASE 100 /* Pages tracked across file visits */ +#define WT_EVICT_GROUP 20 /* Evict N pages at a time */ +#define WT_EVICT_WALK_PER_TABLE 25 /* Pages to visit per file */ +#define WT_EVICT_WALK_BASE 50 /* Pages tracked across file visits */ /* * WT_EVICT_REQ_FOREACH -- @@ -198,6 +198,7 @@ __wt_evict_page_request(WT_SESSION_IMPL *session, WT_PAGE *page) * thread will see this later. */ WT_VERBOSE(session, evictserver, "eviction server request table full"); + page->ref->state = WT_REF_MEM; return (WT_RESTART); } @@ -230,7 +231,7 @@ __wt_cache_evict_server(void *arg) * whether there is work to do. If so, evict_cond will * be signalled and the wait below won't block. 
*/ - __wt_eviction_check(session, NULL); + __wt_eviction_check(session, NULL, 1); WT_VERBOSE(session, evictserver, "sleeping"); __wt_cond_wait(session, cache->evict_cond); @@ -353,16 +354,6 @@ __evict_request_walk(WT_SESSION_IMPL *session) memset(cache->evict, 0, cache->evict_allocated); if (F_ISSET(er, WT_EVICT_REQ_PAGE)) { - /* - * If we are pushing out a page, that page might be our - * eviction location. If so, try to move on to the - * next page, or restart the walk if that fails - * (evict_page will be set to NULL). - */ - if (session->btree->evict_page == er->page) - (void)__wt_tree_np( - session, &session->btree->evict_page, 1, 1); - ref = er->page->ref; WT_ASSERT(session, ref->page == er->page); WT_ASSERT(session, ref->state == WT_REF_EVICTING); @@ -379,16 +370,13 @@ __evict_request_walk(WT_SESSION_IMPL *session) __wt_yield(); /* - * If eviction fails, free up the page and hope it + * If eviction fails, it will free up the page: hope it * works next time. Application threads may be holding * a reference while trying to get another (e.g., if * they have two cursors open), so blocking * indefinitely leads to deadlock. */ - if ((ret = __wt_rec_evict(session, er->page, 0)) != 0) { - WT_ASSERT(session, ref->page == er->page); - ref->state = WT_REF_MEM; - } + ret = __wt_rec_evict(session, er->page, 0); } else { /* * If we're about to do a walk of the file tree (and @@ -570,7 +558,7 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp) WT_BTREE *btree; WT_CACHE *cache; WT_PAGE *page; - int i, restarted_once; + int i, restarts, ret; btree = session->btree; cache = S2C(session)->cache; @@ -588,36 +576,29 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp) * We can't evict the page just returned to us, it marks our place in * the tree. So, always stay one page ahead of the page being returned. */ - i = restarted_once = 0; - do { - if ((page = btree->evict_page) == NULL) - goto skip; - - /* - * Root and pinned pages can't be evicted. - * !!! 
- * It's still in flux if root pages are pinned or not, test for - * both cases for now. - */ - if (WT_PAGE_IS_ROOT(page)) - goto skip; - - /* - * Skip locked pages: we would skip them later, and they just - * fill up the eviction list for no benefit. - */ - if (page->ref->state != WT_REF_MEM) - goto skip; + for (i = restarts = ret = 0; + i < WT_EVICT_WALK_PER_TABLE && restarts <= 1 && ret == 0; + ret = __wt_tree_np(session, &btree->evict_page, 1, 1)) { + if ((page = btree->evict_page) == NULL) { + ++restarts; + continue; + } /* + * Root and pinned pages can't be evicted, nor can locked + * pages: we would skip them later, and they just fill up the + * eviction list for no benefit. + * * Skip pages that must be merged into their parents. Don't * skip pages marked WT_PAGE_REC_EMPTY or SPLIT: updates after * their last reconciliation may have changed their state and * only the eviction code can check whether they should really * be skipped. */ - if (F_ISSET(page, WT_PAGE_REC_SPLIT_MERGE)) - goto skip; + if (WT_PAGE_IS_ROOT(page) || + page->ref->state != WT_REF_MEM || + F_ISSET(page, WT_PAGE_REC_SPLIT_MERGE)) + continue; WT_VERBOSE(session, evictserver, "select: %p, size %" PRIu32, page, page->memory_footprint); @@ -626,13 +607,9 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp) cache->evict[*slotp].page = page; cache->evict[*slotp].btree = btree; ++*slotp; + } -skip: WT_RET(__wt_tree_np(session, &btree->evict_page, 1, 1)); - if (btree->evict_page == NULL && restarted_once++ == 1) - break; - } while (i < WT_EVICT_WALK_PER_TABLE); - - return (0); + return (ret); } /* @@ -664,7 +641,7 @@ __evict_dup_remove(WT_SESSION_IMPL *session) */ evict = cache->evict; elem = cache->evict_entries; - qsort(evict, (size_t)elem, sizeof(WT_EVICT_LIST), __evict_page_cmp); + qsort(evict, elem, sizeof(WT_EVICT_LIST), __evict_page_cmp); for (i = 0; i < elem; i = j) { /* * Once we hit a NULL, we're done, the NULLs all sorted to the @@ -673,17 +650,15 @@ 
__evict_dup_remove(WT_SESSION_IMPL *session) if (evict[i].page == NULL) break; - for (j = i + 1; j < elem; ++j) { - /* Delete the second and any subsequent duplicates. */ - if (evict[i].page == evict[j].page) - __evict_clr(&evict[j]); - else - break; - } + /* Delete any subsequent duplicates. */ + for (j = i + 1; + j < elem && evict[j].page == evict[i].page; + ++j) + __evict_clr(&evict[j]); } /* Sort the array by LRU, then evict the most promising candidates. */ - qsort(cache->evict, elem, sizeof(WT_EVICT_LIST), __evict_lru_cmp); + qsort(evict, i, sizeof(WT_EVICT_LIST), __evict_lru_cmp); } /* @@ -735,15 +710,6 @@ __evict_get_page( *btreep = evict->btree; /* - * If we're evicting our current eviction point in the file, - * try to move on to the next page, or restart the walk if that - * fails (evict_page will be set to NULL). - */ - if (*pagep == evict->btree->evict_page) - (void)__wt_tree_np( - session, &evict->btree->evict_page, 1, 1); - - /* * Paranoia: remove the entry so we never try and reconcile * the same page on reconciliation error. */ @@ -768,6 +734,8 @@ __wt_evict_lru_page(WT_SESSION_IMPL *session) if (page == NULL) return (WT_NOTFOUND); + WT_ASSERT(session, page->ref->state == WT_REF_EVICTING); + /* Reference the correct WT_BTREE handle. */ saved_btree = session->btree; WT_SET_BTREE_IN_SESSION(session, btree); @@ -776,19 +744,14 @@ __wt_evict_lru_page(WT_SESSION_IMPL *session) * We don't care why eviction failed (maybe the page was dirty and we're * out of disk space, or the page had an in-memory subtree already being * evicted). Regardless, don't pick the same page every time. + * + * We used to bump the page's read_gen only if eviction failed, but + * that isn't safe: at that point, eviction has already unlocked the + * page and some other thread may have evicted it by the time we look + * at it. 
*/ - if (__wt_rec_evict(session, page, 0) != 0) { - page->read_gen = __wt_cache_read_gen(session); - - /* - * If the evicting state of the page was not cleared, clear it - * now to make the page available again. - */ - if (page->ref->state == WT_REF_EVICTING) { - WT_ASSERT(session, page->ref->page == page); - page->ref->state = WT_REF_MEM; - } - } + page->read_gen = __wt_cache_read_gen(session); + (void)__wt_rec_evict(session, page, 0); WT_ATOMIC_ADD(btree->lru_count, -1); diff --git a/src/btree/bt_page.c b/src/btree/bt_page.c index 97f565571e3..21cdddaa635 100644 --- a/src/btree/bt_page.c +++ b/src/btree/bt_page.c @@ -26,7 +26,13 @@ __wt_page_in_func( #endif ) { - int read_lockout; + int first, read_lockout; + + /* + * Only wake the eviction server once: after that, we're just wasting + * effort and making a busy mutex busier. + */ + first = 1; for (;;) { switch (ref->state) { @@ -35,7 +41,8 @@ __wt_page_in_func( * The page isn't in memory, attempt to set the * state to WT_REF_READING. If successful, read it. */ - __wt_eviction_check(session, &read_lockout); + __wt_eviction_check(session, &read_lockout, first); + first = 0; if (read_lockout || !WT_ATOMIC_CAS( ref->state, WT_REF_DISK, WT_REF_READING)) break; @@ -334,8 +341,8 @@ __inmem_row_int(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *inmem_sizep) /* * Internal row-store page entries map one-to-two to the number of - * physical entries on the page (each physical entry is a data item - * and offset object). + * physical entries on the page (each in-memory entry is a key item + * and location coookie). */ nindx = dsk->u.entries / 2; WT_RET((__wt_calloc_def(session, (size_t)nindx, &page->u.intl.t))); @@ -351,9 +358,8 @@ __inmem_row_int(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *inmem_sizep) /* * Walk the page, instantiating keys: the page contains sorted key and - * offpage-reference pairs. 
Keys are row store internal pages with - * on-page/overflow (WT_CELL_KEY/KEY_OVFL) items, and offpage references - * are WT_CELL_OFF items. + * location cookie pairs. Keys are on-page/overflow items and location + * cookies are WT_CELL_ADDR items. */ ref = page->u.intl.t; WT_CELL_FOREACH(btree, dsk, cell, unpack, i) { @@ -455,33 +461,43 @@ __inmem_row_leaf(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *inmem_sizep) /* * Leaf row-store page entries map to a maximum of two-to-one to the - * number of physical entries on the page (each physical entry might - * be a key without any subsequent data item). - */ - WT_RET((__wt_calloc_def( - session, (size_t)dsk->u.entries * 2, &page->u.row.d))); - if (inmem_sizep != NULL) - *inmem_sizep += 2 * dsk->u.entries * sizeof(*page->u.row.d); - - /* - * Walk a row-store page of WT_CELLs, building indices and finding the - * end of the page. + * number of physical entries on the page (each physical entry might be + * a key without a subsequent data item). To avoid over-allocation in + * workloads with large numbers of empty data items, first walk the page + * counting the number of keys, then allocate the indices. * * The page contains key/data pairs. Keys are on-page (WT_CELL_KEY) or * overflow (WT_CELL_KEY_OVFL) items, data are either a single on-page * (WT_CELL_VALUE) or overflow (WT_CELL_VALUE_OVFL) item. */ nindx = 0; - rip = page->u.row.d; WT_CELL_FOREACH(btree, dsk, cell, unpack, i) { __wt_cell_unpack(cell, unpack); switch (unpack->type) { case WT_CELL_KEY: case WT_CELL_KEY_OVFL: ++nindx; - if (rip->key != NULL) - ++rip; + break; + case WT_CELL_VALUE: + case WT_CELL_VALUE_OVFL: + break; + WT_ILLEGAL_VALUE(session); + } + } + + WT_RET((__wt_calloc_def(session, (size_t)nindx, &page->u.row.d))); + if (inmem_sizep != NULL) + *inmem_sizep += nindx * sizeof(*page->u.row.d); + + /* Walk the page again, building indices. 
*/ + rip = page->u.row.d; + WT_CELL_FOREACH(btree, dsk, cell, unpack, i) { + __wt_cell_unpack(cell, unpack); + switch (unpack->type) { + case WT_CELL_KEY: + case WT_CELL_KEY_OVFL: rip->key = cell; + ++rip; break; case WT_CELL_VALUE: case WT_CELL_VALUE_OVFL: diff --git a/src/btree/bt_read.c b/src/btree/bt_read.c index 4d9017ca0cd..a0057e733d4 100644 --- a/src/btree/bt_read.c +++ b/src/btree/bt_read.c @@ -47,7 +47,7 @@ __wt_cache_read(WT_SESSION_IMPL *session, WT_PAGE *parent, WT_REF *ref) WT_ASSERT(session, page != NULL); ref->page = page; - ref->state = WT_REF_MEM; + WT_PUBLISH(ref->state, WT_REF_MEM); return (0); err: ref->state = WT_REF_DISK; diff --git a/src/btree/rec_evict.c b/src/btree/rec_evict.c index 716d79b3251..9da60eda485 100644 --- a/src/btree/rec_evict.c +++ b/src/btree/rec_evict.c @@ -13,7 +13,7 @@ static int __rec_discard_page(WT_SESSION_IMPL *, WT_PAGE *); static void __rec_excl_clear(WT_SESSION_IMPL *); static int __rec_page_clean_update(WT_SESSION_IMPL *, WT_PAGE *); static int __rec_page_dirty_update(WT_SESSION_IMPL *, WT_PAGE *); -static int __rec_review(WT_SESSION_IMPL *, WT_PAGE *, uint32_t, int); +static int __rec_review(WT_SESSION_IMPL *, WT_REF *, WT_PAGE *, uint32_t, int); static int __rec_root_addr_update(WT_SESSION_IMPL *, uint8_t *, uint32_t); static int __rec_root_clean_update(WT_SESSION_IMPL *, WT_PAGE *); static int __rec_root_dirty_update(WT_SESSION_IMPL *, WT_PAGE *); @@ -43,8 +43,12 @@ __wt_rec_evict(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t flags) * we're done. We have to make this check for clean pages, too: while * unlikely eviction would choose an internal page with children, it's * not disallowed anywhere. + * + * Note that page->ref may be NULL in some cases (e.g., for root pages + * or during salvage). That's OK if WT_REC_SINGLE is set: we won't + * check hazard references in that case. 
*/ - WT_ERR(__rec_review(session, page, flags, 1)); + WT_ERR(__rec_review(session, page->ref, page, flags, 1)); /* Count evictions of internal pages during normal operation. */ if (!LF_ISSET(WT_REC_SINGLE) && @@ -325,12 +329,22 @@ __rec_discard_page(WT_SESSION_IMPL *session, WT_PAGE *page) * a split-merge page, then the reference must be cleared before * the page is discarded. */ - if (F_ISSET( - page, WT_PAGE_REC_MASK) == WT_PAGE_REC_SPLIT && + if (F_ISSET(page, WT_PAGE_REC_MASK) == WT_PAGE_REC_SPLIT && mod->u.split != NULL) __wt_page_out(session, mod->u.split, 0); } + /* + * If we are evicting the file's current eviction point, clear it so + * the walk will be restarted. + * + * !!! + * This check would arguably be cleaner in bt_evict.c, but that level + * isn't aware of all of the pages within a subtree that are evicted. + */ + if (session->btree->evict_page == page) + session->btree->evict_page = NULL; + /* Discard the page itself. */ __wt_page_out(session, page, 0); @@ -341,11 +355,17 @@ __rec_discard_page(WT_SESSION_IMPL *session, WT_PAGE *page) * __rec_review -- * Get exclusive access to the page and review the page and its subtree * for conditions that would block its eviction. + * + * The ref and page arguments may appear to be redundant, because usually + * ref->page == page and page->ref == ref. However, we need both because + * (a) there are cases where ref == NULL (e.g., for root page or during + * salvage), and (b) we can't safely look at page->ref until we have a + * hazard reference. */ static int -__rec_review(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t flags, int top) +__rec_review(WT_SESSION_IMPL *session, + WT_REF *ref, WT_PAGE *page, uint32_t flags, int top) { - WT_REF *ref; uint32_t i; /* @@ -353,7 +373,7 @@ __rec_review(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t flags, int top) * locked down. 
*/ if (!LF_ISSET(WT_REC_SINGLE)) - WT_RET(__hazard_exclusive(session, page->ref, top)); + WT_RET(__hazard_exclusive(session, ref, top)); /* * Recurse through the page's subtree: this happens first because we @@ -366,8 +386,8 @@ __rec_review(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t flags, int top) case WT_REF_DISK: /* On-disk */ break; case WT_REF_MEM: /* In-memory */ - WT_RET( - __rec_review(session, ref->page, flags, 0)); + WT_RET(__rec_review( + session, ref, ref->page, flags, 0)); break; case WT_REF_EVICTING: /* Being evaluated */ case WT_REF_LOCKED: /* Being evicted */ diff --git a/src/include/cache.i b/src/include/cache.i index 866f00693c2..5ac61f6cb69 100644 --- a/src/include/cache.i +++ b/src/include/cache.i @@ -10,7 +10,7 @@ * Wake the eviction server if necessary. */ static inline void -__wt_eviction_check(WT_SESSION_IMPL *session, int *read_lockoutp) +__wt_eviction_check(WT_SESSION_IMPL *session, int *read_lockoutp, int wake) { WT_CACHE *cache; WT_CONNECTION_IMPL *conn; @@ -31,7 +31,7 @@ __wt_eviction_check(WT_SESSION_IMPL *session, int *read_lockoutp) *read_lockoutp = (bytes_inuse > bytes_max); /* Wake eviction when we're over the trigger cache size. 
*/ - if (bytes_inuse > cache->eviction_trigger * (bytes_max / 100)) + if (wake && bytes_inuse > cache->eviction_trigger * (bytes_max / 100)) __wt_evict_server_wake(session); } @@ -60,7 +60,7 @@ __wt_eviction_page_check(WT_SESSION_IMPL *session, WT_PAGE *page) */ WT_RET(__wt_evict_page_request(session, page)); } else - __wt_eviction_check(session, NULL); + __wt_eviction_check(session, NULL, 1); return (0); } diff --git a/test/format/Makefile.am b/test/format/Makefile.am index 4727e0df742..f10e8b47b49 100644 --- a/test/format/Makefile.am +++ b/test/format/Makefile.am @@ -3,7 +3,8 @@ INCLUDES = -I$(top_builddir) -I$(BDB) noinst_PROGRAMS = t noinst_DATA = s_dumpcmp -t_SOURCES = config.h format.h bdb.c config.c t.c util.c wts.c wts_bulk.c wts_ops.c +t_SOURCES =\ + config.h format.h bdb.c config.c t.c util.c wts.c wts_bulk.c wts_ops.c t_LDADD = $(top_builddir)/libwiredtiger.la -L$(BDB)/build_unix -ldb t_LDFLAGS = -static diff --git a/test/format/README b/test/format/README index 5f276ad48ac..81cebc894db 100644 --- a/test/format/README +++ b/test/format/README @@ -1,9 +1,9 @@ -The test program format randomly generates WiredTiger files with -different size objects and then does single-threaded operations -on those files. The goal is to test the WiredTiger file formats. +The test/format program randomly generates WiredTiger files with different +size objects and then does operations on those files. The goal is to +test the WiredTiger file formats. -format should be compiled with a version of Berkeley DB (which it -uses to verify format's results). Create a link "db" in the -build_posix directory that links to the top-level of a Berkeley DB -distribution directory which contains a configured and compiled +test/format should be compiled with a version of Berkeley DB (which +it uses to verify format's results). 
Create a link "db" in the +build_posix directory that links to the top-level of a Berkeley +DB distribution directory which contains a configured and compiled build_unix subdirectory. diff --git a/test/format/bdb.c b/test/format/bdb.c index bf21a003e17..9cbe8012583 100644 --- a/test/format/bdb.c +++ b/test/format/bdb.c @@ -8,6 +8,9 @@ #define BDB 1 /* Berkeley DB header files */ #include "format.h" +static DBT key, value; +static uint8_t *keybuf; + static int bdb_compare_reverse(DB *dbp, const DBT *k1, const DBT *k2) { @@ -23,7 +26,7 @@ bdb_compare_reverse(DB *dbp, const DBT *k1, const DBT *k2) } void -bdb_startup(void) +bdb_open(void) { DB *db; DBC *dbc; @@ -48,10 +51,12 @@ bdb_startup(void) g.bdb = db; assert(db->cursor(db, NULL, &dbc, 0) == 0); g.dbc = dbc; + + key_gen_setup(&keybuf); } void -bdb_teardown(void) +bdb_close(void) { DB *db; DBC *dbc; @@ -63,6 +68,9 @@ bdb_teardown(void) assert(dbc->close(dbc) == 0); assert(db->close(db, 0) == 0); assert(dbenv->close(dbenv, 0) == 0); + + free(keybuf); + keybuf = NULL; } void @@ -70,7 +78,6 @@ bdb_insert( const void *key_data, uint32_t key_size, const void *value_data, uint32_t value_size) { - static DBT key, value; DBC *dbc; key.data = (void *)key_data; @@ -83,118 +90,91 @@ bdb_insert( assert(dbc->put(dbc, &key, &value, DB_KEYFIRST) == 0); } -int +void bdb_np(int next, void *keyp, uint32_t *keysizep, void *valuep, uint32_t *valuesizep, int *notfoundp) { - static DBT key, value; - DB *db = g.bdb; DBC *dbc = g.dbc; int ret; *notfoundp = 0; - if ((ret = dbc->get(dbc, &key, &value, next ? DB_NEXT : DB_PREV)) != 0) { - if (ret == DB_NOTFOUND) { - *notfoundp = 1; - return (0); - } - db->err(db, ret, - "dbc->get: %s: {%.*s}", - next ? "DB_NEXT" : "DB_PREV", - (int)key.size, (char *)key.data); - return (1); + if (ret != DB_NOTFOUND) + die(ret, "dbc.get: %s: {%.*s}", + next ? 
"DB_NEXT" : "DB_PREV", + (int)key.size, (char *)key.data); + *notfoundp = 1; + } else { + *(void **)keyp = key.data; + *keysizep = key.size; + *(void **)valuep = value.data; + *valuesizep = value.size; } - *(void **)keyp = key.data; - *keysizep = key.size; - *(void **)valuep = value.data; - *valuesizep = value.size; - return (0); } -int +void bdb_read(uint64_t keyno, void *valuep, uint32_t *valuesizep, int *notfoundp) { - static DBT key, value; - DB *db = g.bdb; DBC *dbc = g.dbc; int ret; - *notfoundp = 0; - - key_gen(&key.data, &key.size, keyno, 0); + key.data = keybuf; + key_gen(key.data, &key.size, keyno, 0); + *notfoundp = 0; if ((ret = dbc->get(dbc, &key, &value, DB_SET)) != 0) { - if (ret == DB_NOTFOUND) { - *notfoundp = 1; - return (0); - } - db->err(db, ret, - "dbc->get: DB_SET: {%.*s}", - (int)key.size, (char *)key.data); - return (1); + if (ret != DB_NOTFOUND) + die(ret, "dbc.get: DB_SET: {%.*s}", + (int)key.size, (char *)key.data); + *notfoundp = 1; + } else { + *(void **)valuep = value.data; + *valuesizep = value.size; } - *(void **)valuep = value.data; - *valuesizep = value.size; - return (0); } -int +void bdb_put(const void *arg_key, uint32_t arg_key_size, const void *arg_value, uint32_t arg_value_size, int *notfoundp) { - static DBT key, value; - DB *db = g.bdb; DBC *dbc = g.dbc; int ret; - *notfoundp = 0; - key.data = (void *)arg_key; key.size = arg_key_size; value.data = (void *)arg_value; value.size = arg_value_size; + *notfoundp = 0; if ((ret = dbc->put(dbc, &key, &value, DB_KEYFIRST)) != 0) { - if (ret == DB_NOTFOUND) { - *notfoundp = 1; - return (0); + if (ret != DB_NOTFOUND) { + die(ret, "dbc.put: DB_KEYFIRST: {%.*s}{%.*s}", + (int)key.size, (char *)key.data, + (int)value.size, (char *)value.data); } - db->err(db, ret, "dbc->put: DB_KEYFIRST: {%.*s}{%.*s}", - (int)key.size, (char *)key.data, - (int)value.size, (char *)value.data); - return (1); + *notfoundp = 1; } - return (0); } -int +void bdb_del(uint64_t keyno, int *notfoundp) { - static 
DBT value; - static DBT key; - DB *db = g.bdb; DBC *dbc = g.dbc; int ret; - *notfoundp = 0; - - key_gen(&key.data, &key.size, keyno, 0); + key.data = keybuf; + key_gen(key.data, &key.size, keyno, 0); - if ((ret = bdb_read(keyno, &value.data, &value.size, notfoundp)) != 0) - return (1); + bdb_read(keyno, &value.data, &value.size, notfoundp); if (*notfoundp) - return (0); + return; + if ((ret = dbc->del(dbc, 0)) != 0) { - if (ret == DB_NOTFOUND) { - *notfoundp = 1; - return (0); - } - db->err(db, ret, - "dbc->del: {%.*s}", (int)key.size, (char *)key.data); - return (1); + if (ret != DB_NOTFOUND) + die(ret, "dbc.del: {%.*s}", + (int)key.size, (char *)key.data); + *notfoundp = 1; } - return (0); } diff --git a/test/format/config.c b/test/format/config.c index 0b08be14c54..a9bca4e7c00 100644 --- a/test/format/config.c +++ b/test/format/config.c @@ -103,7 +103,7 @@ config_print(int error_display) fp = stdout; else if ((fp = fopen("__run", "w")) == NULL) - die("__run", errno); + die(errno, "fopen: __run"); fprintf(fp, "############################################\n"); fprintf(fp, "# RUN PARAMETERS\n"); @@ -138,7 +138,7 @@ config_file(const char *name) char *p, buf[256]; if ((fp = fopen(name, "r")) == NULL) - die(name, errno); + die(errno, "fopen: %s", name); while (fgets(buf, sizeof(buf), fp) != NULL) { for (p = buf; *p != '\0' && *p != '\n'; ++p) ; diff --git a/test/format/format.h b/test/format/format.h index 2f3bac9dd3b..4ee5d89b900 100644 --- a/test/format/format.h +++ b/test/format/format.h @@ -12,6 +12,7 @@ #include <errno.h> #include <inttypes.h> #include <limits.h> +#include <pthread.h> #include <signal.h> #include <stdio.h> #include <stdlib.h> @@ -37,6 +38,8 @@ #define WT_TABLENAME "file:__wt" +#define SINGLETHREADED (g.threads == 1) + typedef struct { char *progname; /* Program name */ @@ -44,9 +47,6 @@ typedef struct { void *dbc; /* BDB cursor handle */ void *wts_conn; /* WT_CONNECTION handle */ - void *wts_cursor; /* WT_CURSOR handle */ - void 
*wts_cursor_insert; /* WT_CURSOR insert handle */ - void *wts_session; /* WT_SESSION handle */ FILE *rand_log; /* Random number log */ @@ -56,15 +56,14 @@ typedef struct { LOG_FILE=1, /* Use a log file */ LOG_OPS=2 /* Log all operations */ } logging; - FILE *logfp; /* Log file. */ + FILE *logfp; /* Log file */ int replay; /* Replaying a run. */ int track; /* Track progress */ + int threads; /* Threads doing operations */ char *config_open; /* Command-line configuration */ - char *key_gen_buf; - uint32_t c_bitcnt; /* Config values */ uint32_t c_bzip; uint32_t c_cache; @@ -92,32 +91,49 @@ typedef struct { } GLOBAL; extern GLOBAL g; -int bdb_del(uint64_t, int *); +typedef struct { + uint64_t search; + uint64_t insert; + uint64_t update; + uint64_t remove; + + pthread_t tid; /* thread ID */ + +#define TINFO_RUNNING 1 /* Running */ +#define TINFO_COMPLETE 2 /* Finished */ +#define TINFO_JOINED 3 /* Resolved */ + volatile int state; /* state */ +} TINFO; + +void bdb_close(void); +void bdb_del(uint64_t, int *); void bdb_insert(const void *, uint32_t, const void *, uint32_t); -int bdb_np(int, void *, uint32_t *, void *, uint32_t *, int *); -int bdb_put(const void *, uint32_t, const void *, uint32_t, int *); -int bdb_read(uint64_t, void *, uint32_t *, int *); -void bdb_startup(void); -void bdb_teardown(void); +void bdb_np(int, void *, uint32_t *, void *, uint32_t *, int *); +void bdb_open(void); +void bdb_put(const void *, uint32_t, const void *, uint32_t, int *); +void bdb_read(uint64_t, void *, uint32_t *, int *); + +void config_error(void); const char * config_dtype(void); -void config_error(void); void config_file(const char *); void config_print(int); void config_setup(void); void config_single(const char *, int); -void die(const char *, int); -void key_gen(void *, uint32_t *, uint64_t, int); -void key_gen_setup(void); -void track(const char *, uint64_t); -void value_gen(void *, uint32_t *, uint64_t); -int wts_bulk_load(void); -int wts_dump(const char *, int); -int 
wts_ops(void); +void die(int, const char *, ...); +void key_len_setup(void); +void key_gen_setup(uint8_t **); +void key_gen(uint8_t *, uint32_t *, uint64_t, int); +void track(const char *, uint64_t, TINFO *); +void val_gen_setup(uint8_t **); +void value_gen(uint8_t *, uint32_t *, uint64_t); +void wts_close(void); +void wts_dump(const char *, int); +void wts_load(void); +void wts_open(void); +void wts_ops(void); uint32_t wts_rand(void); -int wts_read_scan(void); -int wts_salvage(void); -int wts_startup(int); -int wts_stats(void); -int wts_teardown(void); -int wts_verify(const char *); +void wts_read_scan(void); +void wts_salvage(void); +void wts_stats(void); +void wts_verify(const char *); diff --git a/test/format/s_dumpcmp.in b/test/format/s_dumpcmp.in index c523fcd507c..71eed7b055f 100644 --- a/test/format/s_dumpcmp.in +++ b/test/format/s_dumpcmp.in @@ -27,26 +27,6 @@ if test $# -ne 0; then exit 1 fi -if test $dump_bdb -eq 1; then - if test $colflag -eq 0; then - $bdb/db_dump -p __bdb | - sed -e '1,/HEADER=END/d' \ - -e '/DATA=END/d' \ - -e 's/^ //' > __bdb_dump - else - # Format stores record numbers in Berkeley DB as string keys, - # it's simpler that way. Convert record numbers from strings - # to numbers. 
- $bdb/db_dump -p __bdb | - sed -e '1,/HEADER=END/d' \ - -e '/DATA=END/d' \ - -e 's/^ //' | - sed -e 's/^0*//' \ - -e 's/\.00$//' \ - -e N > __bdb_dump - fi -fi - ext='"../../ext/collators/reverse/.libs/reverse_collator.so"' bzext="../../ext/compressors/bzip2_compress/.libs/bzip2_compress.so" if test -e $bzext ; then @@ -54,7 +34,30 @@ if test -e $bzext ; then fi config='extensions=['$ext']' -$build_top/wt -C "$config" dump file:__wt | - sed -e '1,/^Data$/d' > __wt_dump +$build_top/wt -C "$config" dump file:__wt | sed -e '1,/^Data$/d' > __wt_dump + +if test $dump_bdb -ne 1; then + exit 0 +fi + +if test $colflag -eq 0; then + $bdb/db_dump -p __bdb | + sed -e '1,/HEADER=END/d' \ + -e '/DATA=END/d' \ + -e 's/^ //' > __bdb_dump +else + # Format stores record numbers in Berkeley DB as string keys, + # it's simpler that way. Convert record numbers from strings + # to numbers. + $bdb/db_dump -p __bdb | + sed -e '1,/HEADER=END/d' \ + -e '/DATA=END/d' \ + -e 's/^ //' | + sed -e 's/^0*//' \ + -e 's/\.00$//' \ + -e N > __bdb_dump +fi + cmp __wt_dump __bdb_dump > /dev/null + exit $? diff --git a/test/format/t.c b/test/format/t.c index a7d7c42e62a..a0ca4dadec5 100644 --- a/test/format/t.c +++ b/test/format/t.c @@ -16,9 +16,7 @@ static void usage(void); int main(int argc, char *argv[]) { - int ch, reps, ret; - - ret = 0; + int ch, reps; if ((g.progname = strrchr(argv[0], '/')) == NULL) g.progname = argv[0]; @@ -34,11 +32,14 @@ main(int argc, char *argv[]) config_file("CONFIG"); } + /* Default to a single thread. */ + g.threads = 1; + /* Track progress unless we're re-directing output to a file. */ g.track = isatty(STDOUT_FILENO) ? 1 : 0; /* Set values from the command line. 
*/ - while ((ch = getopt(argc, argv, "1C:c:Llqr")) != EOF) + while ((ch = getopt(argc, argv, "1C:c:Llqrt:")) != EOF) switch (ch) { case '1': /* One run */ g.c_runs = 1; @@ -67,10 +68,20 @@ main(int argc, char *argv[]) g.replay = 1; g.c_runs = 1; break; + case 't': /* Threads */ + g.threads = atoi(optarg); + break; default: usage(); } + /* Multi-threaded runs cannot be replayed. */ + if (g.threads != 1 && g.replay) { + fprintf(stderr, + "%s: -r and -t are mutually exclusive\n", g.progname); + return (EXIT_FAILURE); + } + argc -= optind; argv += optind; for (; *argv != NULL; ++argv) @@ -85,36 +96,28 @@ main(int argc, char *argv[]) config_setup(); /* Run configuration */ config_print(0); /* Dump run configuration */ + key_len_setup(); /* Setup keys */ + + if (SINGLETHREADED) + bdb_open(); /* Initial file config */ + wts_open(); - bdb_startup(); /* Initial file config */ - if (wts_startup(0)) - return (EXIT_FAILURE); + wts_load(); /* Load initial records */ + wts_verify("post-bulk verify"); /* Verify */ - key_gen_setup(); /* Setup keys */ - if (wts_bulk_load()) /* Load initial records */ - goto err; - /* Close, verify */ - if (wts_teardown() || wts_verify("post-bulk verify")) - goto err; /* Loop reading & operations */ for (reps = 0; reps < 3; ++reps) { - if (wts_startup(1)) - goto err; + wts_read_scan(); /* Read scan */ - if (wts_read_scan()) /* Read scan */ - goto err; - - /* Random operations */ - if (g.c_ops != 0 && wts_ops()) - goto err; + if (g.c_ops != 0) /* Random operations */ + wts_ops(); /* Statistics */ - if ((g.c_ops == 0 || reps == 2) && wts_stats()) - goto err; + if (g.c_ops == 0 || reps == 2) + wts_stats(); - /* Close, verify */ - if (wts_teardown() || wts_verify("post-ops verify")) - goto err; + /* Verify */ + wts_verify("post-ops verify"); /* * If no operations scheduled, quit after a single @@ -124,11 +127,14 @@ main(int argc, char *argv[]) break; } - track("shutting down BDB", 0ULL); - bdb_teardown(); + if (SINGLETHREADED) { + track("shutting down 
BDB", 0ULL, NULL); + bdb_close(); - if (wts_dump("standard", 1)) /* Dump the file */ - goto err; + wts_close(); /* Dump the file */ + wts_dump("standard", 1); + wts_open(); + } /* * If we don't delete any records, we can salvage the file. The @@ -139,36 +145,27 @@ main(int argc, char *argv[]) * Save a copy, salvage, verify, dump. */ if (g.c_delete_pct == 0) { - /* - * Save a copy of the interesting files so we can replay - * the salvage step as necessary. - */ - if (system( - "rm -rf __slvg.copy && " - "mkdir __slvg.copy && " - "cp WiredTiger* __wt __slvg.copy/") != 0) - goto err; - - if (wts_salvage() || - wts_verify("post-salvage verify") || - wts_dump("salvage", 0)) - goto err; + wts_salvage(); /* Salvage & verify */ + wts_verify("post-salvage verify"); + + wts_close(); /* Dump the file */ + wts_dump("salvage", 0); + wts_open(); } - printf("%4d: %-40s\n", g.run_cnt, config_dtype()); - } + wts_close(); /* Close */ - if (0) { -err: ret = 1; + printf("%4d: %-40s\n", g.run_cnt, config_dtype()); } + /* Flush/close any logging information. */ if (g.logfp != NULL) (void)fclose(g.logfp); if (g.rand_log != NULL) (void)fclose(g.rand_log); - config_print(ret); - return (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE); + config_print(0); + return (EXIT_SUCCESS); } /* @@ -196,7 +193,7 @@ startup(void) /* Open/truncate the logging file. */ if (g.logging != 0) { if ((g.logfp = fopen("__log", "w")) == NULL) - die("__log", errno); + die(errno, "fopen: __log"); (void)setvbuf(g.logfp, NULL, _IOLBF, 0); } } @@ -222,9 +219,29 @@ onint(int signo) * Report an error and quit. */ void -die(const char *m, int e) +die(int e, const char *fmt, ...) { - fprintf(stderr, "%s: %s: %s\n", g.progname, m, wiredtiger_strerror(e)); + va_list ap; + + if (fmt != NULL) { /* Death message. 
*/ + fprintf(stderr, "%s: ", g.progname); + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + if (e != 0) + fprintf(stderr, ": %s", wiredtiger_strerror(e)); + fprintf(stderr, "\n"); + } + + /* Flush/close any logging information. */ + if (g.logfp != NULL) + (void)fclose(g.logfp); + if (g.rand_log != NULL) + (void)fclose(g.rand_log); + + /* Display the configuration that failed. */ + config_print(1); + exit(EXIT_FAILURE); } @@ -236,8 +253,9 @@ static void usage(void) { fprintf(stderr, - "usage: %s [-1Llqr] [-C wiredtiger-config] [-c config-file] " - "[name=value ...]\n", + "usage: %s [-1Llqr]\n " + "[-C wiredtiger-config] [-c config-file] " + "[-t threads] [name=value ...]\n", g.progname); fprintf(stderr, "%s", "\t-1 run once\n" @@ -246,7 +264,8 @@ usage(void) "\t-L output to a log file\n" "\t-l log operations (implies -L)\n" "\t-q run quietly\n" - "\t-r replay the last run\n"); + "\t-r replay the last run\n" + "\t-t threads\n"); fprintf(stderr, "\n"); diff --git a/test/format/util.c b/test/format/util.c index b9435b0add3..648b5d82d9c 100644 --- a/test/format/util.c +++ b/test/format/util.c @@ -8,86 +8,87 @@ #include "format.h" void -key_gen(void *keyp, uint32_t *sizep, uint64_t keyno, int insert) +key_len_setup() { - int len; + size_t i; + + /* + * The key is a variable length item with a leading 10-digit value. + * Since we have to be able re-construct it from the record number + * (when doing row lookups), we pre-load a set of random lengths in + * a lookup table, and then use the record number to choose one of + * the pre-loaded lengths. + * + * Fill in the random key lengths. 
+ */ + for (i = 0; i < sizeof(g.key_rand_len) / sizeof(g.key_rand_len[0]); ++i) + g.key_rand_len[i] = (uint16_t)MMRAND(g.c_key_min, g.c_key_max); +} + +void +key_gen_setup(uint8_t **keyp) +{ + uint8_t *key; + size_t i; + + if ((key = malloc(g.c_key_max)) == NULL) + die(errno, "malloc"); + for (i = 0; i < g.c_key_max; ++i) + key[i] = "abcdefghijklmnopqrstuvwxyz"[i % 26]; + *keyp = key; +} + +void +key_gen(uint8_t *key, uint32_t *sizep, uint64_t keyno, int insert) +{ + int len, suffix; /* * The key always starts with a 10-digit string (the specified cnt) * followed by two digits, a random number between 1 and 15 if it's * an insert, otherwise 00. */ - len = insert ? - sprintf(g.key_gen_buf, "%010" PRIu64 ".%02d", keyno, - (int)MMRAND(1, 15)) : - sprintf(g.key_gen_buf, "%010" PRIu64 ".00", keyno); + suffix = insert ? (int)MMRAND(1, 15) : 0; + len = sprintf((char *)key, "%010" PRIu64 ".%02d", keyno, suffix); /* * In a column-store, the key is only used for BDB, and so it doesn't * need a random length. */ if (g.c_file_type == ROW) { - g.key_gen_buf[len] = '/'; + key[len] = '/'; len = g.key_rand_len[keyno % (sizeof(g.key_rand_len) / sizeof(g.key_rand_len[0]))]; } - *(void **)keyp = g.key_gen_buf; *sizep = (uint32_t)len; } void -key_gen_setup(void) +val_gen_setup(uint8_t **valp) { - size_t i; + uint8_t *val; + size_t i, len; /* - * The key is a variable length item with a leading 10-digit value. - * Since we have to be able re-construct it from the record number - * (when doing row lookups), we pre-load a set of random lengths in - * a lookup table, and then use the record number to choose one of - * the pre-loaded lengths. + * Set initial buffer contents to reconizable text. * - * Fill in the random key lengths. + * Add a few extra bytes in order to guarantee we can always offset + * into the buffer by a few extra bytes, used to generate different + * data for column-store run-length encoded files. 
*/ - if (g.key_gen_buf != NULL) { - free(g.key_gen_buf); - g.key_gen_buf = NULL; - } - for (i = 0; i < sizeof(g.key_rand_len) / sizeof(g.key_rand_len[0]); ++i) - g.key_rand_len[i] = (uint16_t)MMRAND(g.c_key_min, g.c_key_max); + len = g.c_value_max + 20; + if ((val = malloc(len)) == NULL) + die(errno, "malloc"); + for (i = 0; i < len; ++i) + val[i] = (u_char)"ABCDEFGHIJKLMNOPQRSTUVWXYZ"[i % 26]; - if ((g.key_gen_buf = malloc(g.c_key_max)) == NULL) - die("malloc", errno); - for (i = 0; i < g.c_key_max; ++i) - g.key_gen_buf[i] = "abcdefghijklmnopqrstuvwxyz"[i % 26]; + *valp = val; } void -value_gen(void *valuep, uint32_t *sizep, uint64_t keyno) +value_gen(uint8_t *val, uint32_t *sizep, uint64_t keyno) { - static size_t blen = 0; static const char *dup_data = "duplicate data item"; - static u_char *buf = NULL; - size_t i; - - /* - * Set initial buffer contents to reconizable text. - * - * Add a few extra bytes in order to guarantee we can always offset - * into the buffer by a few extra bytes, used to generate different - * data for column-store run-length encoded files. 
- */ - if (blen < g.c_value_max + 10) { - if (buf != NULL) { - free(buf); - buf = NULL; - } - blen = g.c_value_max + 10; - if ((buf = malloc(blen)) == NULL) - die("malloc", errno); - for (i = 0; i < blen; ++i) - buf[i] = (u_char)"ABCDEFGHIJKLMNOPQRSTUVWXYZ"[i % 26]; - } /* * Fixed-length records: take the low N bits from the last digit of @@ -95,16 +96,15 @@ value_gen(void *valuep, uint32_t *sizep, uint64_t keyno) */ if (g.c_file_type == FIX) { switch (g.c_bitcnt) { - case 8: buf[0] = MMRAND(1, 0xff); break; - case 7: buf[0] = MMRAND(1, 0x7f); break; - case 6: buf[0] = MMRAND(1, 0x3f); break; - case 5: buf[0] = MMRAND(1, 0x1f); break; - case 4: buf[0] = MMRAND(1, 0x0f); break; - case 3: buf[0] = MMRAND(1, 0x07); break; - case 2: buf[0] = MMRAND(1, 0x03); break; - case 1: buf[0] = 1; break; + case 8: val[0] = MMRAND(1, 0xff); break; + case 7: val[0] = MMRAND(1, 0x7f); break; + case 6: val[0] = MMRAND(1, 0x3f); break; + case 5: val[0] = MMRAND(1, 0x1f); break; + case 4: val[0] = MMRAND(1, 0x0f); break; + case 3: val[0] = MMRAND(1, 0x07); break; + case 2: val[0] = MMRAND(1, 0x03); break; + case 1: val[0] = 1; break; } - *(void **)valuep = buf; *sizep = 1; return; } @@ -114,7 +114,7 @@ value_gen(void *valuep, uint32_t *sizep, uint64_t keyno) * test that by inserting a zero-length data item every so often. 
*/ if (++keyno % 63 == 0) { - *(void **)valuep = buf; + val[0] = '\0'; *sizep = 0; return; } @@ -130,33 +130,37 @@ value_gen(void *valuep, uint32_t *sizep, uint64_t keyno) if (g.c_file_type == VAR && g.c_repeat_data_pct != 0 && (u_int)wts_rand() % 100 > g.c_repeat_data_pct) { - *(void **)valuep = (void *)dup_data; + (void)strcpy((char *)val, dup_data); *sizep = (uint32_t)strlen(dup_data); return; } - snprintf((char *)buf, blen, "%010" PRIu64, keyno); - buf[10] = '/'; - *(void **)valuep = buf; + sprintf((char *)val, "%010" PRIu64, keyno); + val[10] = '/'; *sizep = MMRAND(g.c_value_min, g.c_value_max); } void -track(const char *s, uint64_t i) +track(const char *tag, uint64_t cnt, TINFO *tinfo) { static int lastlen = 0; int len; char msg[128]; - if (!g.track || s == NULL) + if (!g.track || tag == NULL) return; - if (i == 0) - len = snprintf(msg, sizeof(msg), "%4d: %s", - g.run_cnt, s); + if (tinfo == NULL && cnt == 0) + len = snprintf(msg, sizeof(msg), "%4d: %s", g.run_cnt, tag); + else if (tinfo == NULL) + len = snprintf( + msg, sizeof(msg), "%4d: %s: %" PRIu64, g.run_cnt, tag, cnt); else - len = snprintf(msg, sizeof(msg), "%4d: %s %" PRIu64, - g.run_cnt, s, i); + len = snprintf(msg, sizeof(msg), + "%4d: %s: " "search %" PRIu64 + ", insert %" PRIu64 ", update %" PRIu64 ", remove %" PRIu64, + g.run_cnt, tag, + tinfo->search, tinfo->insert, tinfo->update, tinfo->remove); if (lastlen > len) { memset(msg + len, ' ', (size_t)(lastlen - len)); @@ -178,6 +182,10 @@ wts_rand(void) char buf[64]; uint32_t r; + /* If we're threaded, it's not repeatable, ignore the log. */ + if (!SINGLETHREADED) + return ((uint32_t)rand()); + /* * We can entirely reproduce a run based on the random numbers used * in the initial run, plus the configuration files. It would be @@ -189,7 +197,7 @@ wts_rand(void) if (g.rand_log == NULL) { if ((g.rand_log = fopen("__rand", g.replay ? 
"r" : "w")) == NULL) - die("__rand", errno); + die(errno, "fopen: __rand"); if (!g.replay) { srand((u_int)(0xdeadbeef ^ (u_int)time(NULL))); (void)setvbuf(g.rand_log, NULL, _IOLBF, 0); @@ -203,7 +211,7 @@ wts_rand(void) "exiting\n"); exit(EXIT_SUCCESS); } - die("random number log", errno); + die(errno, "feof: random number log"); } r = (uint32_t)strtoul(buf, NULL, 10); diff --git a/test/format/wts.c b/test/format/wts.c index 16623c9bbee..11e1f05d2e8 100644 --- a/test/format/wts.c +++ b/test/format/wts.c @@ -7,9 +7,7 @@ #include "format.h" -static int wts_close(WT_CONNECTION *); -static int wts_open(WT_CONNECTION **, WT_SESSION **session); -static int wts_sync(void); +static void wts_sync(void); static int handle_message(WT_EVENT_HANDLER *handler, const char *message) @@ -28,12 +26,12 @@ handle_message(WT_EVENT_HANDLER *handler, const char *message) * Default WT_EVENT_HANDLER->handle_progress implementation: ignore. */ static int -handle_progress(WT_EVENT_HANDLER *handler, - const char *operation, uint64_t progress) +handle_progress( + WT_EVENT_HANDLER *handler, const char *operation, uint64_t progress) { UNUSED(handler); - track(operation, progress); + track(operation, progress, NULL); return (0); } @@ -43,14 +41,15 @@ static WT_EVENT_HANDLER event_handler = { handle_progress }; -static int -wts_open(WT_CONNECTION **connp, WT_SESSION **sessionp) +void +wts_open(void) { WT_CONNECTION *conn; WT_SESSION *session; + uint32_t maxintlpage, maxintlitem, maxleafpage, maxleafitem; int ret; const char *ext1, *ext2; - char config[256]; + char config[512], *end, *p; /* If the bzip2 compression module has been built, use it. */ ext1 = "../../ext/compressors/bzip2_compress/.libs/bzip2_compress.so"; @@ -70,50 +69,11 @@ wts_open(WT_CONNECTION **connp, WT_SESSION **sessionp) g.progname, g.c_cache, ext1, ext2, g.config_open == NULL ? 
"" : g.config_open); - if ((ret = wiredtiger_open(NULL, &event_handler, config, &conn)) != 0) { - fprintf(stderr, "%s: wiredtiger_open: %s\n", - g.progname, wiredtiger_strerror(ret)); - return (1); - } - - if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) { - fprintf(stderr, "%s: conn.session: %s\n", - g.progname, wiredtiger_strerror(ret)); - (void)conn->close(conn, NULL); - return (1); - } + if ((ret = wiredtiger_open(NULL, &event_handler, config, &conn)) != 0) + die(ret, "wiredtiger_open"); - *sessionp = session; - *connp = conn; - return (0); -} - -static int -wts_close(WT_CONNECTION *conn) -{ - int ret; - if ((ret = conn->close(conn, NULL)) != 0) { - fprintf(stderr, "%s: conn.close: %s\n", - g.progname, wiredtiger_strerror(ret)); - return (1); - } - - return (0); -} - -int -wts_startup(int open_cursors) -{ - time_t now; - WT_CONNECTION *conn; - WT_CURSOR *cursor, *cursor_insert; - WT_SESSION *session; - uint32_t maxintlpage, maxintlitem, maxleafpage, maxleafitem; - int ret; - char config[512], *end, *p; - - if (wts_open(&conn, &session)) - return (1); + if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) + die(ret, "connection.open_session"); maxintlpage = 1U << g.c_intl_page_max; maxintlitem = MMRAND(maxintlpage / 50, maxintlpage / 40); @@ -160,94 +120,36 @@ wts_startup(int open_cursors) break; } - if ((ret = session->create(session, WT_TABLENAME, config)) != 0) { - fprintf(stderr, "%s: create table: %s\n", - g.progname, wiredtiger_strerror(ret)); - return (1); - } + if ((ret = session->create(session, WT_TABLENAME, config)) != 0) + die(ret, "session.create: %s", WT_TABLENAME); - cursor = cursor_insert = NULL; - if (open_cursors) { - /* - * We open two cursors: one configured for overwriting and one - * configured for append if we're dealing with a column-store. 
- * - * The reason is when testing with existing records, we don't - * track if a record was deleted or not, which means we need to - * use cursor->insert with overwriting configured. But, in - * column-store files where we're testing with new, appended - * records, we don't want to have to specify the record number, - * which requires an append configuration. - */ - if ((ret = session->open_cursor( - session, WT_TABLENAME, NULL, "overwrite", &cursor)) != 0) { - fprintf(stderr, "%s: open_cursor: %s\n", - g.progname, wiredtiger_strerror(ret)); - return (1); - } - if ((g.c_file_type == FIX || g.c_file_type == VAR) && - (ret = session->open_cursor(session, - WT_TABLENAME, NULL, "append", &cursor_insert)) != 0) { - fprintf(stderr, "%s: open_cursor: %s\n", - g.progname, wiredtiger_strerror(ret)); - return (1); - } - } - - if (g.logging == LOG_OPS) { - (void)time(&now); - (void)session->msg_printf(session, - "===============\nWT start: %s===============", - ctime(&now)); - } + if ((ret = session->close(session, NULL)) != 0) + die(ret, "session.close"); g.wts_conn = conn; - g.wts_cursor = cursor; - g.wts_cursor_insert = cursor_insert; - g.wts_session = session; - - return (0); } -int -wts_teardown(void) +void +wts_close() { WT_CONNECTION *conn; - WT_CURSOR *cursor, *cursor_insert; - WT_SESSION *session; - time_t now; int ret; conn = g.wts_conn; - cursor = g.wts_cursor; - cursor_insert = g.wts_cursor_insert; - session = g.wts_session; - - if (g.logging == LOG_OPS) { - (void)time(&now); - (void)session->msg_printf(session, - "===============\nWT stop: %s===============", - ctime(&now)); - } - /* - * Close the open cursors -- they will block sync. - */ - if ((cursor_insert != NULL && - (ret = cursor_insert->close(cursor_insert)) != 0) || - (cursor != NULL && (ret = cursor->close(cursor)) != 0)) - die("cursor.close", ret); + wts_sync(); - ret = wts_sync(); - return (wts_close(conn) ? 
1 : ret); + if ((ret = conn->close(conn, NULL)) != 0) + die(ret, "connection.close"); } -int +void wts_dump(const char *tag, int dump_bdb) { - char cmd[128]; + int ret; + char cmd[256]; - track("dump files and compare", 0ULL); + track("dump files and compare", 0ULL, NULL); switch (g.c_file_type) { case FIX: case VAR: @@ -258,84 +160,87 @@ wts_dump(const char *tag, int dump_bdb) snprintf(cmd, sizeof(cmd), "sh ./s_dumpcmp%s", dump_bdb ? " -b" : ""); break; - default: - return (1); - } - if (system(cmd) != 0) { - fprintf(stderr, - "%s: %s dump comparison failed\n", g.progname, tag); - return (1); } - - return (0); + if ((ret = system(cmd)) != 0) + die(ret, "%s: dump comparison failed", tag); } -int +void wts_salvage(void) { WT_CONNECTION *conn; WT_SESSION *session; int ret; - track("salvage", 0ULL); + conn = g.wts_conn; - if (wts_open(&conn, &session)) - return (1); + track("salvage", 0ULL, NULL); - if ((ret = session->salvage(session, WT_TABLENAME, NULL)) != 0) { - fprintf(stderr, "%s: salvage: %s\n", - g.progname, wiredtiger_strerror(ret)); - return (1); - } - - return (wts_close(conn)); + /* + * Save a copy of the interesting files so we can replay the salvage + * step as necessary. 
+ */ + if ((ret = system( + "rm -rf __slvg.copy && " + "mkdir __slvg.copy && " + "cp WiredTiger* __wt __slvg.copy/")) != 0) + die(ret, "salvage cleanup step failed"); + + if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) + die(ret, "connection.open_session"); + if ((ret = session->salvage(session, WT_TABLENAME, NULL)) != 0) + die(ret, "session.salvage: %s", WT_TABLENAME); + if ((ret = session->close(session, NULL)) != 0) + die(ret, "session.close"); } -static int +static void wts_sync(void) { + WT_CONNECTION *conn; WT_SESSION *session; int ret; - session = g.wts_session; + conn = g.wts_conn; - track("sync", 0ULL); + track("sync", 0ULL, NULL); + if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) + die(ret, "connection.open_session"); if ((ret = session->sync( - session, WT_TABLENAME, NULL)) != 0 && ret != EBUSY) { - fprintf(stderr, "%s: sync: %s\n", - g.progname, wiredtiger_strerror(ret)); - return (1); - } - return (0); + session, WT_TABLENAME, NULL)) != 0 && ret != EBUSY) + die(ret, "session.sync: %s", WT_TABLENAME); + if ((ret = session->close(session, NULL)) != 0) + die(ret, "session.close"); } -int +void wts_verify(const char *tag) { WT_CONNECTION *conn; WT_SESSION *session; int ret; - track("verify", 0ULL); + conn = g.wts_conn; - if (wts_open(&conn, &session)) - return (1); + track("verify", 0ULL, NULL); + if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) + die(ret, "connection.open_session"); if ((ret = session->verify(session, WT_TABLENAME, NULL)) != 0) - fprintf(stderr, "%s: %s verify: %s\n", - g.progname, tag, wiredtiger_strerror(ret)); - - return (wts_close(conn) ? 1 : ret); + die(ret, "session.verify: %s: %s", WT_TABLENAME, tag); + if ((ret = session->close(session, NULL)) != 0) + die(ret, "session.close"); } /* * wts_stats -- * Dump the run's statistics. 
*/ -int +void wts_stats(void) { + WT_CONNECTION *conn; WT_CURSOR *cursor; WT_SESSION *session; FILE *fp; @@ -343,52 +248,48 @@ wts_stats(void) uint64_t v; int ret; - session = g.wts_session; + track("stat", 0ULL, NULL); - track("stat", 0ULL); + conn = g.wts_conn; + if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) + die(ret, "connection.open_session"); if ((fp = fopen("__stats", "w")) == NULL) - die("__stats", errno); + die(errno, "fopen: __stats"); /* Connection statistics. */ if ((ret = session->open_cursor(session, - "statistics:", NULL, NULL, &cursor)) != 0) { - fprintf(stderr, "%s: stat cursor open failed: %s\n", - g.progname, wiredtiger_strerror(ret)); - return (1); - } + "statistics:", NULL, NULL, &cursor)) != 0) + die(ret, "session.open_cursor"); + while ((ret = cursor->next(cursor)) == 0 && (ret = cursor->get_value(cursor, &desc, &pval, &v)) == 0) - if (fprintf(fp, "%s=%s\n", desc, pval) < 0) { - ret = errno; - break; - } + if (fprintf(fp, "%s=%s\n", desc, pval) < 0) + die(errno, "fprintf"); if (ret != WT_NOTFOUND) - die("cursor.next", ret); + die(ret, "cursor.next"); if ((ret = cursor->close(cursor)) != 0) - die("cursor.close", ret); + die(ret, "cursor.close"); /* File statistics. 
*/ if ((ret = session->open_cursor(session, - "statistics:" WT_TABLENAME, NULL, NULL, &cursor)) != 0) { - fprintf(stderr, "%s: stat cursor open failed: %s\n", - g.progname, wiredtiger_strerror(ret)); - return (1); - } + "statistics:" WT_TABLENAME, NULL, NULL, &cursor)) != 0) + die(ret, "session.open_cursor"); + while ((ret = cursor->next(cursor)) == 0 && (ret = cursor->get_value(cursor, &desc, &pval, &v)) == 0) - if (fprintf(fp, "%s=%s\n", desc, pval) < 0) { - ret = errno; - break; - } + if (fprintf(fp, "%s=%s\n", desc, pval) < 0) + die(errno, "fprintf"); if (ret != WT_NOTFOUND) - die("cursor.next", ret); + die(ret, "cursor.next"); if ((ret = cursor->close(cursor)) != 0) - die("cursor.close", ret); + die(ret, "cursor.close"); - (void)fclose(fp); + if ((ret = fclose(fp)) != 0) + die(ret, "fclose"); - return (0); + if ((ret = session->close(session, NULL)) != 0) + die(ret, "session.close"); } diff --git a/test/format/wts_bulk.c b/test/format/wts_bulk.c index 2d514a4878b..9a1ffb55047 100644 --- a/test/format/wts_bulk.c +++ b/test/format/wts_bulk.c @@ -7,19 +7,20 @@ #include "format.h" -static int bulk(WT_ITEM **, WT_ITEM **); - -int -wts_bulk_load(void) +void +wts_load(void) { + WT_CONNECTION *conn; WT_CURSOR *cursor; + WT_ITEM key, value; WT_SESSION *session; - WT_ITEM *key, *value; - uint64_t insert_count; + uint8_t *keybuf, *valbuf; int ret; - session = g.wts_session; - key = value = NULL; /* -Wuninitialized */ + conn = g.wts_conn; + + if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) + die(ret, "connection.open_session"); /* * Avoid bulk load with a custom collator, because the order of @@ -27,90 +28,78 @@ wts_bulk_load(void) */ if ((ret = session->open_cursor(session, WT_TABLENAME, NULL, (g.c_file_type == ROW && g.c_reverse) ? 
NULL : "bulk", - &cursor)) != 0) { - fprintf(stderr, "%s: cursor open failed: %s\n", - g.progname, wiredtiger_strerror(ret)); - return (1); - } + &cursor)) != 0) + die(ret, "session.open_cursor"); - insert_count = 0; - while (bulk(&key, &value) == 0) { - /* Report on progress every 100 inserts. */ - if (++insert_count % 100 == 0) - track("bulk load", insert_count); - - if (key != NULL) - cursor->set_key(cursor, key); - if (g.c_file_type == FIX) - cursor->set_value(cursor, *(uint8_t *)value->data); - else - cursor->set_value(cursor, value); - if ((ret = cursor->insert(cursor)) != 0) { - fprintf(stderr, "%s: cursor insert failed: %s\n", - g.progname, wiredtiger_strerror(ret)); - ret = 1; - goto err; + /* Set up the default key buffer. */ + memset(&key, 0, sizeof(key)); + key_gen_setup(&keybuf); + memset(&value, 0, sizeof(value)); + val_gen_setup(&valbuf); + + for (;;) { + if (++g.key_cnt > g.c_rows) { + g.key_cnt = g.rows = g.c_rows; + break; } - } -err: (void)cursor->close(cursor); - return (ret); -} + /* Report on progress every 100 inserts. */ + if (g.key_cnt % 100 == 0) + track("bulk load", g.key_cnt, NULL); -/* - * bulk -- - * WiredTiger bulk load routine. 
- */ -static int -bulk(WT_ITEM **keyp, WT_ITEM **valuep) -{ - static WT_ITEM key, value; - WT_SESSION *session; + key_gen(keybuf, &key.size, (uint64_t)g.key_cnt, 0); + key.data = keybuf; + value_gen(valbuf, &value.size, (uint64_t)g.key_cnt); + value.data = valbuf; - session = g.wts_session; + switch (g.c_file_type) { + case FIX: + if (g.logging == LOG_OPS) + (void)session->msg_printf(session, + "%-10s %" PRIu32 " {0x%02" PRIx8 "}", + "bulk V", + g.key_cnt, ((uint8_t *)value.data)[0]); + cursor->set_value(cursor, *(uint8_t *)value.data); + break; + case VAR: + cursor->set_value(cursor, &value); + if (g.logging == LOG_OPS) + (void)session->msg_printf(session, + "%-10s %" PRIu32 " {%.*s}", "bulk V", + g.key_cnt, + (int)value.size, (char *)value.data); + break; + case ROW: + cursor->set_key(cursor, &key); + if (g.logging == LOG_OPS) + (void)session->msg_printf(session, + "%-10s %" PRIu32 " {%.*s}", "bulk K", + g.key_cnt, (int)key.size, (char *)key.data); + cursor->set_value(cursor, &value); + if (g.logging == LOG_OPS) + (void)session->msg_printf(session, + "%-10s %" PRIu32 " {%.*s}", "bulk V", + g.key_cnt, + (int)value.size, (char *)value.data); + break; + } - if (++g.key_cnt > g.c_rows) { - g.key_cnt = g.rows = g.c_rows; - return (1); - } + if ((ret = cursor->insert(cursor)) != 0) + die(ret, "cursor.insert"); + + if (!SINGLETHREADED) + continue; - key_gen(&key.data, &key.size, (uint64_t)g.key_cnt, 0); - value_gen(&value.data, &value.size, (uint64_t)g.key_cnt); - - switch (g.c_file_type) { - case FIX: - *keyp = NULL; - *valuep = &value; - if (g.logging == LOG_OPS) - (void)session->msg_printf(session, - "%-10s %" PRIu32 " {0x%02" PRIx8 "}", - "bulk V", - g.key_cnt, ((uint8_t *)value.data)[0]); - break; - case VAR: - *keyp = NULL; - *valuep = &value; - if (g.logging == LOG_OPS) - (void)session->msg_printf(session, - "%-10s %" PRIu32 " {%.*s}", "bulk V", - g.key_cnt, (int)value.size, (char *)value.data); - break; - case ROW: - *keyp = &key; - if (g.logging == LOG_OPS) - 
(void)session->msg_printf(session, - "%-10s %" PRIu32 " {%.*s}", "bulk K", - g.key_cnt, (int)key.size, (char *)key.data); - *valuep = &value; - if (g.logging == LOG_OPS) - (void)session->msg_printf(session, - "%-10s %" PRIu32 " {%.*s}", "bulk V", - g.key_cnt, (int)value.size, (char *)value.data); - break; + /* Insert the item into BDB. */ + bdb_insert(key.data, key.size, value.data, value.size); } - /* Insert the item into BDB. */ - bdb_insert(key.data, key.size, value.data, value.size); + if ((ret = cursor->close(cursor)) != 0) + die(ret, "cursor.close"); + + if ((ret = session->close(session, NULL)) != 0) + die(ret, "session.close"); - return (0); + free(keybuf); + free(valbuf); } diff --git a/test/format/wts_ops.c b/test/format/wts_ops.c index b9f4b80772e..0a2c8e343da 100644 --- a/test/format/wts_ops.c +++ b/test/format/wts_ops.c @@ -7,34 +7,150 @@ #include "format.h" -static int wts_col_del(uint64_t, int *); -static int wts_col_insert(uint64_t *); -static int wts_col_put(uint64_t); -static int wts_notfound_chk(const char *, int, int, uint64_t); -static int wts_np(int, int, int *); -static int wts_read(uint64_t); -static int wts_row_del(uint64_t, int *); -static int wts_row_put(uint64_t, int); -static void wts_stream_item(const char *, WT_ITEM *); +static void col_del(WT_CURSOR *, WT_ITEM *, uint64_t, int *); +static void col_insert(WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t *); +static void col_put(WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t); +static void nextprev(WT_CURSOR *, int, int *); +static int notfound_chk(const char *, int, int, uint64_t); +static void *ops(void *); +static void read_row(WT_CURSOR *, WT_ITEM *, uint64_t); +static void row_del(WT_CURSOR *, WT_ITEM *, uint64_t, int *); +static void row_put(WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t, int); +static void print_item(const char *, WT_ITEM *); /* * wts_ops -- - * Perform a number of operations. + * Perform a number of operations in a set of threads. 
*/ -int +void wts_ops(void) { + TINFO *tinfo, total; + WT_CONNECTION *conn; + WT_SESSION *session; + time_t now; + int i, ret, running; + + conn = g.wts_conn; + + /* Open a session. */ + if (g.logging == LOG_OPS) { + if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) + die(ret, "connection.open_session"); + + (void)time(&now); + (void)session->msg_printf(session, + "===============\nthread ops start: %s===============", + ctime(&now)); + } + + if (g.threads == 1) { + memset(&total, 0, sizeof(total)); + (void)ops(&total); + } else { + /* Create thread structure. */ + if ((tinfo = calloc((size_t)g.threads, sizeof(*tinfo))) == NULL) + die(errno, "calloc"); + for (i = 0; i < g.threads; ++i) + if ((ret = pthread_create( + &tinfo[i].tid, NULL, ops, &tinfo[i])) != 0) + die(ret, "pthread_create"); + + /* Wait for the threads. */ + for (;;) { + total.search = + total.insert = total.remove = total.update = 0; + for (i = running = 0; i < g.threads; ++i) { + total.search += tinfo[i].search; + total.insert += tinfo[i].insert; + total.remove += tinfo[i].remove; + total.update += tinfo[i].update; + switch (tinfo[i].state) { + case TINFO_RUNNING: + running = 1; + break; + case TINFO_COMPLETE: + tinfo[i].state = TINFO_JOINED; + (void)pthread_join(tinfo[i].tid, NULL); + break; + case TINFO_JOINED: + break; + } + } + track("read/write ops", 0ULL, &total); + if (!running) + break; + usleep(750000); /* 3/4 of a second */ + } + } + + if (g.logging == LOG_OPS) { + (void)time(&now); + (void)session->msg_printf(session, + "===============\nthread ops stop: %s===============", + ctime(&now)); + + if ((ret = session->close(session, NULL)) != 0) + die(ret, "session.close"); + } +} + +static void * +ops(void *arg) +{ + TINFO *tinfo; + WT_CONNECTION *conn; + WT_CURSOR *cursor, *cursor_insert; + WT_SESSION *session; + WT_ITEM key, value; uint64_t cnt, keyno; uint32_t op; u_int np; - int dir, insert, notfound; + int dir, insert, notfound, ret; + uint8_t *keybuf, *valbuf; + + conn = 
g.wts_conn; + + tinfo = arg; + tinfo->state = TINFO_RUNNING; + + /* Set up the default key and value buffers. */ + memset(&key, 0, sizeof(key)); + key_gen_setup(&keybuf); + memset(&value, 0, sizeof(value)); + val_gen_setup(&valbuf); + + /* Open a session. */ + if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) + die(ret, "connection.open_session"); + + /* + * Open two cursors: one configured for overwriting and one configured + * for append if we're dealing with a column-store. + * + * The reason is when testing with existing records, we don't track if + * a record was deleted or not, which means we must use cursor->insert + * with overwriting configured. But, in column-store files where we're + * testing with new, appended records, we don't want to have to specify + * the record number, which requires an append configuration. + */ + if ((ret = session->open_cursor(session, + WT_TABLENAME, NULL, "overwrite", &cursor)) != 0) + die(ret, "session.open_cursor"); + if ((g.c_file_type == FIX || g.c_file_type == VAR) && + (ret = session->open_cursor(session, + WT_TABLENAME, NULL, "append", &cursor_insert)) != 0) + die(ret, "session.open_cursor"); for (cnt = 0; cnt < g.c_ops; ++cnt) { - if (cnt % 10 == 0) - track("read/write ops", cnt); + if (SINGLETHREADED && cnt % 100 == 0) + track("read/write ops", 0ULL, tinfo); insert = notfound = 0; + keyno = MMRAND(1, g.rows); + key.data = keybuf; + value.data = valbuf; /* * Perform some number of operations: the percentage of deletes, @@ -45,50 +161,52 @@ wts_ops(void) */ op = (uint32_t)(wts_rand() % 100); if (op < g.c_delete_pct) { + ++tinfo->remove; switch (g.c_file_type) { case ROW: /* * If deleting a non-existent record, the cursor * won't be positioned, and so can't do a next. 
*/ - if (wts_row_del(keyno, &notfound)) - return (1); + row_del(cursor, &key, keyno, &notfound); break; case FIX: case VAR: - if (wts_col_del(keyno, &notfound)) - return (1); + col_del(cursor, &key, keyno, &notfound); break; } } else if (op < g.c_delete_pct + g.c_insert_pct) { + ++tinfo->insert; switch (g.c_file_type) { case ROW: - if (wts_row_put(keyno, 1)) - return (1); + row_put(cursor, &key, &value, keyno, 1); break; case FIX: case VAR: - if (wts_col_insert(&keyno)) - return (1); + /* + * Reset the standard cursor so it doesn't keep + * pages pinned. + */ + cursor->reset(cursor); + col_insert(cursor_insert, &key, &value, &keyno); insert = 1; break; } } else if ( op < g.c_delete_pct + g.c_insert_pct + g.c_write_pct) { + ++tinfo->update; switch (g.c_file_type) { case ROW: - if (wts_row_put(keyno, 0)) - return (1); + row_put(cursor, &key, &value, keyno, 0); break; case FIX: case VAR: - if (wts_col_put(keyno)) - return (1); + col_put(cursor, &key, &value, keyno); break; } } else { - if (wts_read(keyno)) - return (1); + ++tinfo->search; + read_row(cursor, &key, keyno); continue; } @@ -100,30 +218,54 @@ wts_ops(void) for (np = 0; np < MMRAND(1, 8); ++np) { if (notfound) break; - if (wts_np(dir, insert, &notfound)) - return (1); + nextprev( + insert ? cursor_insert : cursor, dir, &notfound); } - if (insert) { - WT_CURSOR *cursor = g.wts_cursor_insert; - cursor->reset(cursor); - } + if (insert) + cursor_insert->reset(cursor_insert); - /* Then read the value we modified to confirm it worked. */ - if (wts_read(keyno)) - return (1); + /* Read the value we modified to confirm the operation. */ + read_row(cursor, &key, keyno); } - return (0); + + if ((ret = session->close(session, NULL)) != 0) + die(ret, "session.close"); + + free(keybuf); + free(valbuf); + + tinfo->state = TINFO_COMPLETE; + return (NULL); } /* * wts_read_scan -- * Read and verify all elements in a file.
*/ -int +void wts_read_scan(void) { + WT_CONNECTION *conn; + WT_CURSOR *cursor; + WT_ITEM key; + WT_SESSION *session; uint64_t cnt, last_cnt; + uint8_t *keybuf; + int ret; + + conn = g.wts_conn; + + /* Set up the default key buffer. */ + memset(&key, 0, sizeof(key)); + key_gen_setup(&keybuf); + + /* Open a session and cursor pair. */ + if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) + die(ret, "connection.open_session"); + if ((ret = session->open_cursor( + session, WT_TABLENAME, NULL, NULL, &cursor)) != 0) + die(ret, "session.open_cursor"); /* Check a random subset of the records using the key. */ for (last_cnt = cnt = 0; cnt < g.key_cnt;) { @@ -131,14 +273,18 @@ wts_read_scan(void) if (cnt > g.rows) cnt = g.rows; if (cnt - last_cnt > 1000) { - track("read row scan", cnt); + track("read row scan", cnt, NULL); last_cnt = cnt; } - if (wts_read(cnt)) - return (1); + key.data = keybuf; + read_row(cursor, &key, cnt); } - return (0); + + if ((ret = session->close(session, NULL)) != 0) + die(ret, "session.close"); + + free(keybuf); } #define NTF_CHK(a) do { \ @@ -146,37 +292,29 @@ wts_read_scan(void) case 0: \ break; \ case 1: \ - return (1); \ - case 2: \ - return (0); \ + return; \ } \ } while (0) /* - * wts_read -- + * read_row -- * Read and verify a single element in a row- or column-store file. */ -static int -wts_read(uint64_t keyno) +static void +read_row(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno) { - static WT_ITEM key, value, bdb_value; - WT_CURSOR *cursor; + WT_ITEM bdb_value, value; WT_SESSION *session; int notfound, ret; uint8_t bitfield; - cursor = g.wts_cursor; - session = g.wts_session; + session = cursor->session; /* Log the operation */ if (g.logging == LOG_OPS) (void)session->msg_printf( session, "%-10s%" PRIu64, "read", keyno); - /* Retrieve the BDB value. */ - if (bdb_read(keyno, &bdb_value.data, &bdb_value.size, &notfound)) - return (1); - /* Retrieve the key/value pair by key.
*/ switch (g.c_file_type) { case FIX: @@ -184,8 +322,8 @@ wts_read(uint64_t keyno) cursor->set_key(cursor, keyno); break; case ROW: - key_gen(&key.data, &key.size, keyno, 0); - cursor->set_key(cursor, &key); + key_gen((uint8_t *)key->data, &key->size, keyno, 0); + cursor->set_key(cursor, key); break; } @@ -194,14 +332,20 @@ wts_read(uint64_t keyno) ret = cursor->get_value(cursor, &bitfield); value.data = &bitfield; value.size = 1; - } else + } else { + memset(&value, 0, sizeof(value)); ret = cursor->get_value(cursor, &value); + } } - if (ret != 0 && ret != WT_NOTFOUND) { - fprintf(stderr, "%s: wts_read: read row %" PRIu64 ": %s\n", - g.progname, keyno, wiredtiger_strerror(ret)); - return (1); - } + if (ret != 0 && ret != WT_NOTFOUND) + die(ret, "read_row: read row %" PRIu64, keyno); + + if (!SINGLETHREADED) + return; + + /* Retrieve the BDB value. */ + memset(&bdb_value, 0, sizeof(bdb_value)); + bdb_read(keyno, &bdb_value.data, &bdb_value.size, &notfound); /* * Check for not-found status. @@ -215,29 +359,27 @@ wts_read(uint64_t keyno) ret = 0; } - NTF_CHK(wts_notfound_chk("wts_read", ret, notfound, keyno)); + NTF_CHK(notfound_chk("read_row", ret, notfound, keyno)); /* Compare the two. */ if (value.size != bdb_value.size || memcmp(value.data, bdb_value.data, value.size) != 0) { fprintf(stderr, - "wts_read: read row value mismatch %" PRIu64 ":\n", keyno); - wts_stream_item("bdb", &bdb_value); - wts_stream_item(" wt", &value); - return (1); + "read_row: read row value mismatch %" PRIu64 ":\n", keyno); + print_item("bdb", &bdb_value); + print_item(" wt", &value); + die(0, NULL); } - return (0); } /* - * wts_np -- + * nextprev -- * Read and verify the next/prev element in a row- or column-store file.
*/ -static int -wts_np(int next, int insert, int *notfoundp) +static void +nextprev(WT_CURSOR *cursor, int next, int *notfoundp) { - static WT_ITEM key, value, bdb_key, bdb_value; - WT_CURSOR *cursor; + WT_ITEM key, value, bdb_key, bdb_value; WT_SESSION *session; uint64_t keyno; int notfound, ret; @@ -245,16 +387,9 @@ wts_np(int next, int insert, int *notfoundp) const char *which; char *p; - cursor = insert ? g.wts_cursor_insert : g.wts_cursor; - session = g.wts_session; + session = cursor->session; which = next ? "next" : "prev"; - /* Retrieve the BDB value. */ - if (bdb_np(next, &bdb_key.data, &bdb_key.size, - &bdb_value.data, &bdb_value.size, &notfound)) - return (1); - *notfoundp = notfound; - keyno = 0; ret = next ? cursor->next(cursor) : cursor->prev(cursor); if (ret == 0) @@ -275,42 +410,45 @@ wts_np(int next, int insert, int *notfoundp) ret = cursor->get_value(cursor, &value); break; } - if (ret != 0 && ret != WT_NOTFOUND) { - fprintf(stderr, - "%s: wts_%s: %s\n", - g.progname, which, wiredtiger_strerror(ret)); - return (1); - } + if (ret != 0 && ret != WT_NOTFOUND) + die(ret, "%s", which); + *notfoundp = ret == WT_NOTFOUND; - NTF_CHK(wts_notfound_chk( - next ? "wts_np(next)" : "wts_np(prev)", ret, notfound, keyno)); + if (!SINGLETHREADED) + return; + + /* Retrieve the BDB value. */ + bdb_np(next, &bdb_key.data, &bdb_key.size, + &bdb_value.data, &bdb_value.size, &notfound); + NTF_CHK(notfound_chk( + next ? "nextprev(next)" : "nextprev(prev)", ret, notfound, keyno)); /* Compare the two.
*/ if (g.c_file_type == ROW) { if (key.size != bdb_key.size || memcmp(key.data, bdb_key.data, key.size) != 0) { - fprintf(stderr, "wts_np: %s key mismatch:\n", which); - wts_stream_item("bdb-key", &bdb_key); - wts_stream_item(" wt-key", &key); - return (1); + fprintf(stderr, "nextprev: %s key mismatch:\n", which); + print_item("bdb-key", &bdb_key); + print_item(" wt-key", &key); + die(0, NULL); } } else { if (keyno != (uint64_t)atoll(bdb_key.data)) { if ((p = strchr((char *)bdb_key.data, '.')) != NULL) *p = '\0'; fprintf(stderr, - "wts_np: %s key mismatch: %.*s != %" PRIu64 "\n", + "nextprev: %s key mismatch: %.*s != %" PRIu64 "\n", which, (int)bdb_key.size, (char *)bdb_key.data, keyno); - return (1); + die(0, NULL); } } if (value.size != bdb_value.size || memcmp(value.data, bdb_value.data, value.size) != 0) { - fprintf(stderr, "wts_np: %s value mismatch:\n", which); - wts_stream_item("bdb-value", &bdb_value); - wts_stream_item(" wt-value", &value); - return (1); + fprintf(stderr, "nextprev: %s value mismatch:\n", which); + print_item("bdb-value", &bdb_value); + print_item(" wt-value", &value); + die(0, NULL); } if (g.logging == LOG_OPS) @@ -332,71 +470,60 @@ wts_np(int next, int insert, int *notfoundp) which, keyno, (int)value.size, (char *)value.data); break; } - - return (0); } /* - * wts_row_put -- + * row_put -- * Update an element in a row-store file. 
*/ -static int -wts_row_put(uint64_t keyno, int insert) +static void +row_put( + WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno, int insert) { - static WT_ITEM key, value; - WT_CURSOR *cursor; WT_SESSION *session; int notfound, ret; - cursor = g.wts_cursor; - session = g.wts_session; + session = cursor->session; - key_gen(&key.data, &key.size, keyno, insert); - value_gen(&value.data, &value.size, keyno); + key_gen((uint8_t *)key->data, &key->size, keyno, insert); + value_gen((uint8_t *)value->data, &value->size, keyno); /* Log the operation */ if (g.logging == LOG_OPS) (void)session->msg_printf(session, "%-10s{%.*s}\n%-10s{%.*s}", insert ? "insertK" : "putK", - (int)key.size, (char *)key.data, + (int)key->size, (char *)key->data, insert ? "insertV" : "putV", - (int)value.size, (char *)value.data); - - if (bdb_put(key.data, key.size, value.data, value.size, &notfound)) - return (1); + (int)value->size, (char *)value->data); - cursor->set_key(cursor, &key); - cursor->set_value(cursor, &value); + cursor->set_key(cursor, key); + cursor->set_value(cursor, value); ret = cursor->insert(cursor); - if (ret != 0 && ret != WT_NOTFOUND) { - fprintf(stderr, - "%s: wts_row_put: %s row %" PRIu64 " by key: %s\n", - g.progname, insert ? "insert" : "update", - keyno, wiredtiger_strerror(ret)); - return (1); - } + if (ret != 0 && ret != WT_NOTFOUND) + die(ret, + "row_put: %s row %" PRIu64 " by key", + insert ? "insert" : "update", keyno); - NTF_CHK(wts_notfound_chk("wts_row_put", ret, notfound, keyno)); - return (0); + if (!SINGLETHREADED) + return; + + bdb_put(key->data, key->size, value->data, value->size, &notfound); + NTF_CHK(notfound_chk("row_put", ret, notfound, keyno)); } /* - * wts_col_put -- + * col_put -- * Update an element in a column-store file.
*/ -static int -wts_col_put(uint64_t keyno) +static void +col_put(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno) { - static WT_ITEM key, value; - WT_CURSOR *cursor; WT_SESSION *session; int notfound, ret; - cursor = g.wts_cursor; - session = g.wts_session; + session = cursor->session; - key_gen(&key.data, &key.size, keyno, 0); - value_gen(&value.data, &value.size, keyno); + value_gen((uint8_t *)value->data, &value->size, keyno); /* Log the operation */ if (g.logging == LOG_OPS) { @@ -404,201 +531,173 @@ wts_col_put(uint64_t keyno) (void)session->msg_printf(session, "%-10s%" PRIu64 " {0x%02" PRIx8 "}", "update", keyno, - ((uint8_t *)value.data)[0]); + ((uint8_t *)value->data)[0]); else (void)session->msg_printf(session, "%-10s%" PRIu64 " {%.*s}", "update", keyno, - (int)value.size, (char *)value.data); + (int)value->size, (char *)value->data); } cursor->set_key(cursor, keyno); if (g.c_file_type == FIX) - cursor->set_value(cursor, *(uint8_t *)value.data); + cursor->set_value(cursor, *(uint8_t *)value->data); else - cursor->set_value(cursor, &value); + cursor->set_value(cursor, value); ret = cursor->insert(cursor); - if (ret != 0 && ret != WT_NOTFOUND) { - fprintf(stderr, - "%s: wts_col_put: %" PRIu64 " : %s\n", - g.progname, keyno, wiredtiger_strerror(ret)); - return (1); - } + if (ret != 0 && ret != WT_NOTFOUND) + die(ret, "col_put: %" PRIu64, keyno); - if (bdb_put(key.data, key.size, value.data, value.size, &notfound)) - return (1); + if (!SINGLETHREADED) + return; - NTF_CHK(wts_notfound_chk("wts_col_put", ret, notfound, keyno)); - return (0); + key_gen((uint8_t *)key->data, &key->size, keyno, 0); + bdb_put(key->data, key->size, value->data, value->size, &notfound); + NTF_CHK(notfound_chk("col_put", ret, notfound, keyno)); } /* - * wts_col_insert -- + * col_insert -- * Insert an element in a column-store file.
*/ -static int -wts_col_insert(uint64_t *keynop) +static void +col_insert(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t *keynop) { - static WT_ITEM key, value; - WT_CURSOR *cursor; WT_SESSION *session; uint64_t keyno; int notfound, ret; - /* Reset the other cursor so it doesn't keep pages pinned. */ - cursor = g.wts_cursor; - cursor->reset(cursor); + session = cursor->session; - cursor = g.wts_cursor_insert; - session = g.wts_session; - - value_gen(&value.data, &value.size, g.rows + 1); + value_gen((uint8_t *)value->data, &value->size, g.rows + 1); if (g.c_file_type == FIX) - cursor->set_value(cursor, *(uint8_t *)value.data); + cursor->set_value(cursor, *(uint8_t *)value->data); else - cursor->set_value(cursor, &value); - ret = cursor->insert(cursor); - if (ret != 0) { - fprintf(stderr, "%s: wts_col_insert: %s\n", - g.progname, wiredtiger_strerror(ret)); - return (1); - } - if ((ret = cursor->get_key(cursor, &keyno)) != 0) { - fprintf(stderr, "%s: cursor->get_key: %s\n", - g.progname, wiredtiger_strerror(ret)); - return (1); - } - if (keyno <= g.rows) { - fprintf(stderr, - "%s: inserted key did not create new row\n", g.progname); - return (1); - } - *keynop = g.rows = (uint32_t)keyno; + cursor->set_value(cursor, value); + if ((ret = cursor->insert(cursor)) != 0) + die(ret, "cursor.insert"); + if ((ret = cursor->get_key(cursor, &keyno)) != 0) + die(ret, "cursor.get_key"); + *keynop = (uint32_t)keyno; + + /* + * Assign the maximum number of rows to the returned key: that key may + * not be the current maximum value, if we race with another thread, + * but that's OK, we just want it to keep increasing so we don't ignore + * records at the end of the table. 
+ */ + g.rows = (uint32_t)keyno; if (g.logging == LOG_OPS) { if (g.c_file_type == FIX) (void)session->msg_printf(session, "%-10s%" PRIu64 " {0x%02" PRIx8 "}", "insert", keyno, - ((uint8_t *)value.data)[0]); + ((uint8_t *)value->data)[0]); else (void)session->msg_printf(session, "%-10s%" PRIu64 " {%.*s}", "insert", keyno, - (int)value.size, (char *)value.data); + (int)value->size, (char *)value->data); } - key_gen(&key.data, &key.size, keyno, 0); - return (bdb_put( - key.data, key.size, value.data, value.size, &notfound) ? 1 : 0); + if (!SINGLETHREADED) + return; + + key_gen((uint8_t *)key->data, &key->size, keyno, 0); + bdb_put(key->data, key->size, value->data, value->size, &notfound); } /* - * wts_row_del -- + * row_del -- * Delete an element from a row-store file. */ -static int -wts_row_del(uint64_t keyno, int *notfoundp) +static void +row_del(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, int *notfoundp) { - static WT_ITEM key; - WT_CURSOR *cursor; WT_SESSION *session; int notfound, ret; - *notfoundp = 0; - cursor = g.wts_cursor; - session = g.wts_session; + session = cursor->session; - key_gen(&key.data, &key.size, keyno, 0); + key_gen((uint8_t *)key->data, &key->size, keyno, 0); /* Log the operation */ if (g.logging == LOG_OPS) (void)session->msg_printf( session, "%-10s%" PRIu64, "remove", keyno); - if (bdb_del(keyno, &notfound)) - return (1); - *notfoundp = notfound; - - cursor->set_key(cursor, &key); + cursor->set_key(cursor, key); ret = cursor->remove(cursor); - if (ret != 0 && ret != WT_NOTFOUND) { - fprintf(stderr, - "%s: wts_row_del: remove %" PRIu64 " by key: %s\n", - g.progname, keyno, wiredtiger_strerror(ret)); - return (1); - } + if (ret != 0 && ret != WT_NOTFOUND) + die(ret, "row_del: remove %" PRIu64 " by key", keyno); + *notfoundp = ret == WT_NOTFOUND; - NTF_CHK(wts_notfound_chk("wts_row_del", ret, notfound, keyno)); - return (0); + if (!SINGLETHREADED) + return; + + bdb_del(keyno, &notfound); + NTF_CHK(notfound_chk("row_del", ret, notfound, keyno)); } /*
- * wts_col_del -- + * col_del -- * Delete an element from a column-store file. */ -static int -wts_col_del(uint64_t keyno, int *notfoundp) +static void +col_del(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, int *notfoundp) { - static WT_ITEM key; - WT_CURSOR *cursor; WT_SESSION *session; int notfound, ret; - cursor = g.wts_cursor; - session = g.wts_session; + session = cursor->session; /* Log the operation */ if (g.logging == LOG_OPS) (void)session->msg_printf( session, "%-10s%" PRIu64, "remove", keyno); + cursor->set_key(cursor, keyno); + ret = cursor->remove(cursor); + if (ret != 0 && ret != WT_NOTFOUND) + die(ret, "col_del: remove %" PRIu64 " by key", keyno); + *notfoundp = ret == WT_NOTFOUND; + + if (!SINGLETHREADED) + return; + /* * Deleting a fixed-length item is the same as setting the bits to 0; * do the same thing for the BDB store. */ if (g.c_file_type == FIX) { - key_gen(&key.data, &key.size, keyno, 0); - if (bdb_put(key.data, key.size, "\0", 1, &notfound)) - return (1); - } else { - if (bdb_del(keyno, &notfound)) - return (1); - *notfoundp = notfound; - } + key_gen((uint8_t *)key->data, &key->size, keyno, 0); + bdb_put(key->data, key->size, "\0", 1, &notfound); + } else + bdb_del(keyno, &notfound); - cursor->set_key(cursor, keyno); - ret = cursor->remove(cursor); - if (ret != 0 && ret != WT_NOTFOUND) { - fprintf(stderr, - "%s: wts_col_del: remove %" PRIu64 " by key: %s\n", - g.progname, keyno, wiredtiger_strerror(ret)); - return (1); - } - - NTF_CHK(wts_notfound_chk("wts_col_del", ret, notfound, keyno)); - return (0); + NTF_CHK(notfound_chk("col_del", ret, notfound, keyno)); } /* - * wts_notfound_chk -- + * notfound_chk -- * Compare notfound returns for consistency. */ static int -wts_notfound_chk(const char *f, int wt_ret, int bdb_notfound, uint64_t keyno) +notfound_chk(const char *f, int wt_ret, int bdb_notfound, uint64_t keyno) { /* Check for not found status.
*/ - if (bdb_notfound) { - if (wt_ret == WT_NOTFOUND) - return (2); + if (bdb_notfound && wt_ret == WT_NOTFOUND) + return (1); + if (bdb_notfound) { fprintf(stderr, "%s: %s:", g.progname, f); if (keyno != 0) fprintf(stderr, " row %" PRIu64 ":", keyno); fprintf(stderr, " not found in Berkeley DB, found in WiredTiger\n"); - return (1); + die(0, NULL); } if (wt_ret == WT_NOTFOUND) { fprintf(stderr, "%s: %s:", g.progname, f); @@ -606,17 +705,17 @@ wts_notfound_chk(const char *f, int wt_ret, int bdb_notfound, uint64_t keyno) fprintf(stderr, " row %" PRIu64 ":", keyno); fprintf(stderr, " found in Berkeley DB, not found in WiredTiger\n"); - return (1); + die(0, NULL); } return (0); } /* - * wts_stream_item -- - * Dump a single data/size pair, with a tag. + * print_item -- + * Display a single data/size pair, with a tag. */ static void -wts_stream_item(const char *tag, WT_ITEM *item) +print_item(const char *tag, WT_ITEM *item) { static const char hex[] = "0123456789abcdef"; const uint8_t *data; |