diff options
author | Alex Gorrod <alexg@wiredtiger.com> | 2012-11-23 12:00:46 +1100 |
---|---|---|
committer | Alex Gorrod <alexg@wiredtiger.com> | 2012-11-23 12:00:46 +1100 |
commit | 91d9d8ef94513923a446f9fdab596aa2951ff1f2 (patch) | |
tree | 6252eb1893b55e05a4531bff3b1166a5d0669c3b | |
parent | 9676c86afceb0fa2cdaac4c97b5f7c6a0475f7b9 (diff) | |
parent | 43a336b64f8b8108c9f68dd807b65b56cbbfe358 (diff) | |
download | mongo-91d9d8ef94513923a446f9fdab596aa2951ff1f2.tar.gz |
Merge branch 'develop' into trickle
74 files changed, 892 insertions, 810 deletions
@@ -1,3 +1,40 @@ +WiredTiger release 1.3.8, 2012-11-22 +------------------------------------ + +This release improves the performance of LSM trees, changes how statistics are +reported and adds a shared cache implementation: + +New features and API changes: + +[232] Add a "size of checkpoint" statistic. + +* Add a shared cache pool implementation. Manages a single cache among + multiple databases within a process. + +* Merge statistics from file and LSM sources into a "data source" statistic + structure. Rename and regroup some shared statistics. Add a helper to + the Python API to look up in a cursor in a simple expression. + +* Add support for sub groups of options in configuration strings. + +Performance tuning for LSM trees: + +* Don't try to merge with a chunk that is much larger than a small chunk. + +* After an LSM merge, fault in some pages before the new tree goes live to + avoid stalling application threads. + +* Don't automatically fail inserts if the write generation check fails: + compare keys instead. + +* Switch the LSM tree lock to a read/write lock, so cursors can read the + state of the tree in parallel. + +Bug fixes: + +* Fix a bug where we could write past the end of a buffer after it was grown. + + WiredTiger release 1.3.7, 2012-11-09 ------------------------------------ @@ -1,6 +1,6 @@ -WiredTiger 1.3.7: (November 9, 2012) +WiredTiger 1.3.8: (November 22, 2012) -This is version 1.3.7 of WiredTiger. +This is version 1.3.8 of WiredTiger. 
WiredTiger documentation can be found at: @@ -1,6 +1,6 @@ WIREDTIGER_VERSION_MAJOR=1 WIREDTIGER_VERSION_MINOR=3 -WIREDTIGER_VERSION_PATCH=7 +WIREDTIGER_VERSION_PATCH=8 WIREDTIGER_VERSION="$WIREDTIGER_VERSION_MAJOR.$WIREDTIGER_VERSION_MINOR.$WIREDTIGER_VERSION_PATCH" WIREDTIGER_RELEASE_DATE=`date "+%B %e, %Y"` diff --git a/build_posix/aclocal/version-set.m4 b/build_posix/aclocal/version-set.m4 index c8401b642b2..6e09827b33f 100644 --- a/build_posix/aclocal/version-set.m4 +++ b/build_posix/aclocal/version-set.m4 @@ -2,8 +2,8 @@ dnl build by dist/s_version VERSION_MAJOR=1 VERSION_MINOR=3 -VERSION_PATCH=7 -VERSION_STRING='"WiredTiger 1.3.7: (November 9, 2012)"' +VERSION_PATCH=8 +VERSION_STRING='"WiredTiger 1.3.8: (November 22, 2012)"' AC_SUBST(VERSION_MAJOR) AC_SUBST(VERSION_MINOR) diff --git a/build_posix/aclocal/version.m4 b/build_posix/aclocal/version.m4 index 8eae311c9e8..b825142c170 100644 --- a/build_posix/aclocal/version.m4 +++ b/build_posix/aclocal/version.m4 @@ -1,2 +1,2 @@ dnl WiredTiger product version for AC_INIT. 
Maintained by dist/s_version -1.3.7 +1.3.8 diff --git a/dist/api_data.py b/dist/api_data.py index 47be9faa74a..61f8073db1c 100644 --- a/dist/api_data.py +++ b/dist/api_data.py @@ -565,8 +565,9 @@ flags = { 'SERVER_RUN' ], 'session' : [ - 'SESSION_SCHEMA_LOCKED', 'SESSION_INTERNAL', - 'SESSION_SALVAGE_QUIET_ERR' + 'SESSION_NO_CACHE_CHECK', + 'SESSION_SALVAGE_QUIET_ERR', + 'SESSION_SCHEMA_LOCKED', ], } diff --git a/dist/s_string.ok b/dist/s_string.ok index 7e9825ef609..0e8904ca9d0 100644 --- a/dist/s_string.ok +++ b/dist/s_string.ok @@ -53,6 +53,7 @@ Config CustomersPhone DATAITEMs DESC +DSRC DUPLICATEV Decrement EB diff --git a/dist/serial.py b/dist/serial.py index 8772c4bd6bc..46ebc86d0c2 100644 --- a/dist/serial.py +++ b/dist/serial.py @@ -20,6 +20,7 @@ Serial('col_append', [ SerialArg('uint32_t', 'write_gen'), SerialArg('WT_INSERT_HEAD **', 'insheadp'), SerialArg('WT_INSERT ***', 'ins_stack'), + SerialArg('WT_INSERT **', 'next_stack'), SerialArg('WT_INSERT_HEAD **', 'new_inslist', 1), SerialArg('WT_INSERT_HEAD *', 'new_inshead', 1), SerialArg('WT_INSERT *', 'new_ins', 1), @@ -31,6 +32,7 @@ Serial('insert', [ SerialArg('uint32_t', 'write_gen'), SerialArg('WT_INSERT_HEAD **', 'inshead'), SerialArg('WT_INSERT ***', 'ins_stack'), + SerialArg('WT_INSERT **', 'next_stack'), SerialArg('WT_INSERT_HEAD **', 'new_inslist', 1), SerialArg('WT_INSERT_HEAD *', 'new_inshead', 1), SerialArg('WT_INSERT *', 'new_ins', 1), diff --git a/dist/stat.py b/dist/stat.py index bbaaa08b06c..5d294f51fce 100644 --- a/dist/stat.py +++ b/dist/stat.py @@ -1,24 +1,20 @@ # Read the source files and output the statistics #defines and allocation code. import re, string, sys, textwrap -from operator import attrgetter from dist import compare_srcfile from dist import source_paths_list # Read the source files. -from stat_data import btree_stats, connection_stats, lsm_stats +from stat_data import dsrc_stats, connection_stats -# print_struct -- -# Print the structures for the stat.h file. 
-def print_struct(title, name, list): +def print_struct(title, name, stats): + '''Print the structures for the stat.h file.''' f.write('/*\n') - f.write(' * Statistics entries for ' + title + ' handle.\n') + f.write(' * Statistics entries for ' + title + '.\n') f.write(' */\n') f.write('struct __wt_' + name + '_stats {\n') - # Sort the structure fields by their description, so the eventual - # disply is sorted by string. - for l in sorted(list, key=attrgetter('desc')): + for l in stats: f.write('\tWT_STATS ' + l.name + ';\n') f.write('};\n\n') @@ -35,59 +31,41 @@ for line in open('../src/include/stat.h', 'r'): elif line.count('Statistics section: BEGIN'): f.write('\n') skip = 1 - print_struct('BTREE', 'btree', btree_stats) - print_struct('CONNECTION', 'connection', connection_stats) - print_struct('LSM', 'lsm', lsm_stats) + print_struct('data sources', 'dsrc', dsrc_stats) + print_struct('connections', 'connection', connection_stats) f.close() compare_srcfile(tmp_file, '../src/include/stat.h') -# print_define -- -# Print the #defines for the wiredtiger.in file. -def print_define(): - # Sort the structure fields by their description so they match - # the structure lists. +def print_defines(): + '''Print the #defines for the wiredtiger.in file.''' f.write(''' /*! - * @name Statistics for connection handles + * @name Connection statistics * @anchor statistics_keys * @anchor statistics_conn - * Statistics in WiredTiger are accessed through cursors with \c "statistics:" - * URIs. Individual statistics can be queried through the cursor using the - * following keys. + * Statistics are accessed through cursors with \c "statistics:" URIs. + * Individual statistics can be queried through the cursor using the following + * keys. See @ref data_statistics for more information. * @{ */ ''') - for v, l in enumerate(sorted(connection_stats, key=attrgetter('desc'))): + for v, l in enumerate(connection_stats): f.write('/*! 
%s */\n' % '\n * '.join(textwrap.wrap(l.desc, 70))) - f.write('#define\tWT_STAT_' + l.name + "\t" * - max(1, 6 - int((len('WT_STAT_') + len(l.name)) / 8)) + + f.write('#define\tWT_STAT_CONN_' + l.name.upper() + "\t" * + max(1, 6 - int((len('WT_STAT_CONN_' + l.name)) / 8)) + str(v) + '\n') f.write(''' /*! * @} - * @name Statistics for file objects - * @anchor statistics_file + * @name Statistics for data sources + * @anchor statistics_dsrc * @{ */ ''') - for v, l in enumerate(sorted(btree_stats, key=attrgetter('desc'))): + for v, l in enumerate(dsrc_stats): f.write('/*! %s */\n' % '\n * '.join(textwrap.wrap(l.desc, 70))) - f.write('#define\tWT_STAT_' + l.name + "\t" * - max(1, 6 - int((len('WT_STAT_') + len(l.name)) / 8)) + - str(v) + '\n') - f.write('/*! @} */\n') - f.write(''' -/*! - * @} - * @name Statistics for lsm objects - * @anchor statistics_lsm - * @{ - */ -''') - for v, l in enumerate(sorted(lsm_stats, key=attrgetter('desc'))): - f.write('/*! %s */\n' % '\n * '.join(textwrap.wrap(l.desc, 70))) - f.write('#define\tWT_STAT_' + l.name + "\t" * - max(1, 6 - int((len('WT_STAT_') + len(l.name)) / 8)) + + f.write('#define\tWT_STAT_DSRC_' + l.name.upper() + "\t" * + max(1, 6 - int((len('WT_STAT_DSRC_' + l.name)) / 8)) + str(v) + '\n') f.write('/*! @} */\n') @@ -104,14 +82,13 @@ for line in open('../src/include/wiredtiger.in', 'r'): elif line.count('Statistics section: BEGIN'): f.write(' */\n') skip = 1 - print_define() + print_defines() f.write('/*\n') f.close() compare_srcfile(tmp_file, '../src/include/wiredtiger.in') -# print_func -- -# Print the functions for the stat.c file. def print_func(name, list): + '''Print the functions for the stat.c file.''' f.write(''' int __wt_stat_alloc_''' + name + '''_stats(WT_SESSION_IMPL *session, WT_''' + @@ -145,7 +122,7 @@ __wt_stat_clear_''' + name + '''_stats(WT_STATS *stats_arg) for l in sorted(list): # Items marked permanent aren't cleared by the stat clear # methods. 
- if not l.config.count('perm'): + if not l.flags.get('perm', 0): f.write('\tstats->' + l.name + '.v = 0;\n'); f.write('}\n') @@ -154,8 +131,7 @@ f = open(tmp_file, 'w') f.write('/* DO NOT EDIT: automatically built by dist/stat.py. */\n\n') f.write('#include "wt_internal.h"\n') -print_func('btree', btree_stats) +print_func('dsrc', dsrc_stats) print_func('connection', connection_stats) -print_func('lsm', lsm_stats) f.close() compare_srcfile(tmp_file, '../src/support/stat.c') diff --git a/dist/stat_data.py b/dist/stat_data.py index f7663bd3c42..2129b72dfa9 100644 --- a/dist/stat_data.py +++ b/dist/stat_data.py @@ -5,11 +5,13 @@ # are: # perm -- Field is not cleared by the stat clear function. +from operator import attrgetter + class Stat: - def __init__(self, name, desc, config=None): + def __init__(self, name, desc, **flags): self.name = name self.desc = desc - self.config = config or [] + self.flags = flags def __cmp__(self, other): return cmp(self.name, other.name) @@ -20,14 +22,18 @@ class Stat: connection_stats = [ Stat('block_read', 'blocks read from a file'), Stat('block_write', 'blocks written to a file'), - Stat('cache_bytes_inuse', 'cache: bytes currently held in the cache', 'perm'), - Stat('cache_bytes_max', 'cache: maximum bytes configured', 'perm'), - Stat('cache_evict_hazard', 'cache: pages selected for eviction not evicted because of a hazard reference'), + Stat('cache_bytes_inuse', + 'cache: bytes currently held in the cache', perm=1), + Stat('cache_bytes_max', 'cache: maximum bytes configured', perm=1), + Stat('cache_evict_hazard', 'cache: pages selected for eviction not ' + + 'evicted because of a hazard reference'), Stat('cache_evict_internal', 'cache: internal pages evicted'), Stat('cache_evict_modified', 'cache: modified pages evicted'), - Stat('cache_evict_slow', 'cache: eviction server unable to reach eviction goal'), + Stat('cache_evict_slow', + 'cache: eviction server unable to reach eviction goal'), Stat('cache_evict_unmodified', 'cache: 
unmodified pages evicted'), - Stat('cache_pages_inuse', 'cache: pages currently held in the cache', 'perm'), + Stat('cache_pages_inuse', + 'cache: pages currently held in the cache', perm=1), Stat('checkpoint', 'checkpoints'), Stat('cond_wait', 'condition wait calls'), Stat('file_open', 'files currently open'), @@ -44,28 +50,28 @@ connection_stats = [ Stat('txn_rollback', 'transactions rolled-back'), ] +connection_stats = sorted(connection_stats, key=attrgetter('name')) + ########################################## -# BTREE statistics +# Data source statistics ########################################## -btree_stats = [ - Stat('alloc', 'file: block allocations'), - Stat('cursor_inserts', 'cursor-inserts'), +dsrc_stats = [ + Stat('block_alloc', 'block allocations'), + Stat('block_extend', 'block allocations required file extension'), + Stat('block_free', 'block frees'), + Stat('ckpt_size', 'checkpoint size'), + Stat('cursor_insert', 'cursor-inserts'), Stat('cursor_read', 'cursor-read'), Stat('cursor_read_near', 'cursor-read-near'), Stat('cursor_read_next', 'cursor-read-next'), Stat('cursor_read_prev', 'cursor-read-prev'), - Stat('cursor_removes', 'cursor-removes'), - Stat('cursor_resets', 'cursor-resets'), - Stat('cursor_updates', 'cursor-updates'), - Stat('extend', 'file: block allocations required file extension'), + Stat('cursor_remove', 'cursor-removes'), + Stat('cursor_reset', 'cursor-resets'), + Stat('cursor_update', 'cursor-updates'), + Stat('entries', 'total entries'), Stat('file_allocsize', 'page size allocation unit'), Stat('file_bulk_loaded', 'bulk-loaded entries'), - Stat('file_col_deleted', 'column-store deleted values'), - Stat('file_col_fix_pages', 'column-store fixed-size leaf pages'), - Stat('file_col_int_pages', 'column-store internal pages'), - Stat('file_col_var_pages', 'column-store variable-size leaf pages'), Stat('file_compact_rewrite', 'pages rewritten by compaction'), - Stat('file_entries', 'total entries'), Stat('file_fixed_len', 
'fixed-record size'), Stat('file_magic', 'magic number'), Stat('file_major', 'major version number'), @@ -74,49 +80,48 @@ btree_stats = [ Stat('file_maxleafitem', 'maximum leaf page item size'), Stat('file_maxleafpage', 'maximum leaf page size'), Stat('file_minor', 'minor version number'), - Stat('file_overflow', 'overflow pages'), - Stat('file_row_int_pages', 'row-store internal pages'), - Stat('file_row_leaf_pages', 'row-store leaf pages'), - Stat('file_size', 'file: size'), - Stat('file_write_conflicts', 'write generation conflicts'), - Stat('free', 'file: block frees'), - Stat('overflow_read', 'file: overflow pages read from the file'), - Stat('overflow_value_cache', 'file: overflow values cached in memory'), - Stat('page_evict', 'file: pages evicted from the file'), - Stat('page_evict_fail', 'file: pages that were selected for eviction that could not be evicted'), - Stat('page_read', 'file: pages read from the file'), - Stat('page_write', 'file: pages written to the file'), + Stat('file_size', 'file size'), + Stat('page_col_deleted', 'column-store deleted values'), + Stat('page_col_fix', 'column-store fixed-size leaf pages'), + Stat('page_col_int', 'column-store internal pages'), + Stat('page_col_var', 'column-store variable-size leaf pages'), + Stat('page_evict', 'pages evicted from the data source'), + Stat('page_evict_fail', + 'pages that were selected for eviction that could not be evicted'), + Stat('page_read', 'pages read into cache'), + Stat('page_row_int', 'row-store internal pages'), + Stat('page_row_leaf', 'row-store leaf pages'), + Stat('page_write', 'pages written from cache'), + Stat('overflow_page', 'overflow pages'), + Stat('overflow_read', 'overflow pages read into cache'), + Stat('overflow_value_cache', 'overflow values cached in memory'), Stat('rec_dictionary', 'reconcile: dictionary match'), - Stat('rec_hazard', 'reconcile: unable to acquire hazard reference'), - Stat('rec_ovfl_key', 'reconcile: overflow key'), - Stat('rec_ovfl_value', 
'reconcile: overflow value'), - Stat('rec_page_delete', 'reconcile: pages deleted'), - Stat('rec_page_merge', 'reconcile: deleted or temporary pages merged'), - Stat('rec_split_intl', 'reconcile: internal pages split'), - Stat('rec_split_leaf', 'reconcile: leaf pages split'), - Stat('rec_written', 'reconcile: pages written'), - Stat('update_conflict', 'update conflicts'), -] - + Stat('rec_hazard', 'reconciliation unable to acquire hazard reference'), + Stat('rec_ovfl_key', 'reconciliation overflow key'), + Stat('rec_ovfl_value', 'reconciliation overflow value'), + Stat('rec_page_delete', 'pages deleted'), + Stat('rec_page_merge', 'deleted or temporary pages merged'), + Stat('rec_split_intl', 'internal pages split'), + Stat('rec_split_leaf', 'leaf pages split'), + Stat('rec_written', 'pages written'), + Stat('txn_update_conflict', 'update conflicts'), + Stat('txn_write_conflict', 'write generation conflicts'), ########################################## # LSM statistics ########################################## -lsm_stats = [ - Stat('chunk_cache_evict', 'Number of pages evicted from LSM chunks'), - Stat('chunk_cache_read', 'Number of pages read into LSM chunks'), - Stat('bloom_false_positives', 'Number of bloom filter false positives'), - Stat('bloom_hits', 'Number of bloom filter hits'), - Stat('bloom_misses', 'Number of bloom filter misses'), - Stat('search_miss_no_bloom', 'Number of queries that could have benefited from a bloom filter that did not exist'), - Stat('bloom_space', 'Total space used by bloom filters'), - Stat('bloom_cache_evict', 'Number of bloom pages evicted from cache'), - Stat('bloom_cache_read', 'Number of bloom pages read into cache'), - Stat('chunk_count', 'Number of chunks in the LSM tree'), - Stat('bloom_count', 'Number of bloom filters in the LSM tree'), - Stat('cache_evict', 'Number of pages evicted from cache'), - Stat('cache_evict_fail', 'Number of pages selected for eviction that could not be evicted'), - Stat('cache_read', 'Number of 
pages read into cache'), - Stat('cache_write', 'Number of pages written from cache'), - Stat('generation_max', 'Highest merge generation in the LSM tree'), + Stat('bloom_false_positive', + 'Number of Bloom filter false positives'), + Stat('bloom_hit', 'Number of Bloom filter hits'), + Stat('bloom_miss', 'Number of Bloom filter misses'), + Stat('bloom_size', 'Total size of Bloom filters'), + Stat('bloom_page_evict', 'Number of Bloom pages evicted from cache'), + Stat('bloom_page_read', 'Number of Bloom pages read into cache'), + Stat('bloom_count', + 'Number of Bloom filters in the LSM tree'), + Stat('lsm_chunk_count', 'Number of chunks in the LSM tree'), + Stat('lsm_generation_max', 'Highest merge generation in the LSM tree'), + Stat('lsm_lookup_no_bloom', 'Number of queries that could have benefited ' + + 'from a Bloom filter that did not exist'), ] +dsrc_stats = sorted(dsrc_stats, key=attrgetter('name')) diff --git a/examples/c/ex_stat.c b/examples/c/ex_stat.c index 9399f45f775..6bada3d8d1c 100644 --- a/examples/c/ex_stat.c +++ b/examples/c/ex_stat.c @@ -122,7 +122,7 @@ print_overflow_pages(WT_SESSION *session) "statistics:file:access.wt", NULL, NULL, &cursor)) != 0) return (ret); - cursor->set_key(cursor, WT_STAT_file_overflow); + cursor->set_key(cursor, WT_STAT_DSRC_OVERFLOW_PAGE); ret = cursor->search(cursor); ret = cursor->get_value(cursor, &desc, &pvalue, &value); printf("%s=%s\n", desc, pvalue); diff --git a/lang/python/wiredtiger.i b/lang/python/wiredtiger.i index 163b7c9a740..7372213cb77 100644 --- a/lang/python/wiredtiger.i +++ b/lang/python/wiredtiger.i @@ -89,6 +89,7 @@ DESTRUCTOR(__wt_session, close) /* Don't require empty config strings. */ %typemap(default) const char *config { $1 = NULL; } +%typemap(default) WT_CURSOR *to_dup { $1 = NULL; } /* * Error returns other than WT_NOTFOUND generate an exception. 
@@ -334,7 +335,10 @@ typedef int int_void; @copydoc WT_CURSOR::get_key Returns only the first column.''' - return self.get_keys()[0] + k = self.get_keys() + if len(k) == 1: + return k[0] + return k def get_keys(self): '''get_keys(self) -> (object, ...) @@ -350,7 +354,10 @@ typedef int int_void; @copydoc WT_CURSOR::get_value Returns only the first column.''' - return self.get_values()[0] + v = self.get_values() + if len(v) == 1: + return v[0] + return v def get_values(self): '''get_values(self) -> (object, ...) @@ -362,6 +369,8 @@ typedef int int_void; '''set_key(self) -> None @copydoc WT_CURSOR::set_key''' + if len(args) == 1 and type(args[0]) == tuple: + args = args[0] if self.is_column: self._set_recno(long(args[0])) else: @@ -373,6 +382,8 @@ typedef int int_void; '''set_value(self) -> None @copydoc WT_CURSOR::set_value''' + if len(args) == 1 and type(args[0]) == tuple: + args = args[0] # Keep the Python string pinned self._value = pack(self.value_format, *args) self._set_value(self._value) @@ -383,6 +394,13 @@ typedef int int_void; if not hasattr(self, '_iterable'): self._iterable = IterableCursor(self) return self._iterable + + def __getitem__(self, key): + '''Python convenience for searching''' + self.set_key(key) + if self.search() != 0: + raise KeyError + return self.get_value() %} }; @@ -427,27 +445,30 @@ typedef int int_void; ## @} class stat: - """ a set of static defines used by statistics cursor """ - pass + '''keys for statistics cursors''' + + class conn: + '''keys for cursors on connection statistics''' + pass -class filestat: - """ a set of static defines used by statistics cursor """ - pass + class dsrc: + '''keys for cursors on data source statistics''' + pass import sys -# All names starting with 'WT_STAT_file_' are renamed to -# the wiredtiger.filestat class, those starting with 'WT_STAT_' are -# renamed to wiredtiger.stat . 
+# All names starting with 'WT_STAT_DSRC_' are renamed to +# the wiredtiger.stat.dsrc class, those starting with 'WT_STAT_CONN' are +# renamed to wiredtiger.stat.conn class. def _rename_with_prefix(prefix, toclass): curmodule = sys.modules[__name__] for name in dir(curmodule): if name.startswith(prefix): - shortname = name[len(prefix):] + shortname = name[len(prefix):].lower() setattr(toclass, shortname, getattr(curmodule, name)) delattr(curmodule, name) -_rename_with_prefix('WT_STAT_file_', filestat) -_rename_with_prefix('WT_STAT_', stat) +_rename_with_prefix('WT_STAT_CONN_', stat.conn) +_rename_with_prefix('WT_STAT_DSRC_', stat.dsrc) del _rename_with_prefix %} diff --git a/src/block/block_ckpt.c b/src/block/block_ckpt.c index a581945aab0..b4876ea00c1 100644 --- a/src/block/block_ckpt.c +++ b/src/block/block_ckpt.c @@ -87,7 +87,7 @@ __wt_block_checkpoint_load(WT_SESSION_IMPL *session, } WT_VERBOSE_ERR(session, ckpt, "%s: load-checkpoint: %s", block->name, - addr == NULL ? "[Empty]" : (char *)tmp->data); + addr == NULL ? "[Empty]" : (const char *)tmp->data); } /* If not loading a checkpoint from disk, we're done. 
*/ @@ -112,8 +112,7 @@ __wt_block_checkpoint_load(WT_SESSION_IMPL *session, WT_ERR(__ckpt_string( session, block, addr, tmp)); } - WT_ERR( - __wt_verify_dsk(session, (char *)tmp->data, dsk)); + WT_ERR(__wt_verify_dsk(session, tmp->data, dsk)); } } @@ -393,7 +392,7 @@ __ckpt_process( session, block, ckpt->raw.data, tmp)); WT_VERBOSE_ERR(session, ckpt, "%s: delete-checkpoint: %s: %s", - block->name, ckpt->name, (char *)tmp->data); + block->name, ckpt->name, (const char *)tmp->data); } /* @@ -648,7 +647,7 @@ __ckpt_update( WT_ERR(__ckpt_string(session, block, ckpt->raw.data, tmp)); WT_VERBOSE_ERR(session, ckpt, "%s: create-checkpoint: %s: %s", - block->name, ckpt->name, (char *)tmp->data); + block->name, ckpt->name, (const char *)tmp->data); } err: __wt_scr_free(&tmp); diff --git a/src/block/block_ext.c b/src/block/block_ext.c index c2596823951..c11190dcc93 100644 --- a/src/block/block_ext.c +++ b/src/block/block_ext.c @@ -386,7 +386,7 @@ __wt_block_alloc( WT_EXT *ext; WT_SIZE *szp, **sstack[WT_SKIP_MAXDEPTH]; - WT_BSTAT_INCR(session, alloc); + WT_BSTAT_INCR(session, block_alloc); if (size % block->allocsize != 0) WT_RET_MSG(session, EINVAL, "cannot allocate a block size %" PRIdMAX " that is not " @@ -474,7 +474,7 @@ __wt_block_extend( *offp = fh->file_size; fh->file_size += size; - WT_BSTAT_INCR(session, extend); + WT_BSTAT_INCR(session, block_extend); WT_VERBOSE_RET(session, block, "file extend %" PRIdMAX "B @ %" PRIdMAX, (intmax_t)size, (intmax_t)*offp); @@ -495,7 +495,7 @@ __wt_block_free(WT_SESSION_IMPL *session, uint32_t cksum, size; WT_UNUSED(addr_size); - WT_BSTAT_INCR(session, free); + WT_BSTAT_INCR(session, block_free); /* Crack the cookie. 
*/ WT_RET(__wt_block_buffer_to_addr(block, addr, &offset, &size, &cksum)); diff --git a/src/block/block_mgr.c b/src/block/block_mgr.c index d7f9ad8cb92..8601e2ef30c 100644 --- a/src/block/block_mgr.c +++ b/src/block/block_mgr.c @@ -48,7 +48,7 @@ __wt_bm_addr_stderr( WT_RET(__wt_scr_alloc(session, 0, &buf)); ret = __wt_block_addr_string(session, block, buf, addr, addr_size); if (ret == 0) - fprintf(stderr, "%s\n", (char *)buf->data); + fprintf(stderr, "%s\n", (const char *)buf->data); __wt_scr_free(&buf); return (ret); } diff --git a/src/block/block_open.c b/src/block/block_open.c index 58f8f74ad61..b43c7abcdc1 100644 --- a/src/block/block_open.c +++ b/src/block/block_open.c @@ -248,6 +248,7 @@ err: __wt_scr_free(&buf); void __wt_block_stat(WT_SESSION_IMPL *session, WT_BLOCK *block) { + WT_BSTAT_SET(session, ckpt_size, block->live.ckpt_size); WT_BSTAT_SET(session, file_size, block->fh->file_size); WT_BSTAT_SET(session, file_magic, WT_BLOCK_MAGIC); WT_BSTAT_SET(session, file_major, WT_BLOCK_MAJOR_VERSION); diff --git a/src/block/block_vrfy.c b/src/block/block_vrfy.c index f32a02892e2..8ca97328301 100644 --- a/src/block/block_vrfy.c +++ b/src/block/block_vrfy.c @@ -293,7 +293,7 @@ __wt_block_verify(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, */ WT_RET(__wt_scr_alloc(session, 0, &tmp)); WT_ERR(__wt_block_addr_string(session, block, tmp, addr, addr_size)); - WT_ERR(__wt_verify_dsk(session, (char *)tmp->data, buf)); + WT_ERR(__wt_verify_dsk(session, (const char *)tmp->data, buf)); err: __wt_scr_free(&tmp); return (ret); diff --git a/src/bloom/bloom.c b/src/bloom/bloom.c index 796461230b7..8fa613b2ac9 100644 --- a/src/bloom/bloom.c +++ b/src/bloom/bloom.c @@ -250,7 +250,7 @@ __wt_bloom_hash_get(WT_BLOOM *bloom, WT_BLOOM_HASH *bhash) result = 0; for (i = 0; i < bloom->k; i++, h1 += h2) { /* - * Add 1 to the hash because Wired Tiger tables are 1 based and + * Add 1 to the hash because WiredTiger tables are 1 based and * the original bitstring array was 0 based. 
*/ c->set_key(c, (h1 % bloom->m) + 1); diff --git a/src/btree/bt_bulk.c b/src/btree/bt_bulk.c index 10eee023ff8..3537fa8dcf2 100644 --- a/src/btree/bt_bulk.c +++ b/src/btree/bt_bulk.c @@ -154,7 +154,8 @@ __bulk_row_keycmp_err(WT_CURSOR_BULK *cbulk) WT_ERR_MSG(session, EINVAL, "bulk-load presented with out-of-order keys: %.*s compares smaller " "than previously inserted key %.*s", - (int)a->size, (char *)a->data, (int)b->size, (char *)b->data); + (int)a->size, (const char *)a->data, + (int)b->size, (const char *)b->data); err: __wt_scr_free(&a); __wt_scr_free(&b); diff --git a/src/btree/bt_curnext.c b/src/btree/bt_curnext.c index d08f113fd90..6bfc4a8ad89 100644 --- a/src/btree/bt_curnext.c +++ b/src/btree/bt_curnext.c @@ -100,7 +100,7 @@ __cursor_fix_next(WT_CURSOR_BTREE *cbt, int newpage) new_page: /* Check any insert list for a matching record. */ cbt->ins_head = WT_COL_UPDATE_SINGLE(cbt->page); cbt->ins = __col_insert_search( - cbt->ins_head, cbt->ins_stack, cbt->recno); + cbt->ins_head, cbt->ins_stack, cbt->next_stack, cbt->recno); if (cbt->ins != NULL && cbt->recno != WT_INSERT_RECNO(cbt->ins)) cbt->ins = NULL; diff --git a/src/btree/bt_curprev.c b/src/btree/bt_curprev.c index 650f09f1a87..e836615c4bc 100644 --- a/src/btree/bt_curprev.c +++ b/src/btree/bt_curprev.c @@ -54,7 +54,8 @@ restart: session, cbt, cbt->ins_head, &key)); } else cbt->ins = __col_insert_search(cbt->ins_head, - cbt->ins_stack, WT_INSERT_RECNO(current)); + cbt->ins_stack, cbt->next_stack, + WT_INSERT_RECNO(current)); } /* @@ -83,6 +84,7 @@ restart: if (ins == NULL || ins == current) for (; i >= 0; i--) { cbt->ins_stack[i] = NULL; + cbt->next_stack[i] = NULL; ins = cbt->ins_head->head[i]; if (ins != NULL && ins != current) break; @@ -96,12 +98,14 @@ restart: */ if (ins == NULL) { cbt->ins_stack[0] = NULL; + cbt->next_stack[0] = NULL; goto restart; } if (ins->next[i] != current) /* Stay at this level */ ins = ins->next[i]; else { /* Drop down a level */ cbt->ins_stack[i] = &ins->next[i]; + 
cbt->next_stack[i] = ins->next[i]; --i; } } @@ -231,7 +235,7 @@ __cursor_fix_prev(WT_CURSOR_BTREE *cbt, int newpage) new_page: /* Check any insert list for a matching record. */ cbt->ins_head = WT_COL_UPDATE_SINGLE(cbt->page); cbt->ins = __col_insert_search( - cbt->ins_head, cbt->ins_stack, cbt->recno); + cbt->ins_head, cbt->ins_stack, cbt->next_stack, cbt->recno); if (cbt->ins != NULL && cbt->recno != WT_INSERT_RECNO(cbt->ins)) cbt->ins = NULL; diff --git a/src/btree/bt_cursor.c b/src/btree/bt_cursor.c index 25d65d557a8..15a411eab0c 100644 --- a/src/btree/bt_cursor.c +++ b/src/btree/bt_cursor.c @@ -108,7 +108,7 @@ __wt_btcur_reset(WT_CURSOR_BTREE *cbt) WT_SESSION_IMPL *session; session = (WT_SESSION_IMPL *)cbt->iface.session; - WT_BSTAT_INCR(session, cursor_resets); + WT_BSTAT_INCR(session, cursor_reset); __cursor_leave(cbt); __cursor_search_clear(cbt); @@ -246,7 +246,7 @@ __wt_btcur_insert(WT_CURSOR_BTREE *cbt) btree = cbt->btree; cursor = &cbt->iface; session = (WT_SESSION_IMPL *)cursor->session; - WT_BSTAT_INCR(session, cursor_inserts); + WT_BSTAT_INCR(session, cursor_insert); if (btree->type == BTREE_ROW) WT_RET(__cursor_size_chk(session, &cursor->key)); @@ -336,7 +336,7 @@ __wt_btcur_remove(WT_CURSOR_BTREE *cbt) btree = cbt->btree; cursor = &cbt->iface; session = (WT_SESSION_IMPL *)cursor->session; - WT_BSTAT_INCR(session, cursor_removes); + WT_BSTAT_INCR(session, cursor_remove); if (btree->type == BTREE_ROW) WT_RET(__cursor_size_chk(session, &cursor->key)); @@ -398,7 +398,7 @@ __wt_btcur_update(WT_CURSOR_BTREE *cbt) btree = cbt->btree; cursor = &cbt->iface; session = (WT_SESSION_IMPL *)cursor->session; - WT_BSTAT_INCR(session, cursor_updates); + WT_BSTAT_INCR(session, cursor_update); if (btree->type == BTREE_ROW) WT_RET(__cursor_size_chk(session, &cursor->key)); diff --git a/src/btree/bt_debug.c b/src/btree/bt_debug.c index 98f457d4891..7bcb8acb6a8 100644 --- a/src/btree/bt_debug.c +++ b/src/btree/bt_debug.c @@ -893,7 +893,7 @@ __debug_cell(WT_DBG *ds, 
WT_PAGE_HEADER *dsk, WT_CELL_UNPACK *unpack) addr: WT_RET(__wt_scr_alloc(session, 128, &buf)); if ((ret = __wt_bm_addr_string( session, buf, unpack->data, unpack->size)) == 0) - __dmsg(ds, ", %s %s", type, (char *)buf->data); + __dmsg(ds, ", %s %s", type, (const char *)buf->data); __wt_scr_free(&buf); WT_RET(ret); break; diff --git a/src/btree/bt_handle.c b/src/btree/bt_handle.c index 873edb1ea8f..64e692d1cbb 100644 --- a/src/btree/bt_handle.c +++ b/src/btree/bt_handle.c @@ -260,7 +260,7 @@ __btree_conf(WT_SESSION_IMPL *session, const char *cfg[]) WT_RET(__wt_rwlock_alloc( session, "btree overflow lock", &btree->val_ovfl_lock)); - WT_RET(__wt_stat_alloc_btree_stats(session, &btree->stats)); + WT_RET(__wt_stat_alloc_dsrc_stats(session, &btree->stats)); /* The tree has not been modified. */ btree->modified = 0; diff --git a/src/btree/bt_misc.c b/src/btree/bt_misc.c index 8ddda9d4b69..cc862303236 100644 --- a/src/btree/bt_misc.c +++ b/src/btree/bt_misc.c @@ -111,5 +111,5 @@ __wt_addr_string( buf->data = "[Error]"; buf->size = WT_STORE_SIZE(strlen("[Error]")); } - return ((char *)buf->data); + return (buf->data); } diff --git a/src/btree/bt_page.c b/src/btree/bt_page.c index 5a0c17dcf5f..cd74e3cdc87 100644 --- a/src/btree/bt_page.c +++ b/src/btree/bt_page.c @@ -399,13 +399,13 @@ __inmem_row_int(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *inmem_sizep) /* * If Huffman decoding is required or it's an overflow record, - * use the heavy-weight __wt_cell_unpack_copy() call to build - * the key. Else, we can do it faster internally as we don't - * have to shuffle memory around as much. + * unpack the cell to build the key, then resolve the prefix. + * Else, we can do it faster internally as we don't have to + * shuffle memory around as much. 
*/ prefix = unpack->prefix; if (huffman != NULL || unpack->ovfl) { - WT_ERR(__wt_cell_unpack_copy(session, unpack, current)); + WT_ERR(__wt_cell_unpack_ref(session, unpack, current)); /* * If there's a prefix, make sure there's enough buffer @@ -415,10 +415,10 @@ __inmem_row_int(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *inmem_sizep) if (prefix != 0) { WT_ERR(__wt_buf_grow( session, current, prefix + current->size)); - memmove((uint8_t *)current->data + - prefix, current->data, current->size); - memcpy( - (void *)current->data, last->data, prefix); + memmove((uint8_t *)current->mem + prefix, + current->data, current->size); + memcpy(current->mem, last->data, prefix); + current->data = current->mem; current->size += prefix; } } else { @@ -426,15 +426,14 @@ __inmem_row_int(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *inmem_sizep) * Get the cell's data/length and make sure we have * enough buffer space. */ - WT_ERR(__wt_buf_grow( + WT_ERR(__wt_buf_init( session, current, prefix + unpack->size)); /* Copy the prefix then the data into place. 
*/ if (prefix != 0) - memcpy((void *) - current->data, last->data, prefix); - memcpy((uint8_t *) - current->data + prefix, unpack->data, unpack->size); + memcpy(current->mem, last->data, prefix); + memcpy((uint8_t *)current->mem + prefix, unpack->data, + unpack->size); current->size = prefix + unpack->size; } diff --git a/src/btree/bt_slvg.c b/src/btree/bt_slvg.c index dde9492e75b..4a1e5c4a77e 100644 --- a/src/btree/bt_slvg.c +++ b/src/btree/bt_slvg.c @@ -350,7 +350,8 @@ __slvg_read(WT_SESSION_IMPL *session, WT_STUFF *ss) case WT_PAGE_ROW_INT: WT_VERBOSE_ERR(session, salvage, "%s page ignored %s", - __wt_page_type_string(dsk->type), (char *)as->data); + __wt_page_type_string(dsk->type), + (const char *)as->data); WT_ERR(__wt_bm_free(session, addrbuf, addrbuf_size)); continue; } @@ -364,17 +365,19 @@ __slvg_read(WT_SESSION_IMPL *session, WT_STUFF *ss) * overflow references to non-existent pages, might as well * discard these pages now. */ - if (__wt_verify_dsk(session, (char *)as->data, buf) != 0) { + if (__wt_verify_dsk(session, as->data, buf) != 0) { WT_VERBOSE_ERR(session, salvage, "%s page failed verify %s", - __wt_page_type_string(dsk->type), (char *)as->data); + __wt_page_type_string(dsk->type), + (const char *)as->data); WT_ERR(__wt_bm_free(session, addrbuf, addrbuf_size)); continue; } WT_VERBOSE_ERR(session, salvage, "tracking %s page, generation %" PRIu64 " %s", - __wt_page_type_string(dsk->type), gen, (char *)as->data); + __wt_page_type_string(dsk->type), gen, + (const char *)as->data); switch (dsk->type) { case WT_PAGE_COL_FIX: diff --git a/src/btree/bt_stat.c b/src/btree/bt_stat.c index c9ea8c84869..d8a4d95e33a 100644 --- a/src/btree/bt_stat.c +++ b/src/btree/bt_stat.c @@ -55,22 +55,22 @@ __stat_page(WT_SESSION_IMPL *session, WT_PAGE *page) */ switch (page->type) { case WT_PAGE_COL_FIX: - WT_BSTAT_INCR(session, file_col_fix_pages); - WT_BSTAT_INCRV(session, file_entries, page->entries); + WT_BSTAT_INCR(session, page_col_fix); + WT_BSTAT_INCRV(session, 
entries, page->entries); break; case WT_PAGE_COL_INT: - WT_BSTAT_INCR(session, file_col_int_pages); - WT_BSTAT_INCRV(session, file_entries, page->entries); + WT_BSTAT_INCR(session, page_col_int); + WT_BSTAT_INCRV(session, entries, page->entries); break; case WT_PAGE_COL_VAR: WT_RET(__stat_page_col_var(session, page)); break; case WT_PAGE_OVFL: - WT_BSTAT_INCR(session, file_overflow); + WT_BSTAT_INCR(session, overflow_page); break; case WT_PAGE_ROW_INT: - WT_BSTAT_INCR(session, file_row_int_pages); - WT_BSTAT_INCRV(session, file_entries, page->entries); + WT_BSTAT_INCR(session, page_row_int); + WT_BSTAT_INCRV(session, entries, page->entries); break; case WT_PAGE_ROW_LEAF: WT_RET(__stat_page_row_leaf(session, page)); @@ -97,7 +97,7 @@ __stat_page_col_var(WT_SESSION_IMPL *session, WT_PAGE *page) unpack = &_unpack; - WT_BSTAT_INCR(session, file_col_var_pages); + WT_BSTAT_INCR(session, page_col_var); /* * Walk the page, counting regular and overflow data items, and checking @@ -109,12 +109,12 @@ __stat_page_col_var(WT_SESSION_IMPL *session, WT_PAGE *page) WT_COL_FOREACH(page, cip, i) { if ((cell = WT_COL_PTR(page, cip)) == NULL) { orig_deleted = 1; - WT_BSTAT_INCR(session, file_col_deleted); + WT_BSTAT_INCR(session, page_col_deleted); } else { orig_deleted = 0; __wt_cell_unpack(cell, unpack); WT_BSTAT_INCRV( - session, file_entries, __wt_cell_rle(unpack)); + session, entries, __wt_cell_rle(unpack)); } /* @@ -126,13 +126,13 @@ __stat_page_col_var(WT_SESSION_IMPL *session, WT_PAGE *page) if (WT_UPDATE_DELETED_ISSET(upd)) { if (orig_deleted) continue; - WT_BSTAT_INCR(session, file_col_deleted); - WT_BSTAT_DECR(session, file_entries); + WT_BSTAT_INCR(session, page_col_deleted); + WT_BSTAT_DECR(session, entries); } else { if (!orig_deleted) continue; - WT_BSTAT_DECR(session, file_col_deleted); - WT_BSTAT_INCR(session, file_entries); + WT_BSTAT_DECR(session, page_col_deleted); + WT_BSTAT_INCR(session, entries); } } } @@ -151,7 +151,7 @@ __stat_page_row_leaf(WT_SESSION_IMPL 
*session, WT_PAGE *page) WT_UPDATE *upd; uint32_t cnt, i; - WT_BSTAT_INCR(session, file_row_leaf_pages); + WT_BSTAT_INCR(session, page_row_leaf); /* * Stat any K/V pairs inserted into the page before the first from-disk @@ -174,7 +174,7 @@ __stat_page_row_leaf(WT_SESSION_IMPL *session, WT_PAGE *page) ++cnt; } - WT_BSTAT_INCRV(session, file_entries, cnt); + WT_BSTAT_INCRV(session, entries, cnt); return (0); } diff --git a/src/btree/bt_vrfy_dsk.c b/src/btree/bt_vrfy_dsk.c index 7f1ad32dc2c..f0ffa207a0a 100644 --- a/src/btree/bt_vrfy_dsk.c +++ b/src/btree/bt_vrfy_dsk.c @@ -239,7 +239,7 @@ __verify_dsk_row( case WT_CELL_KEY: break; case WT_CELL_KEY_OVFL: - WT_ERR(__wt_cell_unpack_copy(session, unpack, current)); + WT_ERR(__wt_cell_unpack_ref(session, unpack, current)); goto key_compare; default: /* Not a key -- continue with the next cell. */ @@ -270,43 +270,43 @@ __verify_dsk_row( cell_num, addr, prefix, last->size); /* - * If Huffman decoding required, use the heavy-weight call to - * __wt_cell_unpack_copy() to build the key, up to the prefix. - * Else, we can do it faster internally because we don't have - * to shuffle memory around as much. + * If Huffman decoding required, unpack the cell to build the + * key, then resolve the prefix. Else, we can do it faster + * internally because we don't have to shuffle memory around as + * much. */ - if (huffman == NULL) { - /* - * Get the cell's data/length and make sure we have - * enough buffer space. - */ - WT_ERR(__wt_buf_grow( - session, current, prefix + unpack->size)); - - /* Copy the prefix then the data into place. 
*/ - if (prefix != 0) - memcpy((void *) - current->data, last->data, prefix); - memcpy((uint8_t *) - current->data + prefix, unpack->data, unpack->size); - current->size = prefix + unpack->size; - } else { - WT_ERR(__wt_cell_unpack_copy(session, unpack, current)); + if (huffman != NULL) { + WT_ERR(__wt_cell_unpack_ref(session, unpack, current)); /* * If there's a prefix, make sure there's enough buffer * space, then shift the decoded data past the prefix - * and copy the prefix into place. + * and copy the prefix into place. Take care with the + * pointers: current->data may be pointing inside the buffer. */ if (prefix != 0) { WT_ERR(__wt_buf_grow( session, current, prefix + current->size)); - memmove((uint8_t *)current->data + - prefix, current->data, current->size); - memcpy( - (void *)current->data, last->data, prefix); + memmove((uint8_t *)current->mem + prefix, + current->data, current->size); + memcpy(current->mem, last->data, prefix); + current->data = current->mem; current->size += prefix; } + } else { + /* + * Get the cell's data/length and make sure we have + * enough buffer space. + */ + WT_ERR(__wt_buf_init( + session, current, prefix + unpack->size)); + + /* Copy the prefix then the data into place. 
*/ + if (prefix != 0) + memcpy(current->mem, last->data, prefix); + memcpy((uint8_t *)current->mem + prefix, unpack->data, + unpack->size); + current->size = prefix + unpack->size; } key_compare: /* diff --git a/src/btree/col_modify.c b/src/btree/col_modify.c index 7035ccdc74b..fbe7898dc40 100644 --- a/src/btree/col_modify.c +++ b/src/btree/col_modify.c @@ -134,8 +134,10 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, int op) if (*inshead == NULL) { new_inshead_size = sizeof(WT_INSERT_HEAD); WT_ERR(__wt_calloc_def(session, 1, &new_inshead)); - for (i = 0; i < WT_SKIP_MAXDEPTH; i++) + for (i = 0; i < WT_SKIP_MAXDEPTH; i++) { cbt->ins_stack[i] = &new_inshead->head[i]; + cbt->next_stack[i] = NULL; + } cbt->ins_head = new_inshead; } @@ -164,7 +166,8 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, int op) ins_copy = ins; WT_ERR(__wt_col_append_serial(session, - page, cbt->write_gen, inshead, cbt->ins_stack, + page, cbt->write_gen, inshead, + cbt->ins_stack, cbt->next_stack, &new_inslist, new_inslist_size, &new_inshead, new_inshead_size, &ins, ins_size, skipdepth)); @@ -173,8 +176,8 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, int op) cbt->recno = WT_INSERT_RECNO(ins_copy); } else WT_ERR(__wt_insert_serial(session, - page, cbt->write_gen, - inshead, cbt->ins_stack, + page, cbt->write_gen, inshead, + cbt->ins_stack, cbt->next_stack, &new_inslist, new_inslist_size, &new_inshead, new_inshead_size, &ins, ins_size, skipdepth)); @@ -230,7 +233,7 @@ int __wt_col_append_serial_func(WT_SESSION_IMPL *session, void *args) { WT_BTREE *btree; - WT_INSERT *ins, *new_ins, ***ins_stack; + WT_INSERT *ins, *new_ins, ***ins_stack, **next_stack; WT_INSERT_HEAD *inshead, **insheadp, **new_inslist, *new_inshead; WT_PAGE *page; uint64_t recno; @@ -239,8 +242,9 @@ __wt_col_append_serial_func(WT_SESSION_IMPL *session, void *args) btree = session->btree; - __wt_col_append_unpack(args, &page, &write_gen, &insheadp, - &ins_stack, 
&new_inslist, &new_inshead, &new_ins, &skipdepth); + __wt_col_append_unpack(args, + &page, &write_gen, &insheadp, &ins_stack, &next_stack, + &new_inslist, &new_inshead, &new_ins, &skipdepth); /* Check the page's write-generation. */ WT_RET(__wt_page_write_gen_check(session, page, write_gen)); @@ -259,7 +263,7 @@ __wt_col_append_serial_func(WT_SESSION_IMPL *session, void *args) if ((recno = WT_INSERT_RECNO(new_ins)) == 0) recno = WT_INSERT_RECNO(new_ins) = ++btree->last_recno; - ins = __col_insert_search(inshead, ins_stack, recno); + ins = __col_insert_search(inshead, ins_stack, next_stack, recno); /* If we find the record number, there's been a race. */ if (ins != NULL && WT_INSERT_RECNO(ins) == recno) diff --git a/src/btree/col_srch.c b/src/btree/col_srch.c index c2fbf80c2e2..ffcca036cab 100644 --- a/src/btree/col_srch.c +++ b/src/btree/col_srch.c @@ -117,8 +117,8 @@ __wt_col_search(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, int is_modify) * For that reason, don't set the cursor's WT_INSERT_HEAD/WT_INSERT pair * until we know we have a useful entry. */ - if ((ins = - __col_insert_search(ins_head, cbt->ins_stack, recno)) != NULL) + if ((ins = __col_insert_search( + ins_head, cbt->ins_stack, cbt->next_stack, recno)) != NULL) if (recno == WT_INSERT_RECNO(ins)) { cbt->ins_head = ins_head; cbt->ins = ins; @@ -135,8 +135,8 @@ past_end: * past the end of the table. 
*/ cbt->ins_head = WT_COL_APPEND(page); - if ((cbt->ins = - __col_insert_search(cbt->ins_head, cbt->ins_stack, recno)) == NULL) + if ((cbt->ins = __col_insert_search( + cbt->ins_head, cbt->ins_stack, cbt->next_stack, recno)) == NULL) cbt->compare = -1; else { cbt->recno = WT_INSERT_RECNO(cbt->ins); diff --git a/src/btree/rec_write.c b/src/btree/rec_write.c index 982623a951b..360e184da75 100644 --- a/src/btree/rec_write.c +++ b/src/btree/rec_write.c @@ -2980,7 +2980,7 @@ __rec_row_leaf(WT_SESSION_IMPL *session, unpack->prefix); tmpkey->data = tmpkey->mem; } - memcpy((uint8_t *)tmpkey->data + unpack->prefix, + memcpy((uint8_t *)tmpkey->mem + unpack->prefix, unpack->data, unpack->size); tmpkey->size = unpack->prefix + unpack->size; } else @@ -3321,7 +3321,7 @@ __rec_write_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) "split: starting key " "%.*s", (int)tkey->size, - (char *)tkey->data); + (const char *)tkey->data); break; case WT_PAGE_COL_FIX: case WT_PAGE_COL_INT: diff --git a/src/btree/row_modify.c b/src/btree/row_modify.c index 2ad6947d6de..64123bc53e5 100644 --- a/src/btree/row_modify.c +++ b/src/btree/row_modify.c @@ -121,8 +121,10 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, int is_remove) if (*inshead == NULL) { new_inshead_size = sizeof(WT_INSERT_HEAD); WT_ERR(__wt_calloc_def(session, 1, &new_inshead)); - for (i = 0; i < WT_SKIP_MAXDEPTH; i++) + for (i = 0; i < WT_SKIP_MAXDEPTH; i++) { cbt->ins_stack[i] = &new_inshead->head[i]; + cbt->next_stack[i] = NULL; + } cbt->ins_head = new_inshead; } @@ -144,7 +146,7 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, int is_remove) /* Insert the WT_INSERT structure. 
*/ WT_ERR(__wt_insert_serial(session, page, cbt->write_gen, - inshead, cbt->ins_stack, + inshead, cbt->ins_stack, cbt->next_stack, &new_inslist, new_inslist_size, &new_inshead, new_inshead_size, &ins, ins_size, skipdepth)); @@ -205,17 +207,38 @@ __wt_row_insert_alloc(WT_SESSION_IMPL *session, int __wt_insert_serial_func(WT_SESSION_IMPL *session, void *args) { - WT_INSERT *new_ins, ***ins_stack; + WT_INSERT *new_ins, ***ins_stack, **next_stack; WT_INSERT_HEAD *inshead, **insheadp, **new_inslist, *new_inshead; WT_PAGE *page; uint32_t write_gen; u_int i, skipdepth; __wt_insert_unpack(args, &page, &write_gen, &insheadp, - &ins_stack, &new_inslist, &new_inshead, &new_ins, &skipdepth); + &ins_stack, &next_stack, + &new_inslist, &new_inshead, &new_ins, &skipdepth); - /* Check the page's write-generation. */ - WT_RET(__wt_page_write_gen_check(session, page, write_gen)); + if ((inshead = *insheadp) == NULL) + inshead = new_inshead; + + /* + * Check the page's write-generation: if that fails, check whether we + * are still in the expected position, and no item has been added where + * our insert belongs. + */ + if (page->modify->write_gen + 1 == page->modify->disk_gen) + return (WT_RESTART); + + if (page->modify->write_gen != write_gen) { + for (i = 0; i < skipdepth; i++) { + if (ins_stack[i] == NULL || + *ins_stack[i] != next_stack[i]) + return (WT_RESTART); + if (next_stack[i] == NULL && + inshead->tail[i] != NULL && + ins_stack[i] != &inshead->tail[i]->next[i]) + return (WT_RESTART); + } + } /* * Publish: First, point the new WT_INSERT item's skiplist references @@ -223,10 +246,9 @@ __wt_insert_serial_func(WT_SESSION_IMPL *session, void *args) * update the skiplist elements that reference the new WT_INSERT item, * this ensures the list is never inconsistent. 
*/ - if ((inshead = *insheadp) == NULL) - inshead = new_inshead; for (i = 0; i < skipdepth; i++) new_ins->next[i] = *ins_stack[i]; + WT_WRITE_BARRIER(); for (i = 0; i < skipdepth; i++) { if (inshead->tail[i] == NULL || diff --git a/src/btree/row_srch.c b/src/btree/row_srch.c index 885979bffbf..f005db24a1e 100644 --- a/src/btree/row_srch.c +++ b/src/btree/row_srch.c @@ -16,27 +16,39 @@ __wt_search_insert(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_INSERT_HEAD *inshead, WT_ITEM *srch_key) { WT_BTREE *btree; - WT_INSERT **insp, *ret_ins; + WT_INSERT **insp, *last_ins, *ret_ins; WT_ITEM insert_key; int cmp, i; + btree = session->btree; + /* If there's no insert chain to search, we're done. */ if ((ret_ins = WT_SKIP_LAST(inshead)) == NULL) { cbt->ins = NULL; + cbt->next_stack[0] = NULL; return (0); } - btree = session->btree; - /* Fast-path appends. */ insert_key.data = WT_INSERT_KEY(ret_ins); insert_key.size = WT_INSERT_KEY_SIZE(ret_ins); (void)WT_BTREE_CMP(session, btree, srch_key, &insert_key, cmp); if (cmp >= 0) { - for (i = WT_SKIP_MAXDEPTH - 1; i >= 0; i--) - cbt->ins_stack[i] = (inshead->tail[i] != NULL) ? - &inshead->tail[i]->next[i] : - &inshead->head[i]; + /* + * XXX We may race with another appending thread. + * + * To catch that case, rely on the atomic pointer read above + * and set the next stack to NULL here. If we have raced with + * another thread, one of the next pointers will not be NULL by + * the time they are checked against the next stack inside the + * serialized insert function. + */ + for (i = WT_SKIP_MAXDEPTH - 1; i >= 0; i--) { + cbt->ins_stack[i] = (i == 0) ? &ret_ins->next[0] : + (inshead->tail[i] != NULL) ? 
+ &inshead->tail[i]->next[i] : &inshead->head[i]; + cbt->next_stack[i] = NULL; + } cbt->compare = -cmp; cbt->ins = ret_ins; return (0); @@ -46,9 +58,10 @@ __wt_search_insert(WT_SESSION_IMPL *session, * The insert list is a skip list: start at the highest skip level, then * go as far as possible at each level before stepping down to the next. */ - ret_ins = NULL; + last_ins = ret_ins = NULL; for (i = WT_SKIP_MAXDEPTH - 1, insp = &inshead->head[i]; i >= 0;) { - if (*insp == NULL) { + if ((ret_ins = *insp) == NULL) { + cbt->next_stack[i] = NULL; cbt->ins_stack[i--] = insp--; continue; } @@ -57,8 +70,8 @@ __wt_search_insert(WT_SESSION_IMPL *session, * Comparisons may be repeated as we drop down skiplist levels; * don't repeat comparisons, they might be expensive. */ - if (ret_ins != *insp) { - ret_ins = *insp; + if (ret_ins != last_ins) { + last_ins = ret_ins; insert_key.data = WT_INSERT_KEY(ret_ins); insert_key.size = WT_INSERT_KEY_SIZE(ret_ins); WT_RET(WT_BTREE_CMP( @@ -68,10 +81,14 @@ __wt_search_insert(WT_SESSION_IMPL *session, if (cmp > 0) /* Keep going at this level */ insp = &ret_ins->next[i]; else if (cmp == 0) - for (; i >= 0; i--) + for (; i >= 0; i--) { + cbt->next_stack[i] = ret_ins->next[i]; cbt->ins_stack[i] = &ret_ins->next[i]; - else /* Drop down a level */ + } + else { /* Drop down a level */ + cbt->next_stack[i] = ret_ins; cbt->ins_stack[i--] = insp--; + } } /* diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c index f2d2c68ec1a..ddbcdb1125c 100644 --- a/src/conn/conn_api.c +++ b/src/conn/conn_api.c @@ -521,7 +521,7 @@ __conn_config_file(WT_SESSION_IMPL *session, const char **cfg, WT_ITEM **cbufp) *t = '\0'; #if 0 - fprintf(stderr, "file config: {%s}\n", (char *)cbuf->data); + fprintf(stderr, "file config: {%s}\n", (const char *)cbuf->data); exit(0); #endif diff --git a/src/cursor/cur_backup.c b/src/cursor/cur_backup.c index ecf90f6dc6e..6fdaf072c5a 100644 --- a/src/cursor/cur_backup.c +++ b/src/cursor/cur_backup.c @@ -479,11 +479,11 @@ 
__backup_table_element(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb, (int)(cval.len - strlen("file:")), cval.str + strlen("file:"))); WT_ERR(__backup_list_append( - session, cb, (char *)tmp->data)); + session, cb, (const char *)tmp->data)); } else WT_ERR_MSG(session, EINVAL, "%s: unknown data source '%.*s'", - (char *)tmp->data, (int)cval.len, cval.str); + (const char *)tmp->data, (int)cval.len, cval.str); } err: __wt_scr_free(&tmp); diff --git a/src/cursor/cur_stat.c b/src/cursor/cur_stat.c index a66d41d354d..e37170200ce 100644 --- a/src/cursor/cur_stat.c +++ b/src/cursor/cur_stat.c @@ -309,7 +309,7 @@ __curstat_conn_init( cst->btree = NULL; cst->notpositioned = 1; cst->stats_first = (WT_STATS *)S2C(session)->stats; - cst->stats_count = sizeof(WT_CONNECTION_STATS) / sizeof(WT_STATS); + cst->stats_count = sizeof(*S2C(session)->stats) / sizeof(WT_STATS); cst->clear_func = LF_ISSET(WT_STATISTICS_CLEAR) ? __wt_stat_clear_connection_stats : NULL; } @@ -331,9 +331,9 @@ __curstat_file_init(WT_SESSION_IMPL *session, cst->btree = btree; cst->notpositioned = 1; cst->stats_first = (WT_STATS *)session->btree->stats; - cst->stats_count = sizeof(WT_BTREE_STATS) / sizeof(WT_STATS); + cst->stats_count = sizeof(*btree->stats) / sizeof(WT_STATS); cst->clear_func = LF_ISSET(WT_STATISTICS_CLEAR) ? - __wt_stat_clear_btree_stats : NULL; + __wt_stat_clear_dsrc_stats : NULL; return (0); } @@ -359,9 +359,9 @@ __curstat_lsm_init(WT_SESSION_IMPL *session, cst->btree = NULL; cst->notpositioned = 1; cst->stats_first = (WT_STATS *)lsm_tree->stats; - cst->stats_count = sizeof(WT_LSM_STATS) / sizeof(WT_STATS); + cst->stats_count = sizeof(*lsm_tree->stats) / sizeof(WT_STATS); cst->clear_func = LF_ISSET(WT_STATISTICS_CLEAR) ? 
- __wt_stat_clear_lsm_stats : NULL; + __wt_stat_clear_dsrc_stats : NULL; return (0); } diff --git a/src/cursor/cur_std.c b/src/cursor/cur_std.c index 478ba0a4fa2..a77b2c76159 100644 --- a/src/cursor/cur_std.c +++ b/src/cursor/cur_std.c @@ -214,7 +214,7 @@ __wt_cursor_set_keyv(WT_CURSOR *cursor, uint32_t flags, va_list ap) if (LF_ISSET(WT_CURSOR_RAW_OK) || strcmp(fmt, "u") == 0) { item = va_arg(ap, WT_ITEM *); sz = item->size; - cursor->key.data = (void *)item->data; + cursor->key.data = item->data; } else if (strcmp(fmt, "S") == 0) { str = va_arg(ap, const char *); sz = strlen(str) + 1; diff --git a/src/docs/data_sources.dox b/src/docs/data_sources.dox index 644268f7fb5..1ad5c4fcfa4 100644 --- a/src/docs/data_sources.dox +++ b/src/docs/data_sources.dox @@ -88,4 +88,8 @@ Both examples can use a common display routine that iterates through the statistics until the cursor returns the end of the list. @snippet ex_stat.c statistics display function + +Individual statistics values can be retrieved by searching for the corresponding key, as shown in the following example: + +@snippet ex_stat.c statistics retrieve by key */ diff --git a/src/docs/top/main.dox b/src/docs/top/main.dox index 8f523d669fe..f7d897857f0 100644 --- a/src/docs/top/main.dox +++ b/src/docs/top/main.dox @@ -13,7 +13,7 @@ To ask questions or discuss issues related to using WiredTiger, visit our View the documentation online: -- <a href="1.3.7/index.html"><b>WiredTiger 1.3.7 (current release)</b></a> +- <a href="1.3.8/index.html"><b>WiredTiger 1.3.8 (current release)</b></a> - <a href="1.2.2/index.html"><b>WiredTiger 1.2.2</b></a> - <a href="1.1.5/index.html"><b>WiredTiger 1.1.5</b></a> diff --git a/src/docs/upgrading.dox b/src/docs/upgrading.dox index ad6578717e5..107826c5907 100644 --- a/src/docs/upgrading.dox +++ b/src/docs/upgrading.dox @@ -1,5 +1,15 @@ /*! 
@page upgrading Upgrading WiredTiger applications +@section version_138 Upgrading to Version 1.3.8 + +<dl> +<dt>Statistics keys</dt> +<dd> +The @ref statistics_keys "statistics key constants" have been renamed to use +all capitals, and use consistent prefixes to distinguish between connection +statistics and statistics for data sources. +</dd> + @section version_136 Upgrading to Version 1.3.6 <dl> diff --git a/src/include/api.h b/src/include/api.h index 3bccf3f44d9..4dafb83d88d 100644 --- a/src/include/api.h +++ b/src/include/api.h @@ -398,7 +398,8 @@ extern WT_PROCESS __wt_process; #define WT_PAGE_FREE_IGNORE_DISK 0x00000001 #define WT_REC_SINGLE 0x00000001 #define WT_SERVER_RUN 0x00000001 -#define WT_SESSION_INTERNAL 0x00000004 +#define WT_SESSION_INTERNAL 0x00000008 +#define WT_SESSION_NO_CACHE_CHECK 0x00000004 #define WT_SESSION_SALVAGE_QUIET_ERR 0x00000002 #define WT_SESSION_SCHEMA_LOCKED 0x00000001 #define WT_VERB_block 0x00004000 diff --git a/src/include/btree.h b/src/include/btree.h index 6a2de1ae509..178de6879d2 100644 --- a/src/include/btree.h +++ b/src/include/btree.h @@ -122,7 +122,7 @@ struct __wt_btree { uint64_t evict_priority; /* Relative priority of cached pages. 
*/ volatile uint32_t lru_count; /* Count of threads in LRU eviction */ - WT_BTREE_STATS *stats; /* Btree statistics */ + WT_DSRC_STATS *stats; /* Btree statistics */ #define WT_BTREE_BULK 0x0001 /* Bulk-load handle */ #define WT_BTREE_DISCARD 0x0002 /* Discard on release */ diff --git a/src/include/btree.i b/src/include/btree.i index 642ce7b7db4..c98c3a33355 100644 --- a/src/include/btree.i +++ b/src/include/btree.i @@ -245,7 +245,7 @@ __wt_page_write_gen_check( if (mod->write_gen == write_gen && mod->write_gen + 1 != mod->disk_gen) return (0); - WT_BSTAT_INCR(session, file_write_conflicts); + WT_BSTAT_INCR(session, txn_write_conflict); return (WT_RESTART); } diff --git a/src/include/cache.i b/src/include/cache.i index 0ea01d0fc07..78fbc6f88d9 100644 --- a/src/include/cache.i +++ b/src/include/cache.i @@ -57,8 +57,8 @@ __wt_cache_full_check(WT_SESSION_IMPL *session) */ for (wake = 0;; wake = (wake + 1) % 100) { __wt_eviction_check(session, &lockout, wake == 0); - if (!lockout || - F_ISSET(session, WT_SESSION_SCHEMA_LOCKED)) + if (!lockout || F_ISSET(session, + WT_SESSION_NO_CACHE_CHECK | WT_SESSION_SCHEMA_LOCKED)) return (0); if ((btree = session->btree) != NULL && F_ISSET(btree, WT_BTREE_BULK | WT_BTREE_NO_CACHE | WT_BTREE_NO_EVICTION)) diff --git a/src/include/cell.i b/src/include/cell.i index 7a919d5ebe3..e20e4182440 100644 --- a/src/include/cell.i +++ b/src/include/cell.i @@ -674,7 +674,7 @@ __wt_cell_unpack_copy( WT_RET(__wt_cell_unpack_ref(session, unpack, store)); if (store->mem != NULL && store->data >= store->mem && - (uint8_t *)store->data < (uint8_t *)store->mem + store->memsize) + WT_PTRDIFF(store->data, store->mem) < store->memsize) return (0); return (__wt_buf_set(session, store, store->data, store->size)); } diff --git a/src/include/column.i b/src/include/column.i index 83b2783dbf0..0cf83a6d2e9 100644 --- a/src/include/column.i +++ b/src/include/column.i @@ -58,8 +58,8 @@ __col_insert_search_match(WT_INSERT_HEAD *inshead, uint64_t recno) * Search a 
column-store insert list, creating a skiplist stack as we go. */ static inline WT_INSERT * -__col_insert_search( - WT_INSERT_HEAD *inshead, WT_INSERT ***ins_stack, uint64_t recno) +__col_insert_search(WT_INSERT_HEAD *inshead, + WT_INSERT ***ins_stack, WT_INSERT **next_stack, uint64_t recno) { WT_INSERT **insp, *ret_ins; uint64_t ins_recno; @@ -71,9 +71,12 @@ __col_insert_search( /* Fast path appends. */ if (recno >= WT_INSERT_RECNO(ret_ins)) { - for (i = 0; i < WT_SKIP_MAXDEPTH; i++) - ins_stack[i] = (inshead->tail[i] != NULL) ? + for (i = 0; i < WT_SKIP_MAXDEPTH; i++) { + ins_stack[i] = (i == 0) ? &ret_ins->next[0] : + (inshead->tail[i] != NULL) ? &inshead->tail[i]->next[i] : &inshead->head[i]; + next_stack[i] = NULL; + } return (ret_ins); } @@ -82,22 +85,26 @@ __col_insert_search( * go as far as possible at each level before stepping down to the next. */ for (i = WT_SKIP_MAXDEPTH - 1, insp = &inshead->head[i]; i >= 0; ) { - if (*insp == NULL) { + if ((ret_ins = *insp) == NULL) { + next_stack[i] = NULL; ins_stack[i--] = insp--; continue; } - ret_ins = *insp; ins_recno = WT_INSERT_RECNO(ret_ins); cmp = (recno == ins_recno) ? 0 : (recno < ins_recno) ? 
-1 : 1; if (cmp > 0) /* Keep going at this level */ - insp = &(*insp)->next[i]; + insp = &ret_ins->next[i]; else if (cmp == 0) /* Exact match: return */ - for (; i >= 0; i--) + for (; i >= 0; i--) { + next_stack[i] = ret_ins->next[i]; ins_stack[i] = &ret_ins->next[i]; - else /* Drop down a level */ + } + else { /* Drop down a level */ + next_stack[i] = ret_ins; ins_stack[i--] = insp--; + } } return (ret_ins); } diff --git a/src/include/cursor.h b/src/include/cursor.h index 3afc5a8ea7c..85c61076b75 100644 --- a/src/include/cursor.h +++ b/src/include/cursor.h @@ -36,6 +36,9 @@ struct __wt_cursor_btree { /* Search stack */ WT_INSERT **ins_stack[WT_SKIP_MAXDEPTH]; + /* Next item(s) found during search */ + WT_INSERT *next_stack[WT_SKIP_MAXDEPTH]; + uint64_t recno; /* Record number */ uint32_t write_gen; /* Saved leaf page's write generation */ diff --git a/src/include/error.h b/src/include/error.h index dd428d70601..35a8407aa0b 100644 --- a/src/include/error.h +++ b/src/include/error.h @@ -69,6 +69,11 @@ if ((__ret = (a)) != 0 && ret == 0) \ ret = __ret; \ } while (0) +#define WT_TRET_NOTFOUND_OK(a) do { \ + int __ret; \ + if ((__ret = (a)) != 0 && __ret != WT_NOTFOUND && ret == 0) \ + ret = __ret; \ +} while (0) /* * WT_ASSERT, WT_ASSERT_ERR, WT_ASSERT_RET -- diff --git a/src/include/extern.h b/src/include/extern.h index d0414f03869..09826b23a4d 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -1239,15 +1239,12 @@ extern void *__wt_scr_alloc_ext(WT_SESSION *wt_session, size_t size); extern void __wt_scr_free_ext(WT_SESSION *wt_session, void *p); extern void __wt_session_dump_all(WT_SESSION_IMPL *session); extern void __wt_session_dump(WT_SESSION_IMPL *session); -extern int __wt_stat_alloc_btree_stats(WT_SESSION_IMPL *session, - WT_BTREE_STATS **statsp); -extern void __wt_stat_clear_btree_stats(WT_STATS *stats_arg); +extern int __wt_stat_alloc_dsrc_stats(WT_SESSION_IMPL *session, + WT_DSRC_STATS **statsp); +extern void __wt_stat_clear_dsrc_stats(WT_STATS 
*stats_arg); extern int __wt_stat_alloc_connection_stats(WT_SESSION_IMPL *session, WT_CONNECTION_STATS **statsp); extern void __wt_stat_clear_connection_stats(WT_STATS *stats_arg); -extern int __wt_stat_alloc_lsm_stats(WT_SESSION_IMPL *session, - WT_LSM_STATS **statsp); -extern void __wt_stat_clear_lsm_stats(WT_STATS *stats_arg); extern int __wt_txnid_cmp(const void *v1, const void *v2); extern void __wt_txn_release_snapshot(WT_SESSION_IMPL *session); extern void __wt_txn_get_oldest(WT_SESSION_IMPL *session); diff --git a/src/include/lsm.h b/src/include/lsm.h index 190cf51c513..24abc0bef1c 100644 --- a/src/include/lsm.h +++ b/src/include/lsm.h @@ -65,9 +65,8 @@ struct __wt_lsm_tree { WT_RWLOCK *rwlock; TAILQ_ENTRY(__wt_lsm_tree) q; - WT_LSM_STATS *stats; /* LSM statistics */ + WT_DSRC_STATS *stats; /* LSM statistics */ - WT_SPINLOCK lock; uint64_t dsk_gen; uint32_t *memsizep; diff --git a/src/include/serial_funcs.i b/src/include/serial_funcs.i index bbddf47042b..4b39b877088 100644 --- a/src/include/serial_funcs.i +++ b/src/include/serial_funcs.i @@ -5,6 +5,7 @@ typedef struct { uint32_t write_gen; WT_INSERT_HEAD **insheadp; WT_INSERT ***ins_stack; + WT_INSERT **next_stack; WT_INSERT_HEAD **new_inslist; size_t new_inslist_size; int new_inslist_taken; @@ -20,10 +21,10 @@ typedef struct { static inline int __wt_col_append_serial( WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t write_gen, - WT_INSERT_HEAD **insheadp, WT_INSERT ***ins_stack, WT_INSERT_HEAD - ***new_inslistp, size_t new_inslist_size, WT_INSERT_HEAD - **new_insheadp, size_t new_inshead_size, WT_INSERT **new_insp, size_t - new_ins_size, u_int skipdepth) + WT_INSERT_HEAD **insheadp, WT_INSERT ***ins_stack, WT_INSERT + **next_stack, WT_INSERT_HEAD ***new_inslistp, size_t new_inslist_size, + WT_INSERT_HEAD **new_insheadp, size_t new_inshead_size, WT_INSERT + **new_insp, size_t new_ins_size, u_int skipdepth) { __wt_col_append_args _args, *args = &_args; WT_DECL_RET; @@ -36,6 +37,8 @@ __wt_col_append_serial( 
args->ins_stack = ins_stack; + args->next_stack = next_stack; + if (new_inslistp == NULL) args->new_inslist = NULL; else { @@ -81,9 +84,9 @@ __wt_col_append_serial( static inline void __wt_col_append_unpack( void *untyped_args, WT_PAGE **pagep, uint32_t *write_genp, - WT_INSERT_HEAD ***insheadpp, WT_INSERT ****ins_stackp, WT_INSERT_HEAD - ***new_inslistp, WT_INSERT_HEAD **new_insheadp, WT_INSERT **new_insp, - u_int *skipdepthp) + WT_INSERT_HEAD ***insheadpp, WT_INSERT ****ins_stackp, WT_INSERT + ***next_stackp, WT_INSERT_HEAD ***new_inslistp, WT_INSERT_HEAD + **new_insheadp, WT_INSERT **new_insp, u_int *skipdepthp) { __wt_col_append_args *args = (__wt_col_append_args *)untyped_args; @@ -91,6 +94,7 @@ __wt_col_append_unpack( *write_genp = args->write_gen; *insheadpp = args->insheadp; *ins_stackp = args->ins_stack; + *next_stackp = args->next_stack; *new_inslistp = args->new_inslist; *new_insheadp = args->new_inshead; *new_insp = args->new_ins; @@ -138,6 +142,7 @@ typedef struct { uint32_t write_gen; WT_INSERT_HEAD **inshead; WT_INSERT ***ins_stack; + WT_INSERT **next_stack; WT_INSERT_HEAD **new_inslist; size_t new_inslist_size; int new_inslist_taken; @@ -153,10 +158,10 @@ typedef struct { static inline int __wt_insert_serial( WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t write_gen, - WT_INSERT_HEAD **inshead, WT_INSERT ***ins_stack, WT_INSERT_HEAD - ***new_inslistp, size_t new_inslist_size, WT_INSERT_HEAD - **new_insheadp, size_t new_inshead_size, WT_INSERT **new_insp, size_t - new_ins_size, u_int skipdepth) + WT_INSERT_HEAD **inshead, WT_INSERT ***ins_stack, WT_INSERT + **next_stack, WT_INSERT_HEAD ***new_inslistp, size_t new_inslist_size, + WT_INSERT_HEAD **new_insheadp, size_t new_inshead_size, WT_INSERT + **new_insp, size_t new_ins_size, u_int skipdepth) { __wt_insert_args _args, *args = &_args; WT_DECL_RET; @@ -169,6 +174,8 @@ __wt_insert_serial( args->ins_stack = ins_stack; + args->next_stack = next_stack; + if (new_inslistp == NULL) args->new_inslist = 
NULL; else { @@ -214,9 +221,9 @@ __wt_insert_serial( static inline void __wt_insert_unpack( void *untyped_args, WT_PAGE **pagep, uint32_t *write_genp, - WT_INSERT_HEAD ***insheadp, WT_INSERT ****ins_stackp, WT_INSERT_HEAD - ***new_inslistp, WT_INSERT_HEAD **new_insheadp, WT_INSERT **new_insp, - u_int *skipdepthp) + WT_INSERT_HEAD ***insheadp, WT_INSERT ****ins_stackp, WT_INSERT + ***next_stackp, WT_INSERT_HEAD ***new_inslistp, WT_INSERT_HEAD + **new_insheadp, WT_INSERT **new_insp, u_int *skipdepthp) { __wt_insert_args *args = (__wt_insert_args *)untyped_args; @@ -224,6 +231,7 @@ __wt_insert_unpack( *write_genp = args->write_gen; *insheadp = args->inshead; *ins_stackp = args->ins_stack; + *next_stackp = args->next_stack; *new_inslistp = args->new_inslist; *new_insheadp = args->new_inshead; *new_insp = args->new_ins; diff --git a/src/include/stat.h b/src/include/stat.h index 0eea8ced061..9f8445cc5e8 100644 --- a/src/include/stat.h +++ b/src/include/stat.h @@ -68,32 +68,32 @@ struct __wt_stats { /* Statistics section: BEGIN */ /* - * Statistics entries for BTREE handle. + * Statistics entries for data sources. 
*/ -struct __wt_btree_stats { - WT_STATS file_bulk_loaded; - WT_STATS file_col_deleted; - WT_STATS file_col_fix_pages; - WT_STATS file_col_int_pages; - WT_STATS file_col_var_pages; - WT_STATS cursor_inserts; +struct __wt_dsrc_stats { + WT_STATS block_alloc; + WT_STATS block_extend; + WT_STATS block_free; + WT_STATS bloom_count; + WT_STATS bloom_false_positive; + WT_STATS bloom_hit; + WT_STATS bloom_miss; + WT_STATS bloom_page_evict; + WT_STATS bloom_page_read; + WT_STATS bloom_size; + WT_STATS ckpt_size; + WT_STATS cursor_insert; WT_STATS cursor_read; WT_STATS cursor_read_near; WT_STATS cursor_read_next; WT_STATS cursor_read_prev; - WT_STATS cursor_removes; - WT_STATS cursor_resets; - WT_STATS cursor_updates; - WT_STATS alloc; - WT_STATS extend; - WT_STATS free; - WT_STATS overflow_read; - WT_STATS overflow_value_cache; - WT_STATS page_evict; - WT_STATS page_read; - WT_STATS page_evict_fail; - WT_STATS page_write; - WT_STATS file_size; + WT_STATS cursor_remove; + WT_STATS cursor_reset; + WT_STATS cursor_update; + WT_STATS entries; + WT_STATS file_allocsize; + WT_STATS file_bulk_loaded; + WT_STATS file_compact_rewrite; WT_STATS file_fixed_len; WT_STATS file_magic; WT_STATS file_major; @@ -102,75 +102,64 @@ struct __wt_btree_stats { WT_STATS file_maxleafitem; WT_STATS file_maxleafpage; WT_STATS file_minor; - WT_STATS file_overflow; - WT_STATS file_allocsize; - WT_STATS file_compact_rewrite; - WT_STATS rec_page_merge; + WT_STATS file_size; + WT_STATS lsm_chunk_count; + WT_STATS lsm_generation_max; + WT_STATS lsm_lookup_no_bloom; + WT_STATS overflow_page; + WT_STATS overflow_read; + WT_STATS overflow_value_cache; + WT_STATS page_col_deleted; + WT_STATS page_col_fix; + WT_STATS page_col_int; + WT_STATS page_col_var; + WT_STATS page_evict; + WT_STATS page_evict_fail; + WT_STATS page_read; + WT_STATS page_row_int; + WT_STATS page_row_leaf; + WT_STATS page_write; WT_STATS rec_dictionary; - WT_STATS rec_split_intl; - WT_STATS rec_split_leaf; + WT_STATS rec_hazard; WT_STATS 
rec_ovfl_key; WT_STATS rec_ovfl_value; WT_STATS rec_page_delete; + WT_STATS rec_page_merge; + WT_STATS rec_split_intl; + WT_STATS rec_split_leaf; WT_STATS rec_written; - WT_STATS rec_hazard; - WT_STATS file_row_int_pages; - WT_STATS file_row_leaf_pages; - WT_STATS file_entries; - WT_STATS update_conflict; - WT_STATS file_write_conflicts; + WT_STATS txn_update_conflict; + WT_STATS txn_write_conflict; }; /* - * Statistics entries for CONNECTION handle. + * Statistics entries for connections. */ struct __wt_connection_stats { - WT_STATS txn_ancient; WT_STATS block_read; WT_STATS block_write; WT_STATS cache_bytes_inuse; - WT_STATS cache_evict_slow; - WT_STATS cache_evict_internal; WT_STATS cache_bytes_max; - WT_STATS cache_evict_modified; - WT_STATS cache_pages_inuse; WT_STATS cache_evict_hazard; + WT_STATS cache_evict_internal; + WT_STATS cache_evict_modified; + WT_STATS cache_evict_slow; WT_STATS cache_evict_unmodified; + WT_STATS cache_pages_inuse; WT_STATS checkpoint; WT_STATS cond_wait; WT_STATS file_open; - WT_STATS rwlock_rdlock; - WT_STATS rwlock_wrlock; WT_STATS memalloc; WT_STATS memfree; + WT_STATS rwlock_rdlock; + WT_STATS rwlock_wrlock; WT_STATS total_read_io; WT_STATS total_write_io; - WT_STATS txn_fail_cache; + WT_STATS txn_ancient; WT_STATS txn_begin; WT_STATS txn_commit; + WT_STATS txn_fail_cache; WT_STATS txn_rollback; }; -/* - * Statistics entries for LSM handle. 
- */ -struct __wt_lsm_stats { - WT_STATS generation_max; - WT_STATS bloom_false_positives; - WT_STATS bloom_hits; - WT_STATS bloom_misses; - WT_STATS bloom_count; - WT_STATS bloom_cache_evict; - WT_STATS bloom_cache_read; - WT_STATS chunk_count; - WT_STATS chunk_cache_evict; - WT_STATS cache_evict; - WT_STATS chunk_cache_read; - WT_STATS cache_read; - WT_STATS cache_evict_fail; - WT_STATS cache_write; - WT_STATS search_miss_no_bloom; - WT_STATS bloom_space; -}; - /* Statistics section: END */ diff --git a/src/include/txn.i b/src/include/txn.i index 0f0425a5961..be58e027fe2 100644 --- a/src/include/txn.i +++ b/src/include/txn.i @@ -189,7 +189,7 @@ __wt_txn_update_check(WT_SESSION_IMPL *session, WT_UPDATE *upd) if (txn->isolation == TXN_ISO_SNAPSHOT) while (upd != NULL && !__wt_txn_visible(session, upd->txnid)) { if (upd->txnid != WT_TXN_ABORTED) { - WT_BSTAT_INCR(session, update_conflict); + WT_BSTAT_INCR(session, txn_update_conflict); return (WT_DEADLOCK); } upd = upd->next; diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index f568618c87a..dfbab5be762 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -1694,207 +1694,189 @@ extern int wiredtiger_extension_init(WT_SESSION *session, */ /*! - * @name Statistics for connection handles + * @name Connection statistics * @anchor statistics_keys * @anchor statistics_conn - * Statistics in WiredTiger are accessed through cursors with \c "statistics:" - * URIs. Individual statistics can be queried through the cursor using the - * following keys. + * Statistics are accessed through cursors with \c "statistics:" URIs. + * Individual statistics can be queried through the cursor using the following + * keys. See @ref data_statistics for more information. * @{ */ -/*! ancient transactions */ -#define WT_STAT_txn_ancient 0 /*! blocks read from a file */ -#define WT_STAT_block_read 1 +#define WT_STAT_CONN_BLOCK_READ 0 /*! 
blocks written to a file */ -#define WT_STAT_block_write 2 +#define WT_STAT_CONN_BLOCK_WRITE 1 /*! cache: bytes currently held in the cache */ -#define WT_STAT_cache_bytes_inuse 3 -/*! cache: eviction server unable to reach eviction goal */ -#define WT_STAT_cache_evict_slow 4 -/*! cache: internal pages evicted */ -#define WT_STAT_cache_evict_internal 5 +#define WT_STAT_CONN_CACHE_BYTES_INUSE 2 /*! cache: maximum bytes configured */ -#define WT_STAT_cache_bytes_max 6 -/*! cache: modified pages evicted */ -#define WT_STAT_cache_evict_modified 7 -/*! cache: pages currently held in the cache */ -#define WT_STAT_cache_pages_inuse 8 +#define WT_STAT_CONN_CACHE_BYTES_MAX 3 /*! cache: pages selected for eviction not evicted because of a hazard * reference */ -#define WT_STAT_cache_evict_hazard 9 +#define WT_STAT_CONN_CACHE_EVICT_HAZARD 4 +/*! cache: internal pages evicted */ +#define WT_STAT_CONN_CACHE_EVICT_INTERNAL 5 +/*! cache: modified pages evicted */ +#define WT_STAT_CONN_CACHE_EVICT_MODIFIED 6 +/*! cache: eviction server unable to reach eviction goal */ +#define WT_STAT_CONN_CACHE_EVICT_SLOW 7 /*! cache: unmodified pages evicted */ -#define WT_STAT_cache_evict_unmodified 10 +#define WT_STAT_CONN_CACHE_EVICT_UNMODIFIED 8 +/*! cache: pages currently held in the cache */ +#define WT_STAT_CONN_CACHE_PAGES_INUSE 9 /*! checkpoints */ -#define WT_STAT_checkpoint 11 +#define WT_STAT_CONN_CHECKPOINT 10 /*! condition wait calls */ -#define WT_STAT_cond_wait 12 +#define WT_STAT_CONN_COND_WAIT 11 /*! files currently open */ -#define WT_STAT_file_open 13 -/*! rwlock readlock calls */ -#define WT_STAT_rwlock_rdlock 14 -/*! rwlock writelock calls */ -#define WT_STAT_rwlock_wrlock 15 +#define WT_STAT_CONN_FILE_OPEN 12 /*! total memory allocations */ -#define WT_STAT_memalloc 16 +#define WT_STAT_CONN_MEMALLOC 13 /*! total memory frees */ -#define WT_STAT_memfree 17 +#define WT_STAT_CONN_MEMFREE 14 +/*! rwlock readlock calls */ +#define WT_STAT_CONN_RWLOCK_RDLOCK 15 +/*! 
rwlock writelock calls */ +#define WT_STAT_CONN_RWLOCK_WRLOCK 16 /*! total read I/Os */ -#define WT_STAT_total_read_io 18 +#define WT_STAT_CONN_TOTAL_READ_IO 17 /*! total write I/Os */ -#define WT_STAT_total_write_io 19 -/*! transaction failures due to cache overflow */ -#define WT_STAT_txn_fail_cache 20 +#define WT_STAT_CONN_TOTAL_WRITE_IO 18 +/*! ancient transactions */ +#define WT_STAT_CONN_TXN_ANCIENT 19 /*! transactions */ -#define WT_STAT_txn_begin 21 +#define WT_STAT_CONN_TXN_BEGIN 20 /*! transactions committed */ -#define WT_STAT_txn_commit 22 +#define WT_STAT_CONN_TXN_COMMIT 21 +/*! transaction failures due to cache overflow */ +#define WT_STAT_CONN_TXN_FAIL_CACHE 22 /*! transactions rolled-back */ -#define WT_STAT_txn_rollback 23 +#define WT_STAT_CONN_TXN_ROLLBACK 23 /*! * @} - * @name Statistics for file objects - * @anchor statistics_file + * @name Statistics for data sources + * @anchor statistics_dsrc * @{ */ -/*! bulk-loaded entries */ -#define WT_STAT_file_bulk_loaded 0 -/*! column-store deleted values */ -#define WT_STAT_file_col_deleted 1 -/*! column-store fixed-size leaf pages */ -#define WT_STAT_file_col_fix_pages 2 -/*! column-store internal pages */ -#define WT_STAT_file_col_int_pages 3 -/*! column-store variable-size leaf pages */ -#define WT_STAT_file_col_var_pages 4 +/*! block allocations */ +#define WT_STAT_DSRC_BLOCK_ALLOC 0 +/*! block allocations required file extension */ +#define WT_STAT_DSRC_BLOCK_EXTEND 1 +/*! block frees */ +#define WT_STAT_DSRC_BLOCK_FREE 2 +/*! Number of Bloom filters in the LSM tree */ +#define WT_STAT_DSRC_BLOOM_COUNT 3 +/*! Number of Bloom filter false positives */ +#define WT_STAT_DSRC_BLOOM_FALSE_POSITIVE 4 +/*! Number of Bloom filter hits */ +#define WT_STAT_DSRC_BLOOM_HIT 5 +/*! Number of Bloom filter misses */ +#define WT_STAT_DSRC_BLOOM_MISS 6 +/*! Number of Bloom pages evicted from cache */ +#define WT_STAT_DSRC_BLOOM_PAGE_EVICT 7 +/*! 
Number of Bloom pages read into cache */ +#define WT_STAT_DSRC_BLOOM_PAGE_READ 8 +/*! Total size of Bloom filters */ +#define WT_STAT_DSRC_BLOOM_SIZE 9 +/*! checkpoint size */ +#define WT_STAT_DSRC_CKPT_SIZE 10 /*! cursor-inserts */ -#define WT_STAT_cursor_inserts 5 +#define WT_STAT_DSRC_CURSOR_INSERT 11 /*! cursor-read */ -#define WT_STAT_cursor_read 6 +#define WT_STAT_DSRC_CURSOR_READ 12 /*! cursor-read-near */ -#define WT_STAT_cursor_read_near 7 +#define WT_STAT_DSRC_CURSOR_READ_NEAR 13 /*! cursor-read-next */ -#define WT_STAT_cursor_read_next 8 +#define WT_STAT_DSRC_CURSOR_READ_NEXT 14 /*! cursor-read-prev */ -#define WT_STAT_cursor_read_prev 9 +#define WT_STAT_DSRC_CURSOR_READ_PREV 15 /*! cursor-removes */ -#define WT_STAT_cursor_removes 10 +#define WT_STAT_DSRC_CURSOR_REMOVE 16 /*! cursor-resets */ -#define WT_STAT_cursor_resets 11 +#define WT_STAT_DSRC_CURSOR_RESET 17 /*! cursor-updates */ -#define WT_STAT_cursor_updates 12 -/*! file: block allocations */ -#define WT_STAT_alloc 13 -/*! file: block allocations required file extension */ -#define WT_STAT_extend 14 -/*! file: block frees */ -#define WT_STAT_free 15 -/*! file: overflow pages read from the file */ -#define WT_STAT_overflow_read 16 -/*! file: overflow values cached in memory */ -#define WT_STAT_overflow_value_cache 17 -/*! file: pages evicted from the file */ -#define WT_STAT_page_evict 18 -/*! file: pages read from the file */ -#define WT_STAT_page_read 19 -/*! file: pages that were selected for eviction that could not be evicted */ -#define WT_STAT_page_evict_fail 20 -/*! file: pages written to the file */ -#define WT_STAT_page_write 21 -/*! file: size */ -#define WT_STAT_file_size 22 +#define WT_STAT_DSRC_CURSOR_UPDATE 18 +/*! total entries */ +#define WT_STAT_DSRC_ENTRIES 19 +/*! page size allocation unit */ +#define WT_STAT_DSRC_FILE_ALLOCSIZE 20 +/*! bulk-loaded entries */ +#define WT_STAT_DSRC_FILE_BULK_LOADED 21 +/*! 
pages rewritten by compaction */ +#define WT_STAT_DSRC_FILE_COMPACT_REWRITE 22 /*! fixed-record size */ -#define WT_STAT_file_fixed_len 23 +#define WT_STAT_DSRC_FILE_FIXED_LEN 23 /*! magic number */ -#define WT_STAT_file_magic 24 +#define WT_STAT_DSRC_FILE_MAGIC 24 /*! major version number */ -#define WT_STAT_file_major 25 +#define WT_STAT_DSRC_FILE_MAJOR 25 /*! maximum internal page item size */ -#define WT_STAT_file_maxintlitem 26 +#define WT_STAT_DSRC_FILE_MAXINTLITEM 26 /*! maximum internal page size */ -#define WT_STAT_file_maxintlpage 27 +#define WT_STAT_DSRC_FILE_MAXINTLPAGE 27 /*! maximum leaf page item size */ -#define WT_STAT_file_maxleafitem 28 +#define WT_STAT_DSRC_FILE_MAXLEAFITEM 28 /*! maximum leaf page size */ -#define WT_STAT_file_maxleafpage 29 +#define WT_STAT_DSRC_FILE_MAXLEAFPAGE 29 /*! minor version number */ -#define WT_STAT_file_minor 30 +#define WT_STAT_DSRC_FILE_MINOR 30 +/*! file size */ +#define WT_STAT_DSRC_FILE_SIZE 31 +/*! Number of chunks in the LSM tree */ +#define WT_STAT_DSRC_LSM_CHUNK_COUNT 32 +/*! Highest merge generation in the LSM tree */ +#define WT_STAT_DSRC_LSM_GENERATION_MAX 33 +/*! Number of queries that could have benefited from a Bloom filter that + * did not exist */ +#define WT_STAT_DSRC_LSM_LOOKUP_NO_BLOOM 34 /*! overflow pages */ -#define WT_STAT_file_overflow 31 -/*! page size allocation unit */ -#define WT_STAT_file_allocsize 32 -/*! pages rewritten by compaction */ -#define WT_STAT_file_compact_rewrite 33 -/*! reconcile: deleted or temporary pages merged */ -#define WT_STAT_rec_page_merge 34 -/*! reconcile: dictionary match */ -#define WT_STAT_rec_dictionary 35 -/*! reconcile: internal pages split */ -#define WT_STAT_rec_split_intl 36 -/*! reconcile: leaf pages split */ -#define WT_STAT_rec_split_leaf 37 -/*! reconcile: overflow key */ -#define WT_STAT_rec_ovfl_key 38 -/*! reconcile: overflow value */ -#define WT_STAT_rec_ovfl_value 39 -/*! reconcile: pages deleted */ -#define WT_STAT_rec_page_delete 40 -/*! 
reconcile: pages written */ -#define WT_STAT_rec_written 41 -/*! reconcile: unable to acquire hazard reference */ -#define WT_STAT_rec_hazard 42 +#define WT_STAT_DSRC_OVERFLOW_PAGE 35 +/*! overflow pages read into cache */ +#define WT_STAT_DSRC_OVERFLOW_READ 36 +/*! overflow values cached in memory */ +#define WT_STAT_DSRC_OVERFLOW_VALUE_CACHE 37 +/*! column-store deleted values */ +#define WT_STAT_DSRC_PAGE_COL_DELETED 38 +/*! column-store fixed-size leaf pages */ +#define WT_STAT_DSRC_PAGE_COL_FIX 39 +/*! column-store internal pages */ +#define WT_STAT_DSRC_PAGE_COL_INT 40 +/*! column-store variable-size leaf pages */ +#define WT_STAT_DSRC_PAGE_COL_VAR 41 +/*! pages evicted from the data source */ +#define WT_STAT_DSRC_PAGE_EVICT 42 +/*! pages that were selected for eviction that could not be evicted */ +#define WT_STAT_DSRC_PAGE_EVICT_FAIL 43 +/*! pages read into cache */ +#define WT_STAT_DSRC_PAGE_READ 44 /*! row-store internal pages */ -#define WT_STAT_file_row_int_pages 43 +#define WT_STAT_DSRC_PAGE_ROW_INT 45 /*! row-store leaf pages */ -#define WT_STAT_file_row_leaf_pages 44 -/*! total entries */ -#define WT_STAT_file_entries 45 +#define WT_STAT_DSRC_PAGE_ROW_LEAF 46 +/*! pages written from cache */ +#define WT_STAT_DSRC_PAGE_WRITE 47 +/*! reconcile: dictionary match */ +#define WT_STAT_DSRC_REC_DICTIONARY 48 +/*! reconciliation unable to acquire hazard reference */ +#define WT_STAT_DSRC_REC_HAZARD 49 +/*! reconciliation overflow key */ +#define WT_STAT_DSRC_REC_OVFL_KEY 50 +/*! reconciliation overflow value */ +#define WT_STAT_DSRC_REC_OVFL_VALUE 51 +/*! pages deleted */ +#define WT_STAT_DSRC_REC_PAGE_DELETE 52 +/*! deleted or temporary pages merged */ +#define WT_STAT_DSRC_REC_PAGE_MERGE 53 +/*! internal pages split */ +#define WT_STAT_DSRC_REC_SPLIT_INTL 54 +/*! leaf pages split */ +#define WT_STAT_DSRC_REC_SPLIT_LEAF 55 +/*! pages written */ +#define WT_STAT_DSRC_REC_WRITTEN 56 /*! 
update conflicts */ -#define WT_STAT_update_conflict 46 +#define WT_STAT_DSRC_TXN_UPDATE_CONFLICT 57 /*! write generation conflicts */ -#define WT_STAT_file_write_conflicts 47 -/*! @} */ - -/*! - * @} - * @name Statistics for lsm objects - * @anchor statistics_lsm - * @{ - */ -/*! Highest merge generation in the LSM tree */ -#define WT_STAT_generation_max 0 -/*! Number of bloom filter false positives */ -#define WT_STAT_bloom_false_positives 1 -/*! Number of bloom filter hits */ -#define WT_STAT_bloom_hits 2 -/*! Number of bloom filter misses */ -#define WT_STAT_bloom_misses 3 -/*! Number of bloom filters in the LSM tree */ -#define WT_STAT_bloom_count 4 -/*! Number of bloom pages evicted from cache */ -#define WT_STAT_bloom_cache_evict 5 -/*! Number of bloom pages read into cache */ -#define WT_STAT_bloom_cache_read 6 -/*! Number of chunks in the LSM tree */ -#define WT_STAT_chunk_count 7 -/*! Number of pages evicted from LSM chunks */ -#define WT_STAT_chunk_cache_evict 8 -/*! Number of pages evicted from cache */ -#define WT_STAT_cache_evict 9 -/*! Number of pages read into LSM chunks */ -#define WT_STAT_chunk_cache_read 10 -/*! Number of pages read into cache */ -#define WT_STAT_cache_read 11 -/*! Number of pages selected for eviction that could not be evicted */ -#define WT_STAT_cache_evict_fail 12 -/*! Number of pages written from cache */ -#define WT_STAT_cache_write 13 -/*! Number of queries that could have benefited from a bloom filter that - * did not exist */ -#define WT_STAT_search_miss_no_bloom 14 -/*! Total space used by bloom filters */ -#define WT_STAT_bloom_space 15 +#define WT_STAT_DSRC_TXN_WRITE_CONFLICT 58 /*! 
@} */ /* * Statistics section: END diff --git a/src/include/wt_internal.h b/src/include/wt_internal.h index eefa18cb447..68791cfa3b1 100644 --- a/src/include/wt_internal.h +++ b/src/include/wt_internal.h @@ -65,8 +65,6 @@ struct __wt_btree; typedef struct __wt_btree WT_BTREE; struct __wt_btree_session; typedef struct __wt_btree_session WT_BTREE_SESSION; -struct __wt_btree_stats; - typedef struct __wt_btree_stats WT_BTREE_STATS; struct __wt_cache; typedef struct __wt_cache WT_CACHE; struct __wt_cache_pool; @@ -115,6 +113,8 @@ struct __wt_cursor_table; typedef struct __wt_cursor_table WT_CURSOR_TABLE; struct __wt_dlh; typedef struct __wt_dlh WT_DLH; +struct __wt_dsrc_stats; + typedef struct __wt_dsrc_stats WT_DSRC_STATS; struct __wt_evict_entry; typedef struct __wt_evict_entry WT_EVICT_ENTRY; struct __wt_ext; @@ -137,8 +137,6 @@ struct __wt_lsm_chunk; typedef struct __wt_lsm_chunk WT_LSM_CHUNK; struct __wt_lsm_data_source; typedef struct __wt_lsm_data_source WT_LSM_DATA_SOURCE; -struct __wt_lsm_stats; - typedef struct __wt_lsm_stats WT_LSM_STATS; struct __wt_lsm_tree; typedef struct __wt_lsm_tree WT_LSM_TREE; struct __wt_lsm_worker_args; diff --git a/src/lsm/lsm_cursor.c b/src/lsm/lsm_cursor.c index 21511d20b2a..f4227fbd9d1 100644 --- a/src/lsm/lsm_cursor.c +++ b/src/lsm/lsm_cursor.c @@ -139,7 +139,8 @@ __clsm_open_cursors( WT_RET(__clsm_close_cursors(clsm)); - __wt_spin_lock(session, &lsm_tree->lock); + __wt_readlock(session, lsm_tree->rwlock); + F_SET(session, WT_SESSION_NO_CACHE_CHECK); /* Merge cursors have already figured out how many chunks they need. 
*/ if (F_ISSET(clsm, WT_CLSM_MERGE)) { @@ -218,7 +219,8 @@ __clsm_open_cursors( } clsm->dsk_gen = lsm_tree->dsk_gen; -err: __wt_spin_unlock(session, &lsm_tree->lock); +err: F_CLR(session, WT_SESSION_NO_CACHE_CHECK); + __wt_rwunlock(session, lsm_tree->rwlock); return (ret); } @@ -561,10 +563,10 @@ __clsm_search(WT_CURSOR *cursor) ret = __wt_bloom_hash_get(bloom, &bhash); if (ret == WT_NOTFOUND) { WT_STAT_INCR( - clsm->lsm_tree->stats, bloom_misses); + clsm->lsm_tree->stats, bloom_miss); continue; } else if (ret == 0) - WT_STAT_INCR(clsm->lsm_tree->stats, bloom_hits); + WT_STAT_INCR(clsm->lsm_tree->stats, bloom_hit); WT_ERR(ret); } c->set_key(c, &cursor->key); @@ -579,11 +581,11 @@ __clsm_search(WT_CURSOR *cursor) goto err; else if (bloom != NULL) WT_STAT_INCR( - clsm->lsm_tree->stats, bloom_false_positives); + clsm->lsm_tree->stats, bloom_false_positive); /* The active chunk can't have a bloom filter. */ - else if (i != clsm->nchunks) + else if (clsm->primary_chunk == NULL || i != clsm->nchunks) WT_STAT_INCR( - clsm->lsm_tree->stats, search_miss_no_bloom); + clsm->lsm_tree->stats, lsm_lookup_no_bloom); } ret = WT_NOTFOUND; @@ -781,11 +783,11 @@ __clsm_put( * chunk is needed. */ if (clsm->primary_chunk == NULL) { - __wt_spin_lock(session, &lsm_tree->lock); + __wt_writelock(session, lsm_tree->rwlock); if (clsm->dsk_gen == lsm_tree->dsk_gen) WT_WITH_SCHEMA_LOCK(session, ret = __wt_lsm_tree_switch(session, lsm_tree)); - __wt_spin_unlock(session, &lsm_tree->lock); + __wt_rwunlock(session, lsm_tree->rwlock); WT_RET(ret); /* We changed the structure, or someone else did: update. */ @@ -830,12 +832,12 @@ __clsm_put( * Take the LSM lock first: we can't acquire it while * holding the schema lock, or we will deadlock. */ - __wt_spin_lock(session, &lsm_tree->lock); + __wt_writelock(session, lsm_tree->rwlock); /* Make sure we don't race. 
*/ if (clsm->dsk_gen == lsm_tree->dsk_gen) WT_WITH_SCHEMA_LOCK(session, ret = __wt_lsm_tree_switch(session, lsm_tree)); - __wt_spin_unlock(session, &lsm_tree->lock); + __wt_rwunlock(session, lsm_tree->rwlock); } return (ret); diff --git a/src/lsm/lsm_merge.c b/src/lsm/lsm_merge.c index 91ddbc087ad..d554f6311cb 100644 --- a/src/lsm/lsm_merge.c +++ b/src/lsm/lsm_merge.c @@ -72,10 +72,12 @@ __wt_lsm_merge( WT_DECL_RET; WT_ITEM buf, key, value; WT_LSM_CHUNK *chunk; - const char *cur_cfg[] = - API_CONF_DEFAULTS(session, open_cursor, "bulk,raw"); + const char *cur_cfg[] = API_CONF_DEFAULTS(session, open_cursor, + "bulk,raw"); + const char *rand_cfg[] = API_CONF_DEFAULTS(session, open_cursor, + "checkpoint=WiredTigerCheckpoint,next_random"); uint32_t generation, start_id; - uint64_t insert_count, record_count; + uint64_t insert_count, record_count, r; int create_bloom, dest_id, end_chunk, i; int max_chunks, nchunks, start_chunk; @@ -98,7 +100,7 @@ __wt_lsm_merge( * avoid holding it while the merge is in progress: that may take a * long time. */ - __wt_spin_lock(session, &lsm_tree->lock); + __wt_writelock(session, lsm_tree->rwlock); /* * Only include chunks that are stable on disk and not involved in a @@ -134,10 +136,18 @@ __wt_lsm_merge( break; /* + * Only merge across more than 2 generations if there are no + * new chunks being created. + */ + if (stalls < 50 && chunk->generation >= + lsm_tree->chunk[end_chunk]->generation + 2) + break; + + /* * If the next chunk is more than double the average size of * the chunks we have so far, stop. 
*/ - if (nchunks > 2 && chunk->count > 2 * record_count / nchunks) + if (nchunks > 1 && chunk->count > 2 * record_count / nchunks) break; /* @@ -177,7 +187,7 @@ __wt_lsm_merge( generation = lsm_tree->chunk[i]->generation; start_id = lsm_tree->chunk[start_chunk]->id; - __wt_spin_unlock(session, &lsm_tree->lock); + __wt_rwunlock(session, lsm_tree->rwlock); if (nchunks == 0) return (WT_NOTFOUND); @@ -186,8 +196,9 @@ __wt_lsm_merge( dest_id = WT_ATOMIC_ADD(lsm_tree->last, 1); WT_VERBOSE_RET(session, lsm, - "Merging chunks %d-%d into %d (%" PRIu64 " records)\n", - start_chunk, end_chunk, dest_id, record_count); + "Merging chunks %d-%d into %d (%" PRIu64 " records)" + ", generation %d\n", + start_chunk, end_chunk, dest_id, record_count, generation); WT_RET(__wt_calloc_def(session, 1, &chunk)); chunk->id = dest_id; @@ -249,12 +260,35 @@ __wt_lsm_merge( src = dest = NULL; if (create_bloom) { WT_TRET(__wt_bloom_finalize(bloom)); + + /* + * Read in a key to make sure the Bloom filters btree handle is + * open before it becomes visible to application threads. + * Otherwise application threads will stall while it is opened + * and internal pages are read into cache. + */ + WT_CLEAR(key); + WT_TRET_NOTFOUND_OK(__wt_bloom_get(bloom, &key)); + WT_TRET(__wt_bloom_close(bloom)); bloom = NULL; } WT_ERR(ret); - __wt_spin_lock(session, &lsm_tree->lock); + /* + * Fault in some pages. We use a random cursor to jump around in the + * tree. The count here is fairly arbitrary: what we want is to have + * enough internal pages in cache so that application threads don't + * stall and block each other reading them in. 
+ */ + WT_ERR(__wt_open_cursor(session, chunk->uri, NULL, rand_cfg, &dest)); + for (r = 0; ret == 0 && r < 1 + (insert_count >> 20); r++) + WT_TRET(dest->next(dest)); + WT_TRET(dest->close(dest)); + dest = NULL; + WT_ERR_NOTFOUND_OK(ret); + + __wt_writelock(session, lsm_tree->rwlock); /* * Check whether we raced with another merge, and adjust the chunk @@ -278,7 +312,7 @@ __wt_lsm_merge( F_SET(chunk, WT_LSM_CHUNK_ONDISK); ret = __wt_lsm_meta_write(session, lsm_tree); - __wt_spin_unlock(session, &lsm_tree->lock); + __wt_rwunlock(session, lsm_tree->rwlock); err: if (src != NULL) WT_TRET(src->close(src)); diff --git a/src/lsm/lsm_stat.c b/src/lsm/lsm_stat.c index fe0717f3436..aa3ff1548c6 100644 --- a/src/lsm/lsm_stat.c +++ b/src/lsm/lsm_stat.c @@ -35,29 +35,27 @@ __wt_lsm_stat_init( * consistent view? If so should the copy belong to the stat cursor? */ /* Clear the statistics we are about to recalculate. */ - WT_STAT_SET(lsm_tree->stats, bloom_cache_read, 0); - WT_STAT_SET(lsm_tree->stats, bloom_cache_evict, 0); + WT_STAT_SET(lsm_tree->stats, bloom_page_read, 0); + WT_STAT_SET(lsm_tree->stats, bloom_page_evict, 0); WT_STAT_SET(lsm_tree->stats, bloom_count, 0); - WT_STAT_SET(lsm_tree->stats, bloom_space, 0); - WT_STAT_SET(lsm_tree->stats, cache_evict, 0); - WT_STAT_SET(lsm_tree->stats, cache_evict_fail, 0); - WT_STAT_SET(lsm_tree->stats, cache_read, 0); - WT_STAT_SET(lsm_tree->stats, cache_write, 0); - WT_STAT_SET(lsm_tree->stats, chunk_cache_evict, 0); - WT_STAT_SET(lsm_tree->stats, chunk_cache_read, 0); - WT_STAT_SET(lsm_tree->stats, generation_max, 0); + WT_STAT_SET(lsm_tree->stats, bloom_size, 0); + WT_STAT_SET(lsm_tree->stats, page_evict, 0); + WT_STAT_SET(lsm_tree->stats, page_evict_fail, 0); + WT_STAT_SET(lsm_tree->stats, page_read, 0); + WT_STAT_SET(lsm_tree->stats, page_write, 0); + WT_STAT_SET(lsm_tree->stats, lsm_generation_max, 0); /* Hold the LSM lock so that we can safely walk through the chunks. 
*/ - __wt_spin_lock(session, &lsm_tree->lock); + __wt_readlock(session, lsm_tree->rwlock); /* Set the stats for this run. */ - WT_STAT_SET(lsm_tree->stats, chunk_count, lsm_tree->nchunks); + WT_STAT_SET(lsm_tree->stats, lsm_chunk_count, lsm_tree->nchunks); for (i = 0; i < lsm_tree->nchunks; i++) { chunk = lsm_tree->chunk[i]; if (chunk->generation > - (uint32_t)WT_STAT(lsm_tree->stats, generation_max)) + (uint32_t)WT_STAT(lsm_tree->stats, lsm_generation_max)) WT_STAT_SET(lsm_tree->stats, - generation_max, chunk->generation); + lsm_generation_max, chunk->generation); /* * LSM chunk reads happen from a checkpoint, so get the @@ -78,38 +76,36 @@ __wt_lsm_stat_init( session, uribuf->data, cfg, &stat_cursor); WT_ERR(ret); - stat_cursor->set_key(stat_cursor, WT_STAT_page_evict_fail); + stat_cursor->set_key(stat_cursor, WT_STAT_DSRC_PAGE_EVICT_FAIL); WT_ERR(stat_cursor->search(stat_cursor)); WT_ERR(stat_cursor->get_value( stat_cursor, &desc, &pvalue, &value)); - WT_STAT_INCRV(lsm_tree->stats, cache_evict_fail, value); + WT_STAT_INCRV(lsm_tree->stats, page_evict_fail, value); - stat_cursor->set_key(stat_cursor, WT_STAT_page_evict); + stat_cursor->set_key(stat_cursor, WT_STAT_DSRC_PAGE_EVICT); WT_ERR(stat_cursor->search(stat_cursor)); WT_ERR(stat_cursor->get_value( stat_cursor, &desc, &pvalue, &value)); - WT_STAT_INCRV(lsm_tree->stats, cache_evict, value); - WT_STAT_INCRV(lsm_tree->stats, chunk_cache_evict, value); + WT_STAT_INCRV(lsm_tree->stats, page_evict, value); - stat_cursor->set_key(stat_cursor, WT_STAT_page_read); + stat_cursor->set_key(stat_cursor, WT_STAT_DSRC_PAGE_READ); WT_ERR(stat_cursor->search(stat_cursor)); WT_ERR(stat_cursor->get_value( stat_cursor, &desc, &pvalue, &value)); - WT_STAT_INCRV(lsm_tree->stats, cache_read, value); - WT_STAT_INCRV(lsm_tree->stats, chunk_cache_read, value); + WT_STAT_INCRV(lsm_tree->stats, page_read, value); - stat_cursor->set_key(stat_cursor, WT_STAT_page_write); + stat_cursor->set_key(stat_cursor, WT_STAT_DSRC_PAGE_WRITE); 
WT_ERR(stat_cursor->search(stat_cursor)); WT_ERR(stat_cursor->get_value( stat_cursor, &desc, &pvalue, &value)); - WT_STAT_INCRV(lsm_tree->stats, cache_write, value); + WT_STAT_INCRV(lsm_tree->stats, page_write, value); WT_ERR(stat_cursor->close(stat_cursor)); if (!F_ISSET(chunk, WT_LSM_CHUNK_BLOOM)) continue; WT_STAT_INCR(lsm_tree->stats, bloom_count); - WT_STAT_INCRV(lsm_tree->stats, bloom_space, + WT_STAT_INCRV(lsm_tree->stats, bloom_size, (chunk->count * lsm_tree->bloom_bit_count) / 8); WT_ERR(__wt_buf_fmt( @@ -117,35 +113,35 @@ __wt_lsm_stat_init( WT_ERR(__wt_curstat_open(session, uribuf->data, cfg, &stat_cursor)); - stat_cursor->set_key(stat_cursor, WT_STAT_page_evict); + stat_cursor->set_key(stat_cursor, WT_STAT_DSRC_PAGE_EVICT); WT_ERR(stat_cursor->search(stat_cursor)); WT_ERR(stat_cursor->get_value( stat_cursor, &desc, &pvalue, &value)); - WT_STAT_INCRV(lsm_tree->stats, cache_evict, value); - WT_STAT_INCRV(lsm_tree->stats, bloom_cache_evict, value); + WT_STAT_INCRV(lsm_tree->stats, page_evict, value); + WT_STAT_INCRV(lsm_tree->stats, bloom_page_evict, value); - stat_cursor->set_key(stat_cursor, WT_STAT_page_evict_fail); + stat_cursor->set_key(stat_cursor, WT_STAT_DSRC_PAGE_EVICT_FAIL); WT_ERR(stat_cursor->search(stat_cursor)); WT_ERR(stat_cursor->get_value( stat_cursor, &desc, &pvalue, &value)); - WT_STAT_INCRV(lsm_tree->stats, cache_evict_fail, value); + WT_STAT_INCRV(lsm_tree->stats, page_evict_fail, value); - stat_cursor->set_key(stat_cursor, WT_STAT_page_read); + stat_cursor->set_key(stat_cursor, WT_STAT_DSRC_PAGE_READ); WT_ERR(stat_cursor->search(stat_cursor)); WT_ERR(stat_cursor->get_value( stat_cursor, &desc, &pvalue, &value)); - WT_STAT_INCRV(lsm_tree->stats, cache_read, value); - WT_STAT_INCRV(lsm_tree->stats, bloom_cache_read, value); + WT_STAT_INCRV(lsm_tree->stats, page_read, value); + WT_STAT_INCRV(lsm_tree->stats, bloom_page_read, value); - stat_cursor->set_key(stat_cursor, WT_STAT_page_write); + stat_cursor->set_key(stat_cursor, 
WT_STAT_DSRC_PAGE_WRITE); WT_ERR(stat_cursor->search(stat_cursor)); WT_ERR(stat_cursor->get_value( stat_cursor, &desc, &pvalue, &value)); - WT_STAT_INCRV(lsm_tree->stats, cache_write, value); + WT_STAT_INCRV(lsm_tree->stats, page_write, value); WT_ERR(stat_cursor->close(stat_cursor)); } -err: __wt_spin_unlock(session, &lsm_tree->lock); +err: __wt_rwunlock(session, lsm_tree->rwlock); __wt_scr_free(&uribuf); return (ret); diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c index e2a712be54a..ea70889df5f 100644 --- a/src/lsm/lsm_tree.c +++ b/src/lsm/lsm_tree.c @@ -35,7 +35,6 @@ __lsm_tree_discard(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) __wt_rwlock_destroy(session, &lsm_tree->rwlock); __wt_free(session, lsm_tree->stats); - __wt_spin_destroy(session, &lsm_tree->lock); for (i = 0; i < lsm_tree->nchunks; i++) { if ((chunk = lsm_tree->chunk[i]) == NULL) @@ -423,10 +422,10 @@ __lsm_tree_open( /* Try to open the tree. */ WT_RET(__wt_calloc_def(session, 1, &lsm_tree)); - __wt_spin_init(session, &lsm_tree->lock); + WT_ERR(__wt_rwlock_alloc(session, "lsm tree", &lsm_tree->rwlock)); WT_ERR(__wt_strdup(session, uri, &lsm_tree->name)); lsm_tree->filename = lsm_tree->name + strlen("lsm:"); - WT_ERR(__wt_stat_alloc_lsm_stats(session, &lsm_tree->stats)); + WT_ERR(__wt_stat_alloc_dsrc_stats(session, &lsm_tree->stats)); WT_ERR(__wt_lsm_meta_read(session, lsm_tree)); @@ -555,7 +554,7 @@ __wt_lsm_tree_drop( WT_RET(__lsm_tree_close(session, lsm_tree)); /* Prevent any new opens. */ - WT_RET(__wt_spin_trylock(session, &lsm_tree->lock)); + WT_RET(__wt_try_writelock(session, lsm_tree->rwlock)); /* Drop the chunks. 
*/ for (i = 0; i < lsm_tree->nchunks; i++) { @@ -575,11 +574,11 @@ __wt_lsm_tree_drop( __wt_schema_drop(session, chunk->bloom_uri, cfg)); } - __wt_spin_unlock(session, &lsm_tree->lock); + __wt_rwunlock(session, lsm_tree->rwlock); WT_ERR(__wt_metadata_remove(session, name)); if (0) { -err: __wt_spin_unlock(session, &lsm_tree->lock); +err: __wt_rwunlock(session, lsm_tree->rwlock); } __lsm_tree_discard(session, lsm_tree); return (ret); @@ -610,7 +609,7 @@ __wt_lsm_tree_rename(WT_SESSION_IMPL *session, WT_RET(__lsm_tree_close(session, lsm_tree)); /* Prevent any new opens. */ - WT_RET(__wt_spin_trylock(session, &lsm_tree->lock)); + WT_RET(__wt_try_writelock(session, lsm_tree->rwlock)); /* Set the new name. */ __wt_free(session, lsm_tree->name); @@ -641,12 +640,12 @@ __wt_lsm_tree_rename(WT_SESSION_IMPL *session, } } - __wt_spin_unlock(session, &lsm_tree->lock); + __wt_rwunlock(session, lsm_tree->rwlock); WT_ERR(__wt_lsm_meta_write(session, lsm_tree)); WT_ERR(__wt_metadata_remove(session, oldname)); if (0) { -err: __wt_spin_unlock(session, &lsm_tree->lock); +err: __wt_rwunlock(session, lsm_tree->rwlock); } if (old != NULL) __wt_free(session, old); @@ -675,7 +674,7 @@ __wt_lsm_tree_truncate( WT_RET(__lsm_tree_close(session, lsm_tree)); /* Prevent any new opens. */ - WT_RET(__wt_spin_trylock(session, &lsm_tree->lock)); + WT_RET(__wt_try_writelock(session, lsm_tree->rwlock)); /* Create the new chunk. 
*/ WT_ERR(__wt_calloc_def(session, 1, &chunk)); @@ -689,11 +688,11 @@ __wt_lsm_tree_truncate( WT_ERR(__wt_lsm_meta_write(session, lsm_tree)); WT_ERR(__lsm_tree_start_worker(session, lsm_tree)); - __wt_spin_unlock(session, &lsm_tree->lock); + __wt_rwunlock(session, lsm_tree->rwlock); __wt_lsm_tree_release(session, lsm_tree); if (0) { -err: __wt_spin_unlock(session, &lsm_tree->lock); +err: __wt_rwunlock(session, lsm_tree->rwlock); __lsm_tree_discard(session, lsm_tree); } return (ret); diff --git a/src/lsm/lsm_worker.c b/src/lsm/lsm_worker.c index 54a8298e877..327bcdcc3f9 100644 --- a/src/lsm/lsm_worker.c +++ b/src/lsm/lsm_worker.c @@ -87,7 +87,7 @@ __wt_lsm_bloom_worker(void *arg) for (;;) { WT_ERR(__wt_lsm_copy_chunks(session, lsm_tree, &cookie)); - /* Write checkpoints in all completed files. */ + /* Create bloom filters in all checkpointed chunks. */ for (i = 0, j = 0; i < cookie.nchunks; i++) { if (!F_ISSET(lsm_tree, WT_LSM_TREE_WORKING)) goto err; @@ -104,27 +104,13 @@ __wt_lsm_bloom_worker(void *arg) chunk->count == 0) continue; - if ((ret = __lsm_bloom_create( - session, lsm_tree, chunk)) != 0) { - (void)__wt_err( - session, ret, "bloom creation failed"); + /* + * If a bloom filter create fails restart at the + * beginning of the chunk array. Don't exit the thread. 
+ */ + if (__lsm_bloom_create(session, lsm_tree, chunk) != 0) break; - } - ++j; - __wt_spin_lock(session, &lsm_tree->lock); - ++lsm_tree->dsk_gen; - ret = __wt_lsm_meta_write(session, lsm_tree); - __wt_spin_unlock(session, &lsm_tree->lock); - - if (ret != 0) { - (void)__wt_err(session, ret, - "LSM bloom worker metadata write failed"); - break; - } - - WT_VERBOSE_ERR(session, lsm, - "LSM worker created bloom filter for %d.", i); } if (j == 0) __wt_sleep(0, 100000); @@ -186,11 +172,11 @@ __wt_lsm_checkpoint_worker(void *arg) } ++j; - __wt_spin_lock(session, &lsm_tree->lock); + __wt_writelock(session, lsm_tree->rwlock); F_SET(chunk, WT_LSM_CHUNK_ONDISK); ++lsm_tree->dsk_gen; ret = __wt_lsm_meta_write(session, lsm_tree); - __wt_spin_unlock(session, &lsm_tree->lock); + __wt_rwunlock(session, lsm_tree->rwlock); if (ret != 0) { (void)__wt_err(session, ret, @@ -224,9 +210,9 @@ __wt_lsm_copy_chunks(WT_SESSION_IMPL *session, /* Always return zero chunks on error. */ cookie->nchunks = 0; - __wt_spin_lock(session, &lsm_tree->lock); + __wt_readlock(session, lsm_tree->rwlock); if (!F_ISSET(lsm_tree, WT_LSM_TREE_WORKING)) { - __wt_spin_unlock(session, &lsm_tree->lock); + __wt_rwunlock(session, lsm_tree->rwlock); /* The actual error value is ignored. */ return (WT_ERROR); } @@ -245,7 +231,7 @@ __wt_lsm_copy_chunks(WT_SESSION_IMPL *session, if (ret == 0 && nchunks > 0) memcpy(cookie->chunk_array, lsm_tree->chunk, nchunks * sizeof(*lsm_tree->chunk)); - __wt_spin_unlock(session, &lsm_tree->lock); + __wt_rwunlock(session, lsm_tree->rwlock); if (ret == 0) cookie->nchunks = nchunks; @@ -307,11 +293,22 @@ __lsm_bloom_create(WT_SESSION_IMPL *session, WT_ERR(ret); WT_VERBOSE_ERR(session, lsm, - "LSM checkpoint worker created bloom filter. " + "LSM worker created bloom filter %s. " "Expected %" PRIu64 " items, got %" PRIu64, - chunk->count, insert_count); + chunk->bloom_uri, chunk->count, insert_count); F_SET(chunk, WT_LSM_CHUNK_BLOOM); + + /* Ensure the bloom filter is in the metadata. 
*/ + __wt_writelock(session, lsm_tree->rwlock); + ++lsm_tree->dsk_gen; + ret = __wt_lsm_meta_write(session, lsm_tree); + __wt_rwunlock(session, lsm_tree->rwlock); + + if (ret != 0) + WT_ERR_MSG(session, ret, + "LSM bloom worker metadata write failed"); + err: if (bloom != NULL) WT_TRET(__wt_bloom_close(bloom)); return (ret); @@ -332,7 +329,7 @@ __lsm_free_chunks(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) if (!locked) { locked = 1; /* TODO: Do we need the lsm_tree lock for all drops? */ - __wt_spin_lock(session, &lsm_tree->lock); + __wt_writelock(session, lsm_tree->rwlock); } if (F_ISSET(chunk, WT_LSM_CHUNK_BLOOM)) { WT_WITH_SCHEMA_LOCK(session, ret = __wt_schema_drop( @@ -375,7 +372,7 @@ __lsm_free_chunks(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) } if (locked) { err: WT_TRET(__wt_lsm_meta_write(session, lsm_tree)); - __wt_spin_unlock(session, &lsm_tree->lock); + __wt_rwunlock(session, lsm_tree->rwlock); } /* Returning non-zero means there is no work to do. */ diff --git a/src/meta/meta_table.c b/src/meta/meta_table.c index 57a3003c551..49a33aa28b5 100644 --- a/src/meta/meta_table.c +++ b/src/meta/meta_table.c @@ -81,8 +81,7 @@ __wt_metadata_load_backup(WT_SESSION_IMPL *session) WT_ERR(__wt_getline(session, value, fp)); if (value->size == 0) WT_ERR(__wt_illegal_value(session, WT_METADATA_BACKUP)); - WT_ERR(__wt_metadata_update( - session, (char *)key->data, (char *)value->data)); + WT_ERR(__wt_metadata_update(session, key->data, value->data)); } /* Remove the hot backup file, it's only read (successfully) once. 
*/ diff --git a/src/os_posix/os_getline.c b/src/os_posix/os_getline.c index 7291ed4f54d..9d8c8909127 100644 --- a/src/os_posix/os_getline.c +++ b/src/os_posix/os_getline.c @@ -38,12 +38,12 @@ __wt_getline(WT_SESSION_IMPL *session, WT_ITEM *buf, FILE *fp) continue; break; } - ((char *)buf->data)[buf->size++] = (char)c; + ((char *)buf->mem)[buf->size++] = (char)c; } if (c == EOF && ferror(fp)) WT_RET_MSG(session, __wt_errno(), "file read"); - ((char *)buf->data)[buf->size] = '\0'; + ((char *)buf->mem)[buf->size] = '\0'; return (0); } diff --git a/src/schema/schema_project.c b/src/schema/schema_project.c index 411ec64f880..73b5e8bb9cc 100644 --- a/src/schema/schema_project.c +++ b/src/schema/schema_project.c @@ -56,7 +56,7 @@ __wt_schema_project_in(WT_SESSION_IMPL *session, WT_RET(__pack_init( session, &pack, c->key_format)); buf = &c->key; - p = (uint8_t *)buf->data; + p = (uint8_t *)buf->mem; end = p + buf->size; continue; @@ -64,7 +64,7 @@ __wt_schema_project_in(WT_SESSION_IMPL *session, c = cp[arg]; WT_RET(__pack_init(session, &pack, c->value_format)); buf = &c->value; - p = (uint8_t *)buf->data; + p = (uint8_t *)buf->mem; end = p + buf->size; continue; } @@ -96,12 +96,12 @@ __wt_schema_project_in(WT_SESSION_IMPL *session, len = __pack_size(session, &pv); WT_RET(__wt_buf_grow(session, buf, buf->size + len)); - p = (uint8_t *)buf->data + + p = (uint8_t *)buf->mem + buf->size; WT_RET(__pack_write( session, &pv, &p, len)); buf->size += WT_STORE_SIZE(len); - end = (uint8_t *)buf->data + + end = (uint8_t *)buf->mem + buf->size; } else if (*proj == WT_PROJ_SKIP) WT_RET(__unpack_read(session, @@ -122,11 +122,11 @@ __wt_schema_project_in(WT_SESSION_IMPL *session, old_len = (size_t)(next - p); len = __pack_size(session, &pv); - offset = WT_PTRDIFF(p, buf->data); + offset = WT_PTRDIFF(p, buf->mem); WT_RET(__wt_buf_grow(session, buf, buf->size + len)); - p = (uint8_t *)buf->data + offset; - end = (uint8_t *)buf->data + buf->size + len; + p = (uint8_t *)buf->mem + offset; + 
end = (uint8_t *)buf->mem + buf->size + len; /* Make room if we're inserting out-of-order. */ if (offset + old_len < buf->size) memmove(p + len, p + old_len, @@ -243,7 +243,7 @@ __wt_schema_project_slice(WT_SESSION_IMPL *session, WT_CURSOR **cp, p = end = NULL; /* -Wuninitialized */ WT_RET(__pack_init(session, &vpack, vformat)); - vp = (uint8_t *)value->data; + vp = value->data; vend = vp + value->size; /* Reset any of the buffers we will be setting. */ @@ -392,7 +392,8 @@ __wt_schema_project_merge(WT_SESSION_IMPL *session, WT_PACK pack, vpack; WT_PACK_VALUE pv, vpv; char *proj; - uint8_t *p, *end, *vp; + const uint8_t *p, *end; + uint8_t *vp; size_t len; uint32_t arg; @@ -418,7 +419,7 @@ __wt_schema_project_merge(WT_SESSION_IMPL *session, WT_RET(__pack_init( session, &pack, c->key_format)); buf = &c->key; - p = (uint8_t *)buf->data; + p = buf->data; end = p + buf->size; continue; @@ -426,7 +427,7 @@ __wt_schema_project_merge(WT_SESSION_IMPL *session, c = cp[arg]; WT_RET(__pack_init(session, &pack, c->value_format)); buf = &c->value; - p = (uint8_t *)buf->data; + p = buf->data; end = p + buf->size; continue; } @@ -441,8 +442,7 @@ __wt_schema_project_merge(WT_SESSION_IMPL *session, case WT_PROJ_SKIP: WT_RET(__pack_next(&pack, &pv)); WT_RET(__unpack_read(session, &pv, - (const uint8_t **)&p, - (size_t)(end - p))); + &p, (size_t)(end - p))); if (*proj == WT_PROJ_SKIP) break; @@ -451,7 +451,7 @@ __wt_schema_project_merge(WT_SESSION_IMPL *session, len = __pack_size(session, &vpv); WT_RET(__wt_buf_grow(session, value, value->size + len)); - vp = (uint8_t *)value->data + value->size; + vp = (uint8_t *)value->mem + value->size; WT_RET(__pack_write(session, &vpv, &vp, len)); value->size += WT_STORE_SIZE(len); /* FALLTHROUGH */ diff --git a/src/schema/schema_rename.c b/src/schema/schema_rename.c index de45a49c976..bbb57674369 100644 --- a/src/schema/schema_rename.c +++ b/src/schema/schema_rename.c @@ -144,7 +144,7 @@ __rename_tree(WT_SESSION_IMPL *session, 
WT_ERR(__wt_scr_alloc(session, 0, &nv)); WT_ERR(__wt_buf_fmt(session, nv, "%.*s%s%s", (int)WT_PTRDIFF(cval.str, value), value, - (char *)ns->data, + (const char *)ns->data, cval.str + cval.len)); /* diff --git a/src/support/scratch.c b/src/support/scratch.c index f9cff2d0343..637cfb0e769 100644 --- a/src/support/scratch.c +++ b/src/support/scratch.c @@ -47,7 +47,7 @@ __wt_buf_grow(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size) offset = 0; set_data = 1; } else if (buf->data >= buf->mem && - (uint8_t *)buf->data < (uint8_t *)buf->mem + buf->memsize) { + WT_PTRDIFF(buf->data, buf->mem) < buf->memsize) { offset = WT_PTRDIFF(buf->data, buf->mem); set_data = 1; } else { @@ -75,8 +75,8 @@ __wt_buf_grow(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size) int __wt_buf_init(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size) { - WT_RET(__wt_buf_grow(session, buf, size)); buf->data = buf->mem; + WT_RET(__wt_buf_grow(session, buf, size)); buf->size = 0; return (0); @@ -145,12 +145,9 @@ __wt_buf_steal(WT_SESSION_IMPL *session, WT_ITEM *buf, uint32_t *sizep) * the page header, so buf->data references a location past buf->mem. 
*/ if (buf->data != buf->mem) { - WT_ASSERT(session, - buf->data > buf->mem && - (uint8_t *)buf->data < - (uint8_t *)buf->mem + buf->memsize && - (uint8_t *)buf->data + buf->size <= - (uint8_t *)buf->mem + buf->memsize); + WT_ASSERT(session, buf->data > buf->mem && + WT_PTRDIFF(buf->data, buf->mem) + buf->size <= + buf->memsize); memmove(buf->mem, buf->data, buf->size); } diff --git a/src/support/stat.c b/src/support/stat.c index 23065e706a5..b79f7f3e8e5 100644 --- a/src/support/stat.c +++ b/src/support/stat.c @@ -3,31 +3,37 @@ #include "wt_internal.h" int -__wt_stat_alloc_btree_stats(WT_SESSION_IMPL *session, WT_BTREE_STATS **statsp) +__wt_stat_alloc_dsrc_stats(WT_SESSION_IMPL *session, WT_DSRC_STATS **statsp) { - WT_BTREE_STATS *stats; + WT_DSRC_STATS *stats; WT_RET(__wt_calloc_def(session, 1, &stats)); - stats->alloc.desc = "file: block allocations"; - stats->cursor_inserts.desc = "cursor-inserts"; + stats->block_alloc.desc = "block allocations"; + stats->block_extend.desc = "block allocations required file extension"; + stats->block_free.desc = "block frees"; + stats->bloom_count.desc = "Number of Bloom filters in the LSM tree"; + stats->bloom_false_positive.desc = + "Number of Bloom filter false positives"; + stats->bloom_hit.desc = "Number of Bloom filter hits"; + stats->bloom_miss.desc = "Number of Bloom filter misses"; + stats->bloom_page_evict.desc = + "Number of Bloom pages evicted from cache"; + stats->bloom_page_read.desc = "Number of Bloom pages read into cache"; + stats->bloom_size.desc = "Total size of Bloom filters"; + stats->ckpt_size.desc = "checkpoint size"; + stats->cursor_insert.desc = "cursor-inserts"; stats->cursor_read.desc = "cursor-read"; stats->cursor_read_near.desc = "cursor-read-near"; stats->cursor_read_next.desc = "cursor-read-next"; stats->cursor_read_prev.desc = "cursor-read-prev"; - stats->cursor_removes.desc = "cursor-removes"; - stats->cursor_resets.desc = "cursor-resets"; - stats->cursor_updates.desc = "cursor-updates"; - 
stats->extend.desc = "file: block allocations required file extension"; + stats->cursor_remove.desc = "cursor-removes"; + stats->cursor_reset.desc = "cursor-resets"; + stats->cursor_update.desc = "cursor-updates"; + stats->entries.desc = "total entries"; stats->file_allocsize.desc = "page size allocation unit"; stats->file_bulk_loaded.desc = "bulk-loaded entries"; - stats->file_col_deleted.desc = "column-store deleted values"; - stats->file_col_fix_pages.desc = "column-store fixed-size leaf pages"; - stats->file_col_int_pages.desc = "column-store internal pages"; - stats->file_col_var_pages.desc = - "column-store variable-size leaf pages"; stats->file_compact_rewrite.desc = "pages rewritten by compaction"; - stats->file_entries.desc = "total entries"; stats->file_fixed_len.desc = "fixed-record size"; stats->file_magic.desc = "magic number"; stats->file_major.desc = "major version number"; @@ -36,61 +42,72 @@ __wt_stat_alloc_btree_stats(WT_SESSION_IMPL *session, WT_BTREE_STATS **statsp) stats->file_maxleafitem.desc = "maximum leaf page item size"; stats->file_maxleafpage.desc = "maximum leaf page size"; stats->file_minor.desc = "minor version number"; - stats->file_overflow.desc = "overflow pages"; - stats->file_row_int_pages.desc = "row-store internal pages"; - stats->file_row_leaf_pages.desc = "row-store leaf pages"; - stats->file_size.desc = "file: size"; - stats->file_write_conflicts.desc = "write generation conflicts"; - stats->free.desc = "file: block frees"; - stats->overflow_read.desc = "file: overflow pages read from the file"; - stats->overflow_value_cache.desc = - "file: overflow values cached in memory"; - stats->page_evict.desc = "file: pages evicted from the file"; + stats->file_size.desc = "file size"; + stats->lsm_chunk_count.desc = "Number of chunks in the LSM tree"; + stats->lsm_generation_max.desc = + "Highest merge generation in the LSM tree"; + stats->lsm_lookup_no_bloom.desc = + "Number of queries that could have benefited from a Bloom filter 
that did not exist"; + stats->overflow_page.desc = "overflow pages"; + stats->overflow_read.desc = "overflow pages read into cache"; + stats->overflow_value_cache.desc = "overflow values cached in memory"; + stats->page_col_deleted.desc = "column-store deleted values"; + stats->page_col_fix.desc = "column-store fixed-size leaf pages"; + stats->page_col_int.desc = "column-store internal pages"; + stats->page_col_var.desc = "column-store variable-size leaf pages"; + stats->page_evict.desc = "pages evicted from the data source"; stats->page_evict_fail.desc = - "file: pages that were selected for eviction that could not be evicted"; - stats->page_read.desc = "file: pages read from the file"; - stats->page_write.desc = "file: pages written to the file"; + "pages that were selected for eviction that could not be evicted"; + stats->page_read.desc = "pages read into cache"; + stats->page_row_int.desc = "row-store internal pages"; + stats->page_row_leaf.desc = "row-store leaf pages"; + stats->page_write.desc = "pages written from cache"; stats->rec_dictionary.desc = "reconcile: dictionary match"; stats->rec_hazard.desc = - "reconcile: unable to acquire hazard reference"; - stats->rec_ovfl_key.desc = "reconcile: overflow key"; - stats->rec_ovfl_value.desc = "reconcile: overflow value"; - stats->rec_page_delete.desc = "reconcile: pages deleted"; - stats->rec_page_merge.desc = - "reconcile: deleted or temporary pages merged"; - stats->rec_split_intl.desc = "reconcile: internal pages split"; - stats->rec_split_leaf.desc = "reconcile: leaf pages split"; - stats->rec_written.desc = "reconcile: pages written"; - stats->update_conflict.desc = "update conflicts"; + "reconciliation unable to acquire hazard reference"; + stats->rec_ovfl_key.desc = "reconciliation overflow key"; + stats->rec_ovfl_value.desc = "reconciliation overflow value"; + stats->rec_page_delete.desc = "pages deleted"; + stats->rec_page_merge.desc = "deleted or temporary pages merged"; + stats->rec_split_intl.desc 
= "internal pages split"; + stats->rec_split_leaf.desc = "leaf pages split"; + stats->rec_written.desc = "pages written"; + stats->txn_update_conflict.desc = "update conflicts"; + stats->txn_write_conflict.desc = "write generation conflicts"; *statsp = stats; return (0); } void -__wt_stat_clear_btree_stats(WT_STATS *stats_arg) +__wt_stat_clear_dsrc_stats(WT_STATS *stats_arg) { - WT_BTREE_STATS *stats; + WT_DSRC_STATS *stats; - stats = (WT_BTREE_STATS *)stats_arg; - stats->alloc.v = 0; - stats->cursor_inserts.v = 0; + stats = (WT_DSRC_STATS *)stats_arg; + stats->block_alloc.v = 0; + stats->block_extend.v = 0; + stats->block_free.v = 0; + stats->bloom_count.v = 0; + stats->bloom_false_positive.v = 0; + stats->bloom_hit.v = 0; + stats->bloom_miss.v = 0; + stats->bloom_page_evict.v = 0; + stats->bloom_page_read.v = 0; + stats->bloom_size.v = 0; + stats->ckpt_size.v = 0; + stats->cursor_insert.v = 0; stats->cursor_read.v = 0; stats->cursor_read_near.v = 0; stats->cursor_read_next.v = 0; stats->cursor_read_prev.v = 0; - stats->cursor_removes.v = 0; - stats->cursor_resets.v = 0; - stats->cursor_updates.v = 0; - stats->extend.v = 0; + stats->cursor_remove.v = 0; + stats->cursor_reset.v = 0; + stats->cursor_update.v = 0; + stats->entries.v = 0; stats->file_allocsize.v = 0; stats->file_bulk_loaded.v = 0; - stats->file_col_deleted.v = 0; - stats->file_col_fix_pages.v = 0; - stats->file_col_int_pages.v = 0; - stats->file_col_var_pages.v = 0; stats->file_compact_rewrite.v = 0; - stats->file_entries.v = 0; stats->file_fixed_len.v = 0; stats->file_magic.v = 0; stats->file_major.v = 0; @@ -99,17 +116,22 @@ __wt_stat_clear_btree_stats(WT_STATS *stats_arg) stats->file_maxleafitem.v = 0; stats->file_maxleafpage.v = 0; stats->file_minor.v = 0; - stats->file_overflow.v = 0; - stats->file_row_int_pages.v = 0; - stats->file_row_leaf_pages.v = 0; stats->file_size.v = 0; - stats->file_write_conflicts.v = 0; - stats->free.v = 0; + stats->lsm_chunk_count.v = 0; + stats->lsm_generation_max.v 
= 0; + stats->lsm_lookup_no_bloom.v = 0; + stats->overflow_page.v = 0; stats->overflow_read.v = 0; stats->overflow_value_cache.v = 0; + stats->page_col_deleted.v = 0; + stats->page_col_fix.v = 0; + stats->page_col_int.v = 0; + stats->page_col_var.v = 0; stats->page_evict.v = 0; stats->page_evict_fail.v = 0; stats->page_read.v = 0; + stats->page_row_int.v = 0; + stats->page_row_leaf.v = 0; stats->page_write.v = 0; stats->rec_dictionary.v = 0; stats->rec_hazard.v = 0; @@ -120,7 +142,8 @@ __wt_stat_clear_btree_stats(WT_STATS *stats_arg) stats->rec_split_intl.v = 0; stats->rec_split_leaf.v = 0; stats->rec_written.v = 0; - stats->update_conflict.v = 0; + stats->txn_update_conflict.v = 0; + stats->txn_write_conflict.v = 0; } int @@ -192,61 +215,3 @@ __wt_stat_clear_connection_stats(WT_STATS *stats_arg) stats->txn_fail_cache.v = 0; stats->txn_rollback.v = 0; } - -int -__wt_stat_alloc_lsm_stats(WT_SESSION_IMPL *session, WT_LSM_STATS **statsp) -{ - WT_LSM_STATS *stats; - - WT_RET(__wt_calloc_def(session, 1, &stats)); - - stats->bloom_cache_evict.desc = - "Number of bloom pages evicted from cache"; - stats->bloom_cache_read.desc = "Number of bloom pages read into cache"; - stats->bloom_count.desc = "Number of bloom filters in the LSM tree"; - stats->bloom_false_positives.desc = - "Number of bloom filter false positives"; - stats->bloom_hits.desc = "Number of bloom filter hits"; - stats->bloom_misses.desc = "Number of bloom filter misses"; - stats->bloom_space.desc = "Total space used by bloom filters"; - stats->cache_evict.desc = "Number of pages evicted from cache"; - stats->cache_evict_fail.desc = - "Number of pages selected for eviction that could not be evicted"; - stats->cache_read.desc = "Number of pages read into cache"; - stats->cache_write.desc = "Number of pages written from cache"; - stats->chunk_cache_evict.desc = - "Number of pages evicted from LSM chunks"; - stats->chunk_cache_read.desc = "Number of pages read into LSM chunks"; - stats->chunk_count.desc = 
"Number of chunks in the LSM tree"; - stats->generation_max.desc = - "Highest merge generation in the LSM tree"; - stats->search_miss_no_bloom.desc = - "Number of queries that could have benefited from a bloom filter that did not exist"; - - *statsp = stats; - return (0); -} - -void -__wt_stat_clear_lsm_stats(WT_STATS *stats_arg) -{ - WT_LSM_STATS *stats; - - stats = (WT_LSM_STATS *)stats_arg; - stats->bloom_cache_evict.v = 0; - stats->bloom_cache_read.v = 0; - stats->bloom_count.v = 0; - stats->bloom_false_positives.v = 0; - stats->bloom_hits.v = 0; - stats->bloom_misses.v = 0; - stats->bloom_space.v = 0; - stats->cache_evict.v = 0; - stats->cache_evict_fail.v = 0; - stats->cache_read.v = 0; - stats->cache_write.v = 0; - stats->chunk_cache_evict.v = 0; - stats->chunk_cache_read.v = 0; - stats->chunk_count.v = 0; - stats->generation_max.v = 0; - stats->search_miss_no_bloom.v = 0; -} diff --git a/test/suite/test_base05.py b/test/suite/test_base05.py index e4a9ad97bb7..31ba8381e38 100644 --- a/test/suite/test_base05.py +++ b/test/suite/test_base05.py @@ -158,7 +158,7 @@ class test_base05(wttest.WiredTigerTestCase): create_args = 'key_format=S,value_format=S,' + self.config_string() self.session_create("table:" + self.table_name1, create_args) self.pr('creating cursor') - cursor = self.session.open_cursor('table:' + self.table_name1, None, None) + cursor = self.session.open_cursor('table:' + self.table_name1) numbers = {} for i in range(0, self.nentries): numbers[i] = i diff --git a/test/suite/test_bug001.py b/test/suite/test_bug001.py index d9f17fd3699..8a42fe6ecbe 100644 --- a/test/suite/test_bug001.py +++ b/test/suite/test_bug001.py @@ -58,10 +58,7 @@ class test_bug001(wttest.WiredTigerTestCase): # Check search inside trailing implicit keys. 
for i in range(0, 5): - cursor.set_key(60 + i) - self.assertEqual(cursor.search(), 0) - self.assertEqual(cursor.get_key(), 60 + i) - self.assertEqual(cursor.get_value(), 0x00) + self.assertEqual(cursor[60 + i], 0x00) # Check cursor next inside trailing implicit keys. cursor.set_key(60) @@ -85,10 +82,7 @@ class test_bug001(wttest.WiredTigerTestCase): # Check search inside leading implicit keys. for i in range(0, 5): - cursor.set_key(10 + i) - self.assertEqual(cursor.search(), 0) - self.assertEqual(cursor.get_key(), 10 + i) - self.assertEqual(cursor.get_value(), 0x00) + self.assertEqual(cursor[10 + i], 0x00) # Check cursor next inside leading implicit keys. cursor.set_key(10) diff --git a/test/suite/test_checkpoint01.py b/test/suite/test_checkpoint01.py index 1482c1fdaf6..e218e8c76af 100644 --- a/test/suite/test_checkpoint01.py +++ b/test/suite/test_checkpoint01.py @@ -218,9 +218,7 @@ class test_checkpoint_target(wttest.WiredTigerTestCase): def check(self, uri, value): cursor = self.session.open_cursor(uri, None, "checkpoint=checkpoint-1") - cursor.set_key(key_populate(cursor, 10)) - cursor.search() - self.assertEquals(cursor.get_value(), value) + self.assertEquals(cursor[key_populate(cursor, 10)], value) cursor.close() def test_checkpoint_target(self): @@ -303,9 +301,7 @@ class test_checkpoint_last(wttest.WiredTigerTestCase): # Verify the "last" checkpoint sees the correct value. cursor = self.session.open_cursor( uri, None, "checkpoint=WiredTigerCheckpoint") - cursor.set_key(key_populate(cursor, 10)) - cursor.search() - self.assertEquals(cursor.get_value(), value) + self.assertEquals(cursor[key_populate(cursor, 10)], value) # Don't close the checkpoint cursor, we want it to remain open until # the test completes. 
diff --git a/test/suite/test_config04.py b/test/suite/test_config04.py index fdda95e74dc..14918636f19 100644 --- a/test/suite/test_config04.py +++ b/test/suite/test_config04.py @@ -27,6 +27,7 @@ import os import wiredtiger, wttest +from wiredtiger import stat # test_config04.py # Individually test config options @@ -82,10 +83,8 @@ class test_config04(wttest.WiredTigerTestCase): def common_cache_size_test(self, sizestr, size): self.common_test('cache_size=' + sizestr) cursor = self.session.open_cursor('statistics:', None, None) - cursor.set_key(wiredtiger.stat.cache_bytes_max) - self.assertEqual(cursor.search(), 0) - got_cache = cursor.get_values()[2] - self.assertEqual(got_cache, size) + self.assertEqual(cursor[stat.conn.cache_bytes_max][2], size) + cursor.close() def test_bad_config(self): self.assertRaisesWithMessage(wiredtiger.WiredTigerError, diff --git a/test/suite/test_cursor04.py b/test/suite/test_cursor04.py index e62aaf2a1e5..55a884f80c5 100644 --- a/test/suite/test_cursor04.py +++ b/test/suite/test_cursor04.py @@ -128,14 +128,10 @@ class test_cursor04(wttest.WiredTigerTestCase): cursor.insert() # 1. Calling search for a value that exists - cursor.set_key(self.genkey(5)) - self.assertEqual(cursor.search(), 0) - self.assertEqual(cursor.get_key(), self.genkey(5)) - self.assertEqual(cursor.get_value(), self.genvalue(5)) + self.assertEqual(cursor[self.genkey(5)], self.genvalue(5)) # 2. Calling search for a value that does not exist - cursor.set_key(self.genkey(self.nentries)) - self.assertEqual(cursor.search(), wiredtiger.WT_NOTFOUND) + self.assertRaises(KeyError, lambda: cursor[self.genkey(self.nentries)]) # 2. 
Calling search_near for a value beyond the end cursor.set_key(self.genkey(self.nentries)) diff --git a/test/suite/test_cursor05.py b/test/suite/test_cursor05.py index c349edce7f0..23b39e445ed 100644 --- a/test/suite/test_cursor05.py +++ b/test/suite/test_cursor05.py @@ -109,9 +109,7 @@ class test_cursor05(wttest.WiredTigerTestCase): # Do something that leaves the cursor in an uninitialized spot if expectcount > 0: n = expectcount - 1 - cursor.set_key(n, 'key' + str(n)) - cursor.search() - (s1, i2, s3, i4) = cursor.get_values() + s1, i2, s3, i4 = cursor[(n, 'key' + str(n))] self.assertEqual(s1, 'val' + str(n)) self.assertEqual(i2, n) self.assertEqual(s3, 'val' + str(n)) diff --git a/test/suite/test_stat01.py b/test/suite/test_stat01.py index 736917ea9cd..9ad3c3a133c 100644 --- a/test/suite/test_stat01.py +++ b/test/suite/test_stat01.py @@ -25,7 +25,8 @@ # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR # OTHER DEALINGS IN THE SOFTWARE. -import wiredtiger, wttest +import helper, wiredtiger, wttest +from wiredtiger import stat # test_stat01.py # Statistics operations @@ -35,6 +36,8 @@ class test_stat01(wttest.WiredTigerTestCase): """ tablename = 'test_stat01.wt' + uri = 'file:' + tablename + config = 'key_format=S,allocation_size=512,internal_page_max=16K,leaf_page_max=128K' nentries = 25 def statstr_to_int(self, str): @@ -69,50 +72,61 @@ class test_stat01(wttest.WiredTigerTestCase): self.assertTrue(count > mincount) self.assertTrue(found, 'in stats, did not see: ' + lookfor) - def test_statistics(self): - extra_params = ',allocation_size=512,internal_page_max=16384,leaf_page_max=131072' - self.session.create('table:' + self.tablename, 'key_format=S,value_format=S' + extra_params) - cursor = self.session.open_cursor('table:' + self.tablename, None, None) - value = "" - for i in range(0, self.nentries): - key = str(i) - value = value + key + value # size grows exponentially - cursor.set_key(key) - cursor.set_value(value) - cursor.insert() - 
cursor.close() - + def test_basic_conn_stats(self): self.printVerbose(2, 'overall database stats:') allstat_cursor = self.session.open_cursor('statistics:', None, None) self.check_stats(allstat_cursor, 10, 'blocks written to a file') # See that we can get a specific stat value by its key, # and verify that its entry is self-consistent - allstat_cursor.set_key(wiredtiger.stat.block_write) - self.assertEqual(allstat_cursor.search(), 0) - values = allstat_cursor.get_values() + values = allstat_cursor[stat.conn.block_write] self.assertEqual(values[0], 'blocks written to a file') val = self.statstr_to_int(values[1]) self.assertEqual(val, values[2]) allstat_cursor.close() + def test_basic_file_stats(self): + self.session.create(self.uri, self.config) + cursor = self.session.open_cursor(self.uri, None, None) + value = "" + for i in range(0, self.nentries): + key = str(i) + value = value + key + value # size grows exponentially + cursor.set_key(key) + cursor.set_value(value) + cursor.insert() + cursor.close() + self.printVerbose(2, 'file specific stats:') - filestat_cursor = self.session.open_cursor('statistics:file:' + self.tablename + ".wt", None, None) + filestat_cursor = self.session.open_cursor('statistics:' + self.uri, None, None) self.check_stats(filestat_cursor, 10, 'overflow pages') # See that we can get a specific stat value by its key, # and verify that its entry is self-consistent - filestat_cursor.set_key(wiredtiger.filestat.overflow) - self.assertEqual(filestat_cursor.search(), 0) - values = filestat_cursor.get_values() + values = filestat_cursor[stat.dsrc.overflow_page] self.assertEqual(values[0], 'overflow pages') val = self.statstr_to_int(values[1]) self.assertEqual(val, values[2]) filestat_cursor.close() - self.assertRaises(wiredtiger.WiredTigerError, - lambda: self.session.open_cursor( - 'statistics:file:DoesNotExist', None, None)) + def test_missing_file_stats(self): + self.assertRaises(wiredtiger.WiredTigerError, lambda: + 
self.session.open_cursor('statistics:file:DoesNotExist')) + + def test_checkpoint_stats(self): + nentries = 0 + last_size = 0 + for name in ('first', 'second', 'third'): + helper.simple_populate(self, self.uri, self.config, nentries) + nentries += self.nentries + self.session.checkpoint('name=' + name) + cursor = self.session.open_cursor( + 'statistics:' + self.uri, None, 'checkpoint=' + name) + size = cursor[stat.dsrc.overflow_page][1] + self.assertTrue(size >= last_size) + last_size = size + cursor.close() + self.session.truncate(self.uri, None, None) if __name__ == '__main__': wttest.run() |