From d845b75e5f0837f801bdf371babd985308a1ad80 Mon Sep 17 00:00:00 2001 From: Ramon Fernandez Date: Thu, 7 Jan 2016 16:31:22 -0500 Subject: Import wiredtiger-wiredtiger-2.7.0-269-g44463c5.tar.gz from wiredtiger branch mongodb-3.4 ref: 3c2ad56..44463c5 SERVER-21833 Compact does not release space to the system with WiredTiger WT-2060 Simplify aggregation of statistics WT-2099 Seeing memory underflow messages WT-2113 truncate01 sometimes fails WT-2177 Add a per-thread seed to random number generator WT-2198 bulk load and column store appends WT-2231 pinned page cursor searches could check parent keys WT-2235 wt printlog option without unicode WT-2245 WTPERF Truncate has no ability to catch up when it falls behind WT-2246 column-store append searches the leaf page; the maximum record number fails CRUD operations WT-2256 WTPERFs throttle option fires in bursts WT-2257 wtperf doesn't handle overriding workload config WT-2259 __wt_evict_file_exclusive_on() should clear WT_BTREE_NO_EVICTION on error WT-2260 Workloads evict internal pages unexpectedly WT-2262 Random sampling is skewed by tree shape WT-2265 Wiredtiger related change in ppc64le specific code block in gcc.h WT-2266 Add wtperf config to set if perf thresholds are fatal WT-2269 wtperf should dump its config everytime it runs WT-2272 Stress test assertion in the sweep server WT-2275 broken DB after application crash WT-2276 tool to decode checkpoint addr WT-2277 Remove WT check against big-endian systems WT-2279 Define WT_PAUSE(), WT_FULL_BARRIER(), etc when s390x is defined WT-2281 wtperf smoke.sh fails on ppc64le WT-2282 error in wt_txn_update_oldest verbose message test WT-2283 retry in txn_update_oldest results in a hang WT-2285 configure should set BUFFER_ALIGNMENT_DEFAULT to 4kb on linux WT-2289 failure in fast key check WT-2290 WT_SESSION.compact could be more effective. 
WT-2291 Random cursor walk inefficient in skip list only trees WT-2297 Fix off-by-one error in Huffman config file parsing WT-2299 upper-level WiredTiger code is reaching into the block manager WT-2301 Add reading a range to wtperf WT-2303 Build warning in wtperf WT-2304 wtperf crash dumping config WT-2307 Internal page splits can corrupt cursor iteration WT-2311 Support Sparc --- src/third_party/wiredtiger/NEWS | 231 +- src/third_party/wiredtiger/NEWS.MONGODB | 2523 -------------------- src/third_party/wiredtiger/README | 6 +- src/third_party/wiredtiger/RELEASE_INFO | 2 +- src/third_party/wiredtiger/bench/wtperf/config.c | 142 +- .../bench/wtperf/runners/btree-split-stress.wtperf | 10 + .../bench/wtperf/runners/mongodb-oplog.wtperf | 2 +- .../wiredtiger/bench/wtperf/runners/wtperf_run.sh | 15 +- src/third_party/wiredtiger/bench/wtperf/wtperf.c | 136 +- src/third_party/wiredtiger/bench/wtperf/wtperf.h | 19 + .../wiredtiger/bench/wtperf/wtperf_opt.i | 17 +- .../wiredtiger/bench/wtperf/wtperf_truncate.c | 35 +- .../wiredtiger/build_posix/aclocal/version-set.m4 | 4 +- .../wiredtiger/build_posix/aclocal/version.m4 | 2 +- .../wiredtiger/build_posix/configure.ac.in | 2 +- src/third_party/wiredtiger/build_win/filelist.win | 1 + src/third_party/wiredtiger/dist/filelist | 1 + src/third_party/wiredtiger/dist/log.py | 97 +- src/third_party/wiredtiger/dist/s_copyright | 14 + src/third_party/wiredtiger/dist/s_docs | 3 +- src/third_party/wiredtiger/dist/s_funcs | 2 +- src/third_party/wiredtiger/dist/s_longlines | 7 +- src/third_party/wiredtiger/dist/s_string | 3 +- src/third_party/wiredtiger/dist/s_string.ok | 20 + src/third_party/wiredtiger/dist/s_whitespace | 2 +- src/third_party/wiredtiger/dist/s_win | 1 + src/third_party/wiredtiger/dist/stat.py | 16 +- src/third_party/wiredtiger/dist/stat_data.py | 273 +-- src/third_party/wiredtiger/src/block/block_addr.c | 51 +- .../wiredtiger/src/block/block_compact.c | 92 +- src/third_party/wiredtiger/src/block/block_mgr.c | 16 + 
src/third_party/wiredtiger/src/block/block_open.c | 38 +- src/third_party/wiredtiger/src/btree/bt_compact.c | 42 +- src/third_party/wiredtiger/src/btree/bt_curnext.c | 111 + src/third_party/wiredtiger/src/btree/bt_curprev.c | 4 + src/third_party/wiredtiger/src/btree/bt_cursor.c | 40 +- src/third_party/wiredtiger/src/btree/bt_debug.c | 4 +- src/third_party/wiredtiger/src/btree/bt_huffman.c | 10 +- src/third_party/wiredtiger/src/btree/bt_page.c | 8 +- src/third_party/wiredtiger/src/btree/bt_slvg.c | 2 +- src/third_party/wiredtiger/src/btree/bt_split.c | 133 +- src/third_party/wiredtiger/src/btree/bt_stat.c | 4 +- src/third_party/wiredtiger/src/btree/col_srch.c | 117 +- src/third_party/wiredtiger/src/btree/row_srch.c | 221 +- src/third_party/wiredtiger/src/cache/cache_las.c | 9 +- src/third_party/wiredtiger/src/conn/conn_api.c | 3 + src/third_party/wiredtiger/src/conn/conn_dhandle.c | 4 +- src/third_party/wiredtiger/src/cursor/cur_bulk.c | 179 +- src/third_party/wiredtiger/src/cursor/cur_json.c | 13 +- src/third_party/wiredtiger/src/cursor/cur_stat.c | 7 +- src/third_party/wiredtiger/src/cursor/cur_table.c | 7 +- src/third_party/wiredtiger/src/evict/evict_lru.c | 22 +- src/third_party/wiredtiger/src/include/block.h | 7 +- src/third_party/wiredtiger/src/include/btmem.h | 4 +- src/third_party/wiredtiger/src/include/column.i | 22 +- .../wiredtiger/src/include/connection.h | 1 + src/third_party/wiredtiger/src/include/cursor.h | 38 +- src/third_party/wiredtiger/src/include/extern.h | 33 +- src/third_party/wiredtiger/src/include/gcc.h | 52 +- src/third_party/wiredtiger/src/include/log.h | 5 + src/third_party/wiredtiger/src/include/misc.h | 3 + src/third_party/wiredtiger/src/include/session.h | 7 +- src/third_party/wiredtiger/src/include/stat.h | 4 +- src/third_party/wiredtiger/src/log/log_auto.c | 96 +- src/third_party/wiredtiger/src/lsm/lsm_stat.c | 31 +- src/third_party/wiredtiger/src/meta/meta_turtle.c | 3 +- src/third_party/wiredtiger/src/os_posix/os_map.c | 12 +- 
.../wiredtiger/src/os_posix/os_pagesize.c | 19 + .../wiredtiger/src/os_win/os_pagesize.c | 23 + .../wiredtiger/src/reconcile/rec_write.c | 118 +- .../wiredtiger/src/session/session_api.c | 2 +- .../wiredtiger/src/session/session_compact.c | 8 +- src/third_party/wiredtiger/src/support/global.c | 29 +- src/third_party/wiredtiger/src/support/hash_city.c | 6 + src/third_party/wiredtiger/src/support/hex.c | 21 +- src/third_party/wiredtiger/src/support/huffman.c | 26 +- src/third_party/wiredtiger/src/support/rand.c | 23 + src/third_party/wiredtiger/src/support/stat.c | 36 +- src/third_party/wiredtiger/src/txn/txn.c | 82 +- src/third_party/wiredtiger/src/txn/txn_log.c | 25 +- .../wiredtiger/src/utilities/util_list.c | 73 +- .../wiredtiger/src/utilities/util_main.c | 1 - .../wiredtiger/src/utilities/util_printlog.c | 15 +- src/third_party/wiredtiger/tools/wt_ckpt_decode.py | 103 + .../wiredtiger/tools/wtstats/wtstats.html.template | 32 +- 85 files changed, 2364 insertions(+), 3319 deletions(-) delete mode 100644 src/third_party/wiredtiger/NEWS.MONGODB create mode 100644 src/third_party/wiredtiger/bench/wtperf/runners/btree-split-stress.wtperf create mode 100644 src/third_party/wiredtiger/src/os_posix/os_pagesize.c create mode 100644 src/third_party/wiredtiger/src/os_win/os_pagesize.c create mode 100644 src/third_party/wiredtiger/tools/wt_ckpt_decode.py diff --git a/src/third_party/wiredtiger/NEWS b/src/third_party/wiredtiger/NEWS index 1b288c74b18..546d08b2418 100644 --- a/src/third_party/wiredtiger/NEWS +++ b/src/third_party/wiredtiger/NEWS @@ -1,3 +1,228 @@ +WiredTiger release 2.7.0, 2015-12-08 +------------------------------------ + +The WiredTiger 2.7.0 release contains new features, minor API changes and bug +fixes. + +New features and API changes; refer to the API documentation for full details: + +* 959376c WT-147: Create indexes on non-empty tables. +* 4368d39 WT-1315: Add an implementation of cursor joins via a new WT_SESSION::join API. 
+* 944ccd1 WT-1350: Add a new configuration option to ::wiredtiger_open and + WT_CONNECTION::reconfigure called "eviction_dirty_trigger" that causes eviction to start evicting + dirty pages from cache once the given threshold has been reached. +* ab5a8fb WT-1728: Add a WT_SESSION::reset method to release resources held by a session. +* 263c5b7 WT-1930: Allow setting "file_manager=(close_idle_time=0)" to ::wiredtiger_open and + WT_CONNECTION::reconfigure to disable closing idle handles. +* 6310c3f WT-1959: Change verify to distinguish between warnings and errors. Add a new strict mode + to verify that causes warnings to be reported as errors. Use strict mode to match earlier + behavior. See the upgrading documentation for more information. +* e0d6229 WT-1980: Add a new "metadata:create" URI to WT_SESSION::open_cursor for metadata cursors + that return strings useful for passing to WT_SESSION::create. +* 292712e WT-2065: Add a new configuration option to ::wiredtiger_open and + WT_CONNECTION::reconfigure called "shared_cache=(quota)" that limits the amount of shared cache a + participant can be assigned. +* 4d0ebf4 WT-2104: Add a method to flush log files via a new WT_SESSION::log_flush API. Made + WT_SESSION::commit_transaction configuration options match WT_SESSION::log_flush. Change the + default WT_SESSION::transaction_sync timeout to 20 minutes rather than infinity. +* 21b8330 WT-2151: Enhance logging configuration to allow reconfiguration and add a new + "log=(zero_fill)" configuration option that causes WiredTiger to zero-fill log files on creation. +* 368b307 WT-2200: Add a new configuration option to ::wiredtiger_open called "write_through" that + causes WiredTiger to specify the FILE_FLAG_WRITE_THROUGH on Windows when writing files (default + false, including when "direct_io" is configured). +* 08c0fcd WT-2217: After a successful call to WT_CURSOR::insert, the key and value will be + cleared from the cursor. 
See the upgrading documentation for more information. +* d4fc69a SERVER-17078: Add a "statistics=(size)" mode to statistics cursors, which allows for + retrieving file size only. +* b83b901 SERVER-18356: Changed the handling of the "config_base" option to ::wiredtiger_open. See + upgrading documentation for more information. + + +The following statistics were removed: + +* f1ed3b9 WT-1481: connection dhandles swept. +* f1ed3b9 WT-1481: connection candidate referenced. +* 4ba4518 WT-1481: failed to find a slot large enough for record. +* 28563af WT-1989: log buffer size increases. +* f81c70d WT-1989: slots selected for switching that were unavailable. +* df4f69c WT-2094: log records written directly. +* df4f69c WT-2094: record size exceeded maximum. +* d68e078 WT-2182: pages split during eviction. + +Lookaside table: + +* 6a5a461 WT-1967: Allow eviction of updates required by old readers. +* 87592ec WT-2074: Fix a race between lookaside table reconciliation and checkpoints. +* 0390b29 WT-2149: Fix the order of creation of the lookaside table. +* 7518a69 WT-2190: Fix transaction visibility test that is applied to the lookaside table. +* 2cf57a6 SERVER-21585: Don't use the lookaside file until the cache is stuck full. + +Issues fixed in MongoDB: + +* d57dc26 SERVER-18829: Have pages start in the middle of the LRU queue for eviction. +* b847ccc SERVER-18838: During drops, don't remove files until the metadata is durable. +* 8f7da9a SERVER-18875: Clean up deleted pages. +* d04083d SERVER-18899: Add unit test to simulate fsyncLock. +* 3ec45a7 SERVER-19340: Avoid type aliasing in the random number generator. +* 907c0ca SERVER-19445: Have the oldest transaction update the oldest tracked ID. +* fb8739f SERVER-19522: Try to evict internal pages with no useful child pages. +* 4545a8b SERVER-19573: Change row-store inserts to avoid page locking. +* b52d2d3 SERVER-19751: Retry pthread_create on EAGAIN or EINTR. 
+* 46b4ad5 SERVER-19954: Don't scan tracked handles during checkpoints. +* 65abd20 SERVER-19989: Add a write barrier before data handles are added to shared lists. +* 3e46e79 SERVER-19990: Don't assert on eviction of live updates from dead trees. +* 38dad39 SERVER-20008: Don't reset eviction walks when hitting a busy page. +* 3b72361 SERVER-20159: Make all readers wait while the cache is full. +* 8be547b SERVER-20193: Fix obsolete transaction check. +* ad56c6a SERVER-20303: Tune in-memory splits when inserting large objects. +* 7505a02 SERVER-20385: Make WT_CURSOR::next(random) more random. +* 35d46c3 SERVER-21027: Reverse split if there are many deleted pages. +* a6da10e SERVER-21553: Enable fast-path truncate after splits. +* 890ee34 SERVER-21619: Don't do internal page splits after a tree is marked DEAD. +* 0e93d60 SERVER-21691: Avoid insert stalls. + +Other noteworthy changes since the previous release: + +* bc2aa57 WT-1744: Throttle worker threads based on eviction targets. +* 55a989e WT-1845: Allow read only transactions to commit after failure. +* df625dc WT-1869: Avoid doing in memory splits while checkpointing a tree. +* ddac54f WT-1942: Add atomic implementations for PPC64 architecture. +* 3866fa6 WT-1962: Make the hot_backup_lock a read/write lock. +* 58f9e99 WT-1963: Fix backup cursor Java API. +* 4e0fe59 WT-1964: Fix a bug in the Java API when closing handles from a different thread. +* 60e2150 WT-1966: Change how the shared cache assigns priority to participants. +* 76d2e73 WT-1975: Ensure previous log files are complete for forced sync. +* e43b22a WT-1977: Improve performance of getting snapshots with many sessions. +* 5eaf63e WT-1978: Better checking and tests for index cursor comparison. +* 1602a4b WT-1981: Fix a signed 32-bit integer unpacking bug. +* cd1704d WT-1982: Fix a bug where cached overflow items were freed too early. +* 57a9f38 WT-1985: Integer packing and other fixes for Python and Java. 
+* 9897eb2 WT-1986: Fix a race renaming temporary log files. +* b10bff9 WT-1989: Improve scalability of log writes. +* f8dc12b WT-1996: Fix a bug where we would free the first update during a page rewrite on error. +* 144a383 WT-1998: Fixes for indexes with some rarely used key/value formats. +* 8af8b8a WT-2002: Fix a bug in verify where it would panic when encountering a corrupted file. +* e1d8bc7 WT-2007: Statically allocate log slot buffers to a maximum size. +* 911158c WT-2008: Fix a bug in recovery where a file create went missing. +* 3e2e7e6 WT-2009: Apply tracked metadata operations post-commit. +* 1255cb2 WT-2012: Fix a bug updating the oldest ID. +* ef9d56f WT-2013: Add gcc asm definitions for ARM64. +* c8633e6 WT-2014: Fix a bug in checkpoints where files could be flushed in the wrong order. +* 9b09e69 WT-2015: Fix a bug in error handling during block open. +* 4938b8d WT-2017: Once an eviction server thread is started keep it running. +* 298f86c WT-2019: Fix a logic bug tracking the maximum transaction ID in clean trees. +* 7d6075c WT-2020: Clarify checksum error failure messages. +* 7b302d3 WT-2021: Fix a bug moving the oldest ID forward (introduced by WT-1967). +* 9df72d7 WT-2022: Fix a bug not releasing a handle when opening a non-existent index cursor. +* 81ffc2d WT-2023: Improve locking primitives: simplify read-write lock operations. +* 6b84722 WT-2029: Improve scalability of statistics. +* f97cfe9 WT-2031: Log slot revamp. +* bee11c3 WT-2032: Improve next_random cursors to work with small trees. +* cf53696 WT-2034: Improve shared cache balancing algorithm. +* aee1c94 WT-2035: For index cursors, keep track of which column groups need to be positioned. +* 36310d4 WT-2036: Make handle sweeps more robust. +* c948fbb WT-2037: Only write a checkpoint to the log on close if it wasn't. +* e25e615 WT-2038: Avoid long scans holding the handle list lock. +* 75a4655 WT-2039: Add error check and unit test for log records over 4 GB. 
+* 5ab26af WT-2042: Only try to evict tombstones that are visible to all readers. +* ce223ac WT-2045: Don't let the eviction server do slow reconciliation, it can stall eviction. +* 6665618 WT-2046: Add a statistic for search restarts. +* 98b4a28 WT-2047: Fix a bug in the random generator code to handle an uninitialized state. +* 258e2e1 WT-2050: Show size with memory allocation errors. +* 2e1471c WT-2053: Fix a bug in disk verify messages. +* e316e61 WT-2056: Reorder btree cursor close so stats are maintained correctly. +* 70f9100 WT-2057: Remove the verbose configuration when writing the base configuration file. +* 41b6fb8 WT-2058: Fix an alignment bug in the mutex and log-slot code. +* d72012b WT-2059: Include non-aggregated stats in cursor results. +* 3e0c7bf WT-2062: Try harder to make progress on in-memory splits. +* 66757f7 WT-2064: Don't spin indefinitely waiting for the handle list lock in eviction. +* 8f42f02 WT-2066: Update the oldest transaction ID from eviction. +* e167592 WT-2068: Protect discarding handles with the handle list lock. +* fd72a09 WT-2075: Fix a hang in logging with parallel workload. +* 11c0fa0 WT-2078: Fix a bug in error handling with statistics cursors. +* 9734d85 WT-2081: Make verify progress reporting less verbose. +* 6008b41 WT-2085: Run some of the log_server threads operations more frequently. +* 39a69ec WT-2086: Add a statistic to track when eviction finds a page that can be split. +* 334e103 WT-2089: Relax restrictions on multiblock eviction and in-memory splits. +* f13b788 WT-2090: Fix a bug in the Windows OS layer that swallowed error returns. +* 83b8db7 WT-2092: Free log condition variables after all threads are joined. +* d9391c0 WT-2093: Use the C99 bool type to clarify when functions return true/false. +* f883d27 WT-2094: Eliminate direct write and record unbuffered log records. +* 9008260 WT-2097: Reintroduce immediate waits when forced eviction is necessary. 
+* ff1da28 WT-2100: Rename evict to evict_queue so it's easier to search for. +* 41db2ee WT-2101: Don't update the logging ckpt_lsn on clean shutdown. +* e1d6886 WT-2102: Fix a hang in log slot join when forcing log writes. +* 0e96683 WT-2105: Fix a bug where we could reference an invalid memory address if a file is + corrupted on disk. +* 6a565bc WT-2108: Rework in-memory page rewrite support (WT_PM_REC_REWRITE). +* dcb0ddb WT-2114: Make application eviction fairer. +* 10c2f15 WT-2115: Don't skip truncated pages that are part of a checkpoint. +* cd6ce97 WT-2116: Add diagnostic checks for stuck cache and dump the state. +* 51cf672 WT-2119: Don't evict clean multiblock pages with overflow items during checkpoints. +* 346ad40 WT-2126: Clean up if there is an error during splits. +* 6831485 WT-2127: Deepen the tree more regularly to avoid wide internal pages. +* a0b5d2b WT-2128: When decoding huffman encoding during salvage it's possible to have fewer bits + than the symbol length during decoding, if the value has been corrupted. +* 79f74e5 WT-2131: Switch to using a lock to control page splits to avoid starvation. +* 02a3d9f WT-2132: Make debug dump function more robust to errors. +* 8c223e4 WT-2134: Flush all buffered log records in log_flush. +* d1b5e7f WT-2135: Fix log_only setting for backup cursor. Fix initialization. +* aab8101 WT-2137: Check the sync_lsn is in the correct file before moving it forward. +* 323af84 WT-2139: Fix a transaction visibility bug in read-uncommitted transactions. +* 751c628 WT-2146: Improve performance when searching for short keys. +* 62998ce WT-2148: Fix a compiler warning in encoding functions. +* 6c16fdd WT-2153: Fix bug. Now we always need to start the log_server thread. +* 6a5fca3 WT-2154: Make btree dump safer. +* 0d74bc6 WT-2155: Remove last use of F_CAS_ATOMIC and the associated macro. +* cc42bda WT-2156: Allow eviction workers to restart. 
+* bf1d359 WT-2157: Fix a bug where a failed page split could lead to incomplete checkpoints. +* ce9d265 WT-2159: Don't check the config twice in one path. +* 544f27d WT-2162: Add null pointer check, needed after an index is dropped. +* 0d85ebe WT-2164: Prevent another LSM chunk checkpoint while the first is still in progress. +* a81aae8 WT-2165: Stop using FALLOC_FL_KEEP_SIZE flag when pre-allocating files. +* 2865a76 WT-2167: Switch recovery to using an internal session. +* 5d4c952 WT-2170: Protect the turtle file with a lock. +* 497b744 WT-2174: Avoid the table list lock when creating a size only statistics cursor. +* fdfa804 WT-2178: In-memory storage engine support. +* b9bd01f WT-2179: Added decorator to mark txn13 as part of the --long test suite. +* be544dd WT-2180: Remove cursor.{search,search-near,remove} key size validation. +* be412b5 WT-2182: When internal pages grow large enough, split them into their parents. +* c27e78e WT-2184: Fix log scan bug when final record has many trailing zeros. +* 9584be3 WT-2185: Don't do reverse splits when closing a file. +* f6b12d3 WT-2187: Add flag for flushing a slot. +* a4545bf WT-2189: Update flag set and clear macros to be less error prone. +* 30ab327 WT-2191: In-memory disk image no longer the same as saved updates. +* 4ba5698 WT-2192: Fix the logic around checking whether internal page is evictable. +* 2f0b3e2 WT-2193: Handle read-committed metadata checkpoints during snapshot transactions. +* 9b1febc WT-2194: Java close callbacks should handle cursors that Java code did not open. +* 438f455 WT-2195: Fix a hang after giving up on a reverse split. +* ff27fe9 WT-2196: Fix error handling in size only statistics. +* 0a1ee34 WT-2199: Fix transaction sync inconsistency. +* 2ff1fd6 WT-2203: Release an allocated page on error. +* 3b3cf2a WT-2204: Don't take a local copy of page->modify until we know the page is dirty. +* 179d4d0 WT-2206: Change cache operations from flags to an enumeration. 
+* 82514ca WT-2207: Track whenever a session has a handle exclusive. +* 78bd4ac WT-2210: Raw compression fails if row-store recovery precedes column-store recovery. +* c360d53 WT-2212: Add a "use_environment" config to ::wiredtiger_open. +* a72ddb7 WT-2218: Add truncate stats. +* ce8c091 WT-2219: Enhancements to in-memory testing. +* e2f1130 WT-2220: Update time comparison macros. +* 59857f9 WT-2222: Add statistics for named snapshots. +* fb9cebe WT-2224: Track which deleted refs are discarded by a split. +* cace179 WT-2228: Avoid unnecessary raw-compression calls. +* 0a52a80 WT-2237: Have threads publish unique transaction IDs so that updates always become + visible immediately on commit. +* 6c7338f WT-2241: Use a lock to protect transaction ID allocation. +* 39dfd21 WT-2243: Don't keep transaction IDs pinned for reading from checkpoints. +* 4c49948 WT-2244: Trigger in-memory splits sooner. +* 9f2e4f3 WT-2248: WT_SESSION::close is updating WT_CONNECTION_IMPL.default_session. +* 264ec21 WT-2249: Keep eviction stuck until cache usage is under 100%. +* dca1411 WT-2250: Minor fix. Use SET instead of increment for stat. +* e731ef8 WT-2251: Free addresses when we discard deleted page references. +* 4fc3e39 WT-2253: Evict pages left behind by in-memory splits. +* 2df5658 WT-2257: Fixes when given multiple thread workload configurations. +* 4c49043 WT-2260: Avoid adding internal pages to the eviction queue + WiredTiger release 2.6.1, 2015-05-13 ------------------------------------ @@ -255,7 +480,7 @@ API and behavior changes: * Update configuration string parsing to always be case sensitive. See upgrading documentation for more information. -* Change the statistics cursor WT_CURSOR.reset method to re-load statistics +* Change the statistics cursor WT_CURSOR::reset method to re-load statistics values. See upgrading documentation for more information. refs WT-1533 @@ -465,7 +690,7 @@ New features and API changes: See API documentation for more information. 
refs #1381 -* Add a new WT_SESSION.strerror method, a thread-safe alternative to +* Add a new WT_SESSION::strerror method, a thread-safe alternative to ::wiredtiger_strerror. refs #1516 @@ -1271,7 +1496,7 @@ This is primarily a bugfix and performance tuning release. The main changes are: * The default behavior of the wt utility's load command has been changed to overwrite existing data. -* Add a WT_SESSION.create prefix_compression_min configuration option with a +* Add a WT_SESSION::create prefix_compression_min configuration option with a default value of 4. [#624] and [#624] * Fix "make install" of Python API. [#598] diff --git a/src/third_party/wiredtiger/NEWS.MONGODB b/src/third_party/wiredtiger/NEWS.MONGODB deleted file mode 100644 index 831237ba149..00000000000 --- a/src/third_party/wiredtiger/NEWS.MONGODB +++ /dev/null @@ -1,2523 +0,0 @@ -3.2.0, Date TBA ------------------- - -commit 9166bca3d07d6592c1426c2c33bd56b6be0667e0 -Author: Alex Gorrod -Date: Thu Apr 23 05:43:35 2015 +0000 - - Fix a deadlock related to handle locking. - - If one thread does a __wt_session_release_btree of a handle at the same time - as another thread does a __wt_session_get_btree both wanting exclusive access - to the file. It was possible for one thread to get the dhandle list lock and - wait on the handle lock, which another thread held the handle lock waiting for - the handle list lock. - - Temporarily fix by doing a try-lock on the __wt_session_get_btree path, long term - the solution is to get rid of the __conn_dhandle_open_lock method, and split get - and lock into two passes. - - Refs BF-716 - -commit 3e254079484ce35a3cb70c48478c69defdb8f012 -Author: Alex Gorrod -Date: Thu Apr 23 05:42:08 2015 +0000 - - Fix a deadlock related to LSM. There are cases where closing a file with - an existing checkpoint could self deadlock. - - Check in the meta tracking whether we've already visited a checkpoint handle. 
- - Refs WT-716 - -commit 1e80654b284b47b2dd9c302395ba908bf3a9b898 -Author: Keith Bostic -Date: Wed Apr 22 13:54:03 2015 -0400 - - __wt_config_concat() is a special case of __wt_config_merge(), replace - the three calls to __wt_config_concat() with __wt_config_merge(). - -commit 1c412df22489dc3c18aa5390164ff68474293daf -Author: Keith Bostic -Date: Wed Apr 22 13:29:05 2015 -0400 - - Instead of doing a configuration line merge to remove configuration - values we don't included, followed by a call to concatenate additional - configuration values, do a single merge call including all of the - configuration values we care about. - - This change is slightly more defensive -- if a user tries to change one - of the configuration values we don't allow, we'll strip it, rather than - failing the load. - - Clean up memory handling a bit so we don't leak memory, someday Coverity - will thank me. - -commit 6093e42b21d7e947d7bd053f6691aa1fed1a7f99 -Author: Keith Bostic -Date: Wed Apr 22 11:33:21 2015 -0400 - - Instead of rolling our own configuration removal code, use - __wt_config_merge(), reference WT-1898. - -commit 4322191125284717af1a0c6734b6ea123ca6c50d -Author: Susan LoVerso -Date: Tue Apr 21 15:45:30 2015 -0400 - - Run recovery earlier. WT-1897 - -commit 6155c465a519612e16cec5feb9fcf79fd0121d7f -Author: Alex Gorrod -Date: Tue Apr 21 13:15:06 2015 +1000 - - Revert a change to split large in-memory pages. - - Refs WT-1890 WT-1896 - -commit 610f629949726b16f938ded85188bb6a21820f7e -Author: Keith Bostic -Date: Mon Apr 20 10:40:54 2015 -0400 - - Create a "clear a single walk" function and call it from __evict_clear_walks - and __evict_clear_all_walks, that way we can use the WT_WITH_DHANDLE macro, - and the comment about clearing the eviction reference before releasing the - page appears everywhere it should. - -commit 6ea949933ff9e68d220738d18fa72eb7a91aab65 -Author: Alex Gorrod -Date: Mon Apr 20 16:50:13 2015 +1000 - - Fix coverity reports in test/format code. 
- - CID 1295092: Integer handling issues (OVERFLOW_BEFORE_WIDEN) - /test/format/ops.c: 93 - CID 1295091: Null pointer dereferences (REVERSE_INULL) - /test/format/ops.c: 489 - -commit 3eceb85ce623dcce9273f7b722bb64f509dbe24d -Author: Michael Cahill -Date: Mon Apr 20 16:15:41 2015 +1000 - - Clear session->dhandle so that future error messages don't dereference freed memory. - -commit 23ce8bae4d6507d6b6d599cb73a26a6c856cce98 -Author: Michael Cahill -Date: Mon Apr 20 15:48:29 2015 +1000 - - Clear eviction walks in all trees before the eviction server exits. - -commit a4bce0e0bc05d528f118b645d0d1915db00cdcf3 -Author: Michael Cahill -Date: Mon Apr 20 14:11:57 2015 +1000 - - Move the "cache is empty" check to where the cache is destoyed, not when the eviction server is exiting. - -commit c8fdd9c676c4a24bee6328a56cf7fd074cd045e0 -Author: Michael Cahill -Date: Mon Apr 20 13:25:36 2015 +1000 - - Shut down the eviction server before closing file handles to avoid a race. - - refs WT-1893 - -commit fb4a089fa71876232478e1181d821f29dedc0bd8 -Author: Keith Bostic -Date: Sun Apr 19 12:43:47 2015 -0400 - - Add the version to the configuration stack before reading the base - configuration file: that way it's always set, but it's always set - to the value of the base configuration file. - -commit 7aee6d5dabb7942aeb685e797b103c47c9337186 -Author: Keith Bostic -Date: Sun Apr 19 11:56:22 2015 -0400 - - Add support to the __wt_config_merge() call to remove strings we don't - want in the final configuration string, then strip out "create=" and - "encryption=(secretkey=)". - -commit 071d545f47ff4e4badcf9b8c066a44dac7fa2f20 -Author: Keith Bostic -Date: Fri Apr 17 11:39:23 2015 -0400 - - Instead of explicitly writing the version into the base configuration - file, append it to the configuration list. 
- -commit ab5443aa13d62c71423c128853735f699935c01e -Author: Keith Bostic -Date: Fri Apr 17 11:35:13 2015 -0400 - - There's no reason to loop through each application-specified configuration - string separately, there's a call to check them all for the same key. - - This also prevents a value being listed multiple times. Previously, if - you set buffer_alignment in the wiredtiger_open() configuration string, - but also in the WIREDTIGER_CONFIG environment variable, it would appear - twice in the base configuration file, with this change, it only appears - once. - -commit abb0bb80cc6dce29b8db61c6747c228c2701ae5a -Author: Susan LoVerso -Date: Fri Apr 17 10:49:41 2015 -0400 - - Look for any number of non-data-changing log records to determine if we - can skip recovery. WT-1892 - -commit e7f4ecd2055bab4d683eae119f8da95fa7acf21f -Author: Alex Gorrod -Date: Fri Apr 17 04:32:52 2015 +0000 - - Update API documentation to explain session usage. - - This allows users to account for internal WiredTiger session handle - usage. We already allocate additional handles for those we use - unconditionally. This allows users to do specific calculations based - on their session_max setting. - -commit ee02428d1fdf1118c482688ec870a9da69bee45a -Author: Michael Cahill -Date: Fri Apr 17 12:28:52 2015 +1000 - - If an LSM search-near operation lands on a deleted item, make a copy of the key before stepping to the next record. - - refs WT-1891 - -commit 54e856d57da291c5f84da6d0d0ab56280d9956dc -Author: Susan LoVerso -Date: Thu Apr 16 12:22:31 2015 -0400 - - Remove use of unneeded tmp_fh. WT-1872 - -commit 6a32905c397e57643b15e5a3038dbcb99a8a8dc8 -Author: Alex Gorrod -Date: Thu Apr 16 06:18:31 2015 +0000 - - Fix a deadlock in LSM with schema operations. - - There is special code in LSM to co-ordinate schema operations on - tables (drop, rename, etc). The code does a dance dropping and - acquiring locks, to allow utility operations to drain for the tree - while waiting for it to close. 
- - We were doing the dance with the schema and dhandle list locks. We - needed to include the table lock, or parallel cursor opens could block: - - The cursor open is waiting for the table lock: - __wt_spin_lock src/include/mutex.i:175 - __schema_add_table src/schema/schema_list.c:26 - __wt_schema_get_table src/schema/schema_list.c:98 - __wt_curtable_open src/third_party/wiredtiger/src/cursor/cur_table.c:875 - __wt_open_cursor src/session/session_api.c:240 - - The LSM table drop is waiting for the schema lock: - __wt_spin_lock src/include/mutex.i:175 - __lsm_tree_close src/lsm/lsm_tree.c:107 - __wt_lsm_tree_drop src/lsm/lsm_tree.c:943 - __wt_schema_drop src/schema/schema_drop.c:174 - __drop_table src/schema/schema_drop.c:124 - __wt_schema_drop src/schema/schema_drop.c:176 - __session_drop src/session/session_api.c:528 - -commit 790646183cc5dd056bbf95c4563c20c51602a808 -Author: Alex Gorrod -Date: Thu Apr 16 04:11:36 2015 +0000 - - Fix a bug in LSM where updates with overwrite could be skipped. - - References JIRA BF-829 - - The issue was that we were not looking in all chunks of an LSM - tree before deciding whether to apply an update (insert or remove). - -commit 72ccd267fea9e491fcf3506e85191f71471cf51a -Author: Keith Bostic -Date: Wed Apr 15 09:44:13 2015 -0400 - - A WT_CURSOR.next operation followed by a WT_CURSOR.search has the same - problem as referenced in #1887: the key returned to the application is - in WT_CURSOR_BTREE.tmp, and that WT_ITEM is used as temporary storage - during the search of a row-store leaf page, so the search can overwrite - the search key while it's still in use. - - Change WT_CURSOR.next to return the ey in WT_CURSOR_BTREE.search_key. - - Rename WT_CURSOR_BTREE.search_key to be WT_CURSOR_BTREE.row_key, it's - exclusive to row-store, and no longer exclusive to search. 
- -commit 50f8bedf616a4009068516df865374f688c76c70 -Author: Keith Bostic -Date: Tue Apr 14 12:52:32 2015 -0400 - - When row-search builds keys during leaf-page search, the last key built - is left in WT_CURSOR_BTREE.search_key, and if it's an exact match, that - buffer's contents are returned to the application (or, in the case of - cursor, the search key can be overwritten during the search, that is, - the searched-for-key, and the temporary buffer where we're building the - on-page keys for comparison, are the same. - - Use the WT_CURSOR_BTREE.tmp buffer during the row-search instead of - WT_CURSOR_BTREE.search_key, and set WT_CURSOR_BTREE.search_key to the - returned key only when we've found an exact match and are returning to - our caller. - - Making WT_CURSOR_BTREE.search_key and WT_CURSOR_BTREE.tmp pointers so - it's easy to swap back-and-forth makes this change noisy; note the new - __wt_btcur_open() function to set things up when a Btree cursor is first - created. - - Reference #1887. - -commit 4cf1871ca6770b035a3b30aca29fa89dc11bfc18 -Author: Alex Gorrod -Date: Tue Apr 14 06:30:41 2015 +0000 - - Make test/thread use WT_TEST not cwd, and add some more logging. - -commit b9e2e76511a24505014369aaf0e1ec286e9c473d -Author: Michael Cahill -Date: Tue Apr 14 16:30:31 2015 +1000 - - Merge bulk cursor close with regular file cursor close: we were missing a decrement that kept bulk-loaded files pinned. - -commit a9e6a51f4ace1da5c73dd54c108d9a764fc5d8a4 -Author: Michael Cahill -Date: Tue Apr 14 14:37:12 2015 +1000 - - Fix some 64-to-32-bit conversion warnings. 
- - include/cell.i|652 col 20| error: implicit conversion loses integer precision: 'uint64_t' (aka 'unsigned long long') to 'uint32_t' (aka 'unsigned int') - copy.v = unpack->v; - - include/cell.i|705 col 24| error: implicit conversion loses integer precision: 'uint64_t' (aka 'unsigned long long') to 'uint32_t' (aka 'unsigned int') - copy.v = unpack->v; - - reconcile/rec_write.c|2683 col 17| error: implicit conversion loses integer precision: 'size_t' (aka 'unsigned long') to 'uint32_t' (aka 'unsigned int') - tmp->size = ((u_int)(28 + (btree)->block_header)) + len; - -commit 4528ddaec1d4b3382055e5c1c53fb7b1772133a2 -Author: Don Anderson -Date: Mon Apr 13 15:05:16 2015 -0400 - - Generated tables for config subcategories now contains the name of the - method they are applicable for. Refs #1879. - -commit 1ac393ca9adce1f6d5e4bec035e7c49b32e5a722 -Author: Keith Bostic -Date: Mon Apr 13 13:00:57 2015 -0400 - - We can't pass the reconfiguration functions the base configuration array - because repeated calls will cause us to fallback to the default value. - - For example: - - conn->reconfigure(conn, "file_manager=(close_handle_minimum=37)"); - conn->reconfigure(conn, "file_manager=(close_handle_minimum=38)"); - conn->reconfigure(conn, "file_manager=(close_handle_minimum=39)"); - conn->reconfigure(conn, "eviction=(threads_max=10)"); - - The reconfigure call for eviction will reset close_handle_minimum back - to its default value, because it will find the base value, not the - reconfigured value. - - Try and make sure we don't mess this up again, use "cfg" instead of a - separately constructed local variable. - -commit ac37b924ac18f15726d3bd1984c61e89fbdd405e -Author: Keith Bostic -Date: Mon Apr 13 07:17:18 2015 -0400 - - Search the strings in reverse order, that way the first hit wins - and we don't search the base set until there's no other choice. 
- -commit 110164271dc688c90a092c0c95d37feed2cec188 -Author: Alex Gorrod -Date: Mon Apr 13 05:48:19 2015 +0000 - - Add functionality to drop files at the end of a wtperf run. - - Helps figure out how long a drop takes. - -commit ed3158e71f0bd2716269a5771fd162b60b9a1cc0 -Author: daveh86 -Date: Mon Apr 13 12:59:29 2015 +1000 - - Allow forced eviction of pages already queued for eviction - -commit 9c83351f63afc2e032e492e3030df4f3b1cd6883 -Author: Mark Benvenuto -Date: Sun Apr 12 19:02:32 2015 -0400 - - Disable fallocate on Windows since SetEndofFile does not - ignore truncation requests like POSIX fallocate. - -commit 61a7d81ad26db1f2bfb65258d9b8ae4a4ca25b34 -Author: Keith Bostic -Date: Sun Apr 12 12:44:05 2015 -0400 - - When using ftruncate as the file-extension call, we must use WT_FH.size - as the starting point of the extension (not offset), and we have to read - the size value after acquiring the lock that prevents racing with writers. - - Split the extension functionality out into a separate function and - try to make it a little simpler to understand. - - Reference #1871. - -commit f26f1c1c59d5cbbc8f5f543215d8fc636e7175d2 -Author: Keith Bostic -Date: Sun Apr 12 10:16:34 2015 -0400 - - The OS layer ftruncate() code sets the WT_FH file size, so when - ftruncate is used to extend the file, we skip over the bytes added to - the file during block allocation, and verify eventually fails because - there are unverified blocks in the file. - - Reference #1871. - -commit c27c201de9c766aea5249d3eeb85b8103ea6cefc -Author: Keith Bostic -Date: Sat Apr 11 09:16:11 2015 -0400 - - Possible approach to avoiding lockout when sweeping files: before - closing the file, flush the file from the cache. - - Add a new cache-operation, WT_SYNC_CLOSE_SWEEP, that walks the cache for - a handle and discards any clean pages it finds. - - Add call to __wt_txn_update_oldest() before checking if a handle can be - swept. 
- -commit d20f20f1ac324030986b3ee23e1edf96486c92b4 -Author: Alex Gorrod -Date: Fri Apr 10 05:59:04 2015 +0000 - - Update file_manager=(close_idle_time=,close_scan_interval=) max values. - - The old maximum for both was 1000, the new value is 100 thousand. Setting - such large values is not recommended, but there is no internal limitation - on them. - -commit c36a3308f685d3b85efe9ac6ee0835f0974574b4 -Author: Keith Bostic -Date: Thu Apr 9 14:32:40 2015 -0400 - - Don't ignore sweeping entirely when we reach the open-file-count limit, - just ignore the in-use files. - -commit 46ef2555bbc51ce6453536e72202782be4949855 -Author: Keith Bostic -Date: Thu Apr 9 11:06:50 2015 -0400 - - Don't keep sweeping once we reach the minimum number of handles. - -commit 1fdfcc62726d25a7cceeeefff174a8e1bf9f9e67 -Author: Keith Bostic -Date: Thu Apr 9 12:39:03 2015 -0400 - - const: At condition ret == -1, the value of ret must be equal to -1. - CID 72082 (#1 of 1): Redundant test (DEADCODE) - dead_error_condition: The condition ret == -1 must be true. - -commit d04e3c25d46a5c4426e1c6d4881cd9e250014931 -Author: Keith Bostic -Date: Thu Apr 9 12:19:24 2015 -0400 - - Remove unnecessary atomic operation, fixing CID 69810 along the way. - - CID 69810 (#1 of 1): Parse warning (PW.CONVERSION_TO_POINTER_ADDS_BITS) - 1. conversion_to_pointer_adds_bits: conversion from smaller integer to pointer - -commit d585bdab980508e590cf70508f053182c556d6f2 -Author: Keith Bostic -Date: Thu Apr 9 12:00:04 2015 -0400 - - CID 72073 (#1 of 1): Redundant test (DEADCODE) - dead_error_condition: The condition session != NULL must be true. - -commit 21907f9193e30f51a59fcbaddfbc46cb7732d3b7 -Author: Keith Bostic -Date: Thu Apr 9 11:52:13 2015 -0400 - - Remove unnecessary error labels, fixing SERVER-17948 along the way. 
- - Coverity analysis defect 72088: Redundant test: - File: /src/third_party/wiredtiger/src/log/log.c - Function __log_decompress - /src/third_party/wiredtiger/src/log/log.c, line: 363 - At condition "ret != 0", the value of "ret" must be equal to 0. - -commit a29f4d2f40eee784950147af848fdbf277328b7f -Author: Keith Bostic -Date: Thu Apr 9 11:31:11 2015 -0400 - - SERVER-17954: Coverity analysis defect 72085: Redundant test - At condition "ret != 0", the value of "ret" cannot be equal to 0. - -commit 1298fd6db4f0c1d6133b3e547b2e2db51ec4a708 -Author: Keith Bostic -Date: Wed Apr 8 14:38:49 2015 -0400 - - Fix places where we were using the wrong link for traversing hash buckets. - -commit 7d0e5fe3dfa39f7ff377a1d4660bc2bc36dc0df8 -Author: Mark Benvenuto -Date: Wed Apr 8 10:19:33 2015 -0400 - - Enable test/fops for Windows, and add to CI - - - Added Windows shim for gettimeofday - -commit c6270b677499525067d5d729a6fbdce6ad2f533a -Author: Mark Benvenuto -Date: Wed Apr 8 13:28:41 2015 -0400 - - test/format for Windows - - Fixed an issue where fallocate was setting fh->size (incorrectly copied from ftruncate implementation) - -commit fac74b4665d6dfa3aebecf741c914fd1678fa628 -Author: Keith Bostic -Date: Wed Apr 8 08:47:06 2015 -0400 - - Rename file_manager.open_handles to file_manager.close_handle_minimum, - try and match existing naming for file_manager configuration options. - -commit 709cc8d7ac85d31aeae2387b192092910e6cf854 -Author: Keith Bostic -Date: Wed Apr 8 08:09:23 2015 -0400 - - Don't sweep unless there's a reason we need to close file handles. - Add a new configuration option, file_manager.open_handles that sets - a minimum number of file handles that must be open before sweep runs, - default is 250. Reference #1856, SERVER-17907. - -commit b59dfec2609847bb22bb48f4e7eede8c45312ce9 -Author: Keith Bostic -Date: Wed Apr 8 07:58:49 2015 -0400 - - Fix WT_STAT_ATOMIC_INCR, it didn't get upgraded to the new atomic - macros needed for the Windows port. 
- -commit 9f9fbb19ba19f90e4fc52d7568bd66427edb31e6 -Author: Pat Gunn -Date: Tue Apr 7 22:56:55 2015 -0400 - - Add a Python ex_stat example - -commit b79dcdebf0b1987b59fa70e50c8c61e5e0a64e64 -Author: Keith Bostic -Date: Tue Apr 7 11:52:12 2015 -0400 - - If the underlying split buffer grows, existing boundary references may - point into freed memory, switch the boundary "first byte" field from a - pointer to a buffer offset to avoid the problem, reference #1852. - -commit 27b37db17e70ef73432e1fb48c530246fd753670 -Author: Keith Bostic -Date: Mon Apr 6 07:27:25 2015 -0400 - - Fix a broken line, #1846. - -commit dbb58f0b8f7b26a70f71f6c67dba93c966b162e0 -Author: Keith Bostic -Date: Sun Apr 5 12:24:31 2015 -0400 - - Search the currently pinned page first in WT_CURSOR::search-near, - before descending the tree from the root. - -commit 353093e16eb4a955c4f2e2f4245577cd9156955f -Author: Keith Bostic -Date: Sun Apr 5 08:36:57 2015 -0400 - - WT_CURSOR::search() near current position. - -commit 96022e89162f746d8252db637a0305aed1965cd1 -Author: Keith Bostic -Date: Sat Apr 4 10:09:17 2015 -0400 - - Inside wiredtiger_open(), we may have allocated scratch memory when using - the dummy session or the subsequently created real session, and we don't - want to tie down memory for the rest of the run in either of them. - -commit 928409dbb64e222f722c5f6692f1d7638ce9e617 -Author: Keith Bostic -Date: Fri Apr 3 20:37:45 2015 -0400 - - Use scratch buffers in wiredtiger_open(), but clear them out when we're - done (we have no idea what other functions are using scratch buffers when - called via wiredtiger_open(), so just make them work). - -commit d7e2351db8b855af3b5b8860e000528ac99d57a4 -Author: Keith Bostic -Date: Thu Apr 2 19:44:16 2015 -0400 - - Windows doesn't have , it does have which is - included by . - -commit d3dacbffd2a87ea79ec05fa569bdd5d34f90254c -Author: Keith Bostic -Date: Thu Apr 2 13:06:19 2015 -0400 - - Make --with-spinlock=pthread_logging option compile again. 
- - Remove the WT_SESSION_IMPL argument to the fprintf, vfprintf, fflush and - fclose functions, there are places we want to use it that don't have - session handles, and it's not currently needed. Clean up error handling - in the vfprintf function. - -commit fccb479aa9b97fb22d9ec2827c94ba49faa5ab79 -Author: Keith Bostic -Date: Thu Apr 2 09:58:52 2015 -0400 - - Gcc 4.1 can't figure out that copy.v can't be used uninitialized. - Inline a length check in the short-key/value case to avoid it. - -commit dca44d7f2762052bf675a5edc0a2f63534c85cf1 -Author: Keith Bostic -Date: Thu Apr 2 08:55:41 2015 -0400 - - Coverity 44262 (#1 of 1): Parse warning (PW.MISSING_INITIALIZER_ON_CONST) - 1. missing_initializer_on_const: const variable "__clear" requires an - initializer. - - This isn't a bug, but we only use WT_CLEAR_INLINE() in a single place - in WiredTiger, and we can probably do better by not clearing the unpack - structure at all. - -commit 0b60cfdfa3912d3048e3c5dbce56db7745c20964 -Author: Keith Bostic -Date: Wed Apr 1 17:43:31 2015 -0400 - - Do a binary search of the base configuration options when checking - configuration at the API layer instead of a linear walk. - -commit 078cb46916b55c254abe1a966a2413410a5f6174 -Author: Keith Bostic -Date: Wed Apr 1 15:57:04 2015 -0400 - - Use the existence of the #include file to configure the - x86 vector instructions. Some old versions of gcc don't have it, but I - believe they're old enough that we don't care. - -commit 684fd71475cbc6b15290945af0160fac0313ad6b -Author: Susan LoVerso -Date: Wed Apr 1 15:44:42 2015 -0400 - - Reset eol if we continue. Return NOTFOUND if checksum mismatch. #1840 - -commit 507a3005b3bc4f9cc69153da5acb534702b734b8 -Author: Keith Bostic -Date: Wed Apr 1 09:40:36 2015 -0400 - - Don't #include , gcc 4.1.2 doesn't have it, use - instead. - -commit 9c29e0f13268c03038704372c069353c81357791 -Author: Susan LoVerso -Date: Tue Mar 31 16:49:47 2015 -0400 - - Modify log_scan callback args to send in next LSN. 
#1837 - -commit 76cba586685828fc3a8598b48e70c7614818859c -Author: Keith Bostic -Date: Tue Mar 31 12:11:04 2015 -0400 - - Coverity 1199719 (#1 of 1): Dereference after null check (FORWARD_NULL) - var_deref_op: Dereferencing null pointer "ref". - -commit f9edf738c6075601bb2885cd2aba3ea4a0134b5e -Author: Keith Bostic -Date: Tue Mar 31 12:00:50 2015 -0400 - - Coverity 1129071 (#1 of 1): Dereference before null check (REVERSE_INULL) - check_after_deref: Null-checking "conn" suggests that it may be null, - but it has already been dereferenced on all paths leading to the check. - -commit 4317a14ffead9029c6524c3a5013e1d91b2f0a02 -Author: Keith Bostic -Date: Tue Mar 31 11:47:50 2015 -0400 - - Coverity 1129018, 1129019, 1129020, 1129021: Side effect in assertion - (ASSERT_SIDE_EFFECT). - -commit a5bb492b41e4121ff69776ba70072585aef405af -Author: Keith Bostic -Date: Tue Mar 31 07:42:01 2015 -0400 - - When looking for the next-larger item (__col_insert_search_gt), stay - at the same level if the checked record is equal to the searched for - record (can't happen in the current use of this code, but it's the - correct thing to do in a skiplist). - - When looking for the next-smaller item (__col_insert_search_lt), the - search still has to be greater-than, reference #1835. - -commit f2055cab87688cbb26da659c8038dbb4f032eb30 -Author: Keith Bostic -Date: Tue Mar 31 07:41:40 2015 -0400 - - Add a __setitem__ to cursors in the Python API. - -commit 674170067c4016455e78d14ec24e3641d047f1c4 -Author: Michael Cahill -Date: Tue Mar 31 14:26:00 2015 +1100 - - Add a __setitem__ to cursors in the Python API, remove lots of boilerplate c.set_key ... c.set_value ... c.insert code. - -commit 4790f13cd8191d834f40c97a0c00d729f91acd1b -Author: Keith Bostic -Date: Mon Mar 30 09:52:34 2015 -0400 - - async_max_queue: collection statistic, aggregate doesn't apply, don't - clear. - - cache_eviction_maximum_page_size: collection statistic, aggregate - doesn't apply, don't clear. 
- - various txn_checkpoint_XXX statistics: collection statistic, aggregate - doesn't apply - - various Btree maximum size statistics: aggregate the maximum value, - rather than doing no aggregation at all. - - lsm_chunk_count: aggregate the count of LSM chunks. - - Add syntax checking code to stat.py to disallow aggregation flags - for connection-level statistics. - - Reference #1742. - -commit 0c9f1341e2fdb93d3bd4d3fc58176f6ad169825e -Author: Keith Bostic -Date: Sat Mar 28 13:15:53 2015 -0400 - - When we find a record in the slot's update skiplist, but then want to - jump past the rest of the deleted records, we have to adjust based on - the starting record of the slot, use the page's repeat array to find - that starting record. - - Another run at the __col_insert_search_gt (the greater-than skiplist - search), hopefully it's finally correct. - -commit b5edc28c2588a0257daf50f43db7b5bf335c7ef5 -Author: Keith Bostic -Date: Fri Mar 27 14:40:36 2015 -0400 - - Cleanup, fix, simplify the cursor-insert-greater-than search, add a - corresponding less-than search and hook it into the WT_CURSOR.prev - function. This runs better than before, but there are still problems. - -commit c7cdb2e1f1960bc9432185df8fb7b507198889ff -Author: Keith Bostic -Date: Fri Mar 27 10:38:17 2015 -0400 - - The gaps in column-store tables can be large enough we spend too much - time looping through the "deleted" records when writing out the page, - skip the boring part. Reference #1807. - -commit 488d064b45fb194ea2f3b9e2187214fb44b86a5f -Author: Keith Bostic -Date: Fri Mar 27 10:07:16 2015 -0400 - - There can be huge gaps in the variable-length column-store name space - appearing as deleted records. If more than one deleted record, do the - work of finding the next useful record. Reference #1807. - -commit e1ee6432fb5a948ebd5cafe2ba9c2b79411458f5 -Author: Mark Benvenuto -Date: Thu Mar 26 11:31:27 2015 -0400 - - Revert "Make LZ4 default off in scons builds for now." 
- - This reverts commit 67b71e1eaad1a5825f99a415d9851976f11dbfee. - -commit 003e6c3598408c7670f65a8720622c38fdaf148d -Author: Mark Benvenuto -Date: Thu Mar 26 11:17:57 2015 -0400 - - Use Standard C type uint64_t for zlib - -commit e12b9e0b005da7364330f4d3409256ded26ba90d -Author: Keith Bostic -Date: Thu Mar 26 18:31:15 2015 -0400 - - Add checks for Windows builds: _M_AMD64 and _MSC_VER. - -commit 1b1371c83e3e78feab0921c88a63d960288d58e0 -Author: Keith Bostic -Date: Thu Mar 26 11:09:28 2015 -0400 - - __wt_btree_size_overflow uses the page's WT_PAGE_INDEX, wrap the call - in WT_ENTER_PAGE_INDEX/WT_LEAVE_PAGE_INDEX. - -commit e4c9309756f8bcda4bd7b9be5232887cdea377be -Author: Keith Bostic -Date: Thu Mar 26 10:57:33 2015 -0400 - - Instead of asserting split-generation is set in WT_INTL_FOREACH_BEGIN, - move the test into the WT_INTL_INDEX_COPY macro (which is called by - WT_INTL_FOREACH_BEGIN), WT_INTL_INDEX_COPY is the code that reads the - actual WT_PAGE_INDEX value: if a page can split, it's not safe to look - at anything a WT_PAGE_INDEX references, and we have code that uses a - WT_PAGE_INDEX reference outside of the WT_INTL_FOREACH_BEGIN macro. - - Add two versions of the WT_INTL_INDEX_COPY macro, WT_INTL_INDEX_GET - which checks the split-generation, and WT_INTL_INDEX_GET_SAFE which - doesn't. - -commit 8bdb29fed9ef4a124e8c1c3c1a6c031f4ecbe130 -Author: Keith Bostic -Date: Thu Mar 26 09:34:19 2015 -0400 - - Remove safe version of the WT_INTL_FOREACH_BEGIN macro (it was only used - in a single non-DIAGNOSTIC code path), wrap child page-index walk during - a deepen split with WT_ENTER_PAGE_INDEX/WT_LEAVE_PAGE_INDEX, reference - issue #1799. - -commit 87326b721a443936e118e72e58e4f51e14845132 -Author: Keith Bostic -Date: Wed Mar 25 21:44:02 2015 -0400 - - Use the size of the vector chunk to decide if we'll execute vector - instructions, rather than some specific length. 
- -commit fc4eea6e3eb39cdf1b147d6a7b65af4f0d08f5c1 -Author: Keith Bostic -Date: Wed Mar 25 18:20:23 2015 -0400 - - Instead of using _mm_cmplt_epi8 when we find a mismatch, fall into the - slow comparison code. - -commit a29b65429f2963bb3e3ff8ca4416f16ae8ca2e52 -Author: Keith Bostic -Date: Wed Mar 25 16:28:34 2015 -0400 - - Cleanup #includes so the whole tree builds. - -commit 35b436dfd47895cc19310095f4f4e5f3b9b86501 -Author: Keith Bostic -Date: Mon Mar 23 17:51:50 2015 -0400 - - We don't have to reinitialize userp and treep when falling into the - slow/remainder loop, they're correctly positioned. - -commit a307a61135394d5006bd3257325bffd82ea80f21 -Author: Keith Bostic -Date: Mon Mar 23 17:14:30 2015 -0400 - - Don't call _mm_cmplt_epi8() until we don't compare equal. - -commit a609c82bfa6fbf80cd5fb853e1d97c16878a0180 -Author: Susan LoVerso -Date: Mon Mar 23 15:25:23 2015 -0400 - - Set checkpoint LSN to existing log record. #1700 - -commit 076336e9b244664440777cfeab9fe0d925d4c25e -Author: Keith Bostic -Date: Mon Mar 23 10:13:37 2015 -0400 - - Integrate Sasha's vectorized search code. - - -3.0.2, Mar 26 2015 ------------------- - -commit 17bd2356a5b17893e626749be399fb8fda23db1e -Author: Mark Benvenuto -Date: Fri Mar 20 01:07:59 2015 -0400 - - Use beginthreadex, and ensure we use the correct C calling conventions in all callbacks - -commit 269e847ad64dd12dfcadb58f84f905069e5b8dce -Author: Michael Cahill -Date: Wed Mar 25 15:47:46 2015 +1100 - - Split sweep into two passes: one that walks the handle list without locking and finds candidate handles to close, and a second pass holding the handle list lock that removes dead handles. - - refs #1814, #1811, #1808 - -commit 065a435f6b1d1b3fcb640d59c3109d0c2e24d308 -Author: Alex Gorrod -Date: Tue Mar 24 16:53:28 2015 +1100 - - Add statistics to track internal/overflow/leaf pages in cache. - - It is often useful to know if a cache is full of internal pages. 
I could have added page count tracking, but byte count tracking seems more useful (enough to justify the extra performance overhead). - -commit f4616895f1ac35aaa50fc1a68e0c6dd9e0cf7717 -Author: Michael Cahill -Date: Mon Mar 23 17:00:15 2015 +1100 - - If a walk for pages to evict ends on a page requiring forced eviction, don't keep it pinned. - - refs SERVER-16662, SERVER-17382 - -commit 49ddcca712db3a50c390f48b518f0835f28bc9d4 -Author: Keith Bostic -Date: Sat Mar 21 16:17:51 2015 -0400 - - Separate out the code to flush a file handle and rename the file into place. - -commit a0e88c41781c5b7ea0155fb57a58c91a964af4a3 -Author: Keith Bostic -Date: Fri Mar 20 19:08:45 2015 -0400 - - If the underlying FILE was opened for writing, flush and fsync it during close. - -commit 8d918f0ef8588056ecf729e72ffdd8bc0a79fd6c -Author: Michael Cahill -Date: Fri Mar 20 22:54:13 2015 +1100 - - Fix a race closing eviction helper threads: close the main eviction thread first, so the number of helper threads can be read safely. - - Partial fix for #1698 - -commit 294b0bce296bebf9790418e5575f59be5bec000c -Author: Michael Cahill -Date: Fri Mar 20 17:19:28 2015 +1100 - - Only align buffers on Linux if direct I/O is configured. Clarify why we round up the allocation size: it usually won't matter because we allocate multiples of the alignment size anyway. - -commit ee7456799277e19ff4771de79ac4eafac43aff9b -Author: Mark Benvenuto -Date: Thu Mar 19 16:16:15 2015 -0400 - - Visual Studio 2015 Preview support - -commit 9c60b462a25a36915207dde240579c5aa673f2c7 -Author: Alex Gorrod -Date: Fri Mar 20 00:09:42 2015 +0000 - - Update the eviction server to not set stuck if it isn't full. - - When only looking for pages to force out, the cache isn't really stuck. The trouble is that if we set stuck the eviction server doesn't clear walks which can lead to it always holding a reference to the page we (really) want to evict. 
- - refs #1777 - -commit f0138353697d8706bd1f26e83b9fff8f4e83af8d -Author: Keith Bostic -Date: Thu Mar 19 13:04:17 2015 -0400 - - Create the base configuration file in a temporary file and rename it into place so a crash can't result in a corrupted base configuration file. - - refs #1775, #1776, SERVER-17571 - -commit 2e15cd6fc322c90c763394c52056bfebd4153aad -Author: Michael Cahill -Date: Thu Mar 19 10:25:31 2015 +1100 - - Avoid EBUSY errors from truncate caused by checkpoints. - - refs #1643 - -commit 3188352d623de85803db3dd6e5b5188822e2db4f -Author: Michael Cahill -Date: Wed Mar 18 13:00:53 2015 +1100 - - If the session-level attempt to lock a handle gets EBUSY, fall back to the slow path. The sweep server relies on there being a retry loop if an exclusive operation such as verify conflicts with a sweep. - - refs #1404 - -commit 5d8641568ac27e21d6671cb646a818708eb0aa28 -Author: Susan LoVerso -Date: Tue Mar 17 15:38:57 2015 -0400 - - Set the end of the log if we find a zero hole. - - refs #1766, SERVER-17569, SERVER-17613 - -commit f38e325f87d94f178c932328632857361fc3c92c -Author: Alex Gorrod -Date: Tue Mar 17 04:30:11 2015 +0000 - - Implement review feedback. The new configuration strings are: - - file_manager=(close_idle_time=30,close_scan_interval=10) - - Update the sweep test to take advantage of new configurations and - shorten the runtime. - -commit 87dba78b65bc869c426363d86d02134aa8f8f2ac -Author: Michael Cahill -Date: Tue Mar 17 14:39:49 2015 +1100 - - Round up the "in-memory size" for WT_UPDATEs to a multiple of 32. They are a very common case, and for tiny updates, this seems to do a better job of matching tcmalloc's behavior at least. - - refs SERVER-17424 - -commit d277a088385401ff6e6664dd688a2fcab2ac4087 -Author: Keith Bostic -Date: Mon Mar 16 11:42:56 2015 -0400 - - Sanity check all splits for at least 100 keys on the page, if there are huge keys and a too-small cache, there's nothing to be done. 
- -commit d4aa136a20c9baf151c3185058ce089679e0de0e -Author: Alex Gorrod -Date: Mon Mar 16 06:10:12 2015 +0000 - - Ensure we free memory when statistics cursor open fails. - - refs #1760 - -commit 66622ac33357d53212ddfa362fdf3c6b439bf34b -Author: Alex Gorrod -Date: Mon Mar 16 05:55:45 2015 +0000 - - Update the split deepen sanity check. Allow a page that is using more than 1/4 of the cache to be split. - - refs #1759 - -commit f99114f785985d152ba8ddfa735771574e6bff5d -Author: Michael Cahill -Date: Mon Mar 16 16:43:27 2015 +1100 - - Fix the search for a free hazard pointer slot. Previously, it gave up early once all slots were allocated, even if they weren't all in use. - -commit 15fe04460c1f057811e0f3e02feb249c710d05ce -Author: Michael Cahill -Date: Mon Mar 16 16:40:49 2015 +1100 - - Track the number of hazard pointers active in the eviction server. Don't start a new walk if we are close to the limit. - - refs SERVER-17551 - -commit e6d8c8a56c4dc83f206d43fcaa21902b35b1e4e6 -Author: Alex Gorrod -Date: Mon Mar 16 05:20:22 2015 +0000 - - Add a new statistic tracking range of IDs pinned by a transaction. - - refs #1746 - -commit f954ab6b3fb6a51ecf3cf625cd3f5c5f818e0fde -Author: Keith Bostic -Date: Sat Mar 14 12:31:00 2015 -0400 - - Pass an allocated memory size to __wt_page_inmem(). - -commit 308dc500adcde7b62c8dbb9aa0fb401795445546 -Author: Keith Bostic -Date: Sat Mar 14 10:51:21 2015 -0400 - - Don't drop core if duplicate symbols are detected (caller's table gets free'd twice). - -commit dc19643859063a03b985d97e2f24b1262ec4b15d -Author: Keith Bostic -Date: Fri Mar 13 15:35:28 2015 -0400 - - Use SCNxxx #defines in sscanf() calls, not PRIxxx. - - We can't cast a pointer to a WT_LSN.offset to a pointer to a different type, we can't know WT_LSN.offset is the same size as that type. 
- -commit 99a992ed5207822ce2bc2fd69cb5b37408e6080f -Author: Keith Bostic -Date: Fri Mar 13 14:17:34 2015 -0400 - - Switch sscanf from SCNu46 to SCNi64 so we handle both hexadecimal and integer symbol/frequency values - - Add testing support for out-of-range symbol/frequency values. - - refs #1536 - -commit 48a6f6e76aafbf7b33dbb22325ddb4e8e27603a1 -Author: Keith Bostic -Date: Fri Mar 13 10:04:06 2015 -0400 - - Fix problems with configuration value parsing, break out the Huffman configuration file parsing code into a single routine. - -commit e7ef6d0c3d107f374de9924d981d731fe36fb4ec -Author: Alex Gorrod -Date: Fri Mar 13 16:32:37 2015 +1100 - - Update memory allocation accounting for new pages and realloc. - - They used to not always account for alignment overhead. - -commit 1e97f9730a6fd70c03784e292cb0435c8fb82354 -Author: Don Anderson -Date: Thu Mar 12 14:02:24 2015 -0400 - - Fix to track deleted cells. Remove unused vars. - -commit 5778e4098862665d4cdaf2cda3ef40d47d951efb -Author: Alex Gorrod -Date: Thu Mar 12 16:35:27 2015 +1100 - - Add ability to configure sweep server timings via API. - - Still needs some test code. - -commit a1c9f8f8833482c9773fc30b7b9a7b5f1ab09014 -Author: Don Anderson -Date: Wed Mar 11 22:48:12 2015 -0400 - - Made some adjustments to stats so that the btree entries is an accurate total for row and column stores. Entries that appear in internal pages (pointing to leaf pages or other internal pages) are no longer counted. Added and modified tests to check the number of btree entries reported by stats against the number of KV pairs expected to be in btree. - - refs #1733 - -commit 6d7c061a8292021195b32260a8b41d3e92e1958f -Author: Susan LoVerso -Date: Wed Mar 11 11:11:57 2015 -0400 - - Support no-logging mode in wt command for debug. 
- - refs #1732 - -commit 4a802bd592b6c8506b07900bf89a9d9fe53cfc25 -Author: Keith Bostic -Date: Wed Mar 11 09:30:39 2015 -0400 - - If the application only has 1 outstanding async operation at a time, we won't let them specify that. Change the minimum ops_max value to 1, but set the minimum maximum bound in the code to 10 as before. - -commit 3a412a2371e80ed9e26e546b6fe3c09d2d4e4091 -Author: Keith Bostic -Date: Wed Mar 11 09:29:55 2015 -0400 - - If the application re-uses a WT_SYNC_OP structure, we assert: return EINVAL instead, it's easier to debug than a core dump. - -commit 16119f98fee9525bf1990ccbcc979b9f08a8b000 -Author: Michael Cahill -Date: Wed Mar 11 20:24:20 2015 +1100 - - Don't cache an ikey before winning the race to split into a parent. - - refs #1582 - -commit 4d0b4093e3d3c9fa0be2bbb01467579f05deddc5 -Author: Alex Gorrod -Date: Wed Mar 11 03:44:22 2015 +0000 - - Update visibility check to know about checkpoints. - - This should allow us to evict more pages while a checkpoint is in progress. We can evict dirty pages from a file once the checkpoint is finished in that file. Similarly for the row store specific obsolete update check. - - refs #1745 - -commit 272daddb3734cd196cd303df4271f7e9e8f00cd3 -Author: Alex Gorrod -Date: Wed Mar 11 02:55:56 2015 +0000 - - Add a global checkpoint generation and track it per data handle. Not yet used, but will be used to allow more eviction during checkpoints. - - refs #1745 - -commit 89db28287d7b212efb31203eca253fbf144ea207 -Author: Alex Gorrod -Date: Tue Mar 10 07:19:59 2015 +0000 - - Mark statistics with the right aggregation flags (specifically cache dirty tracking and log preallocation). This helps wtstats graph generator. 
- - refs #1742 - -commit e51cc35a88ed497f911a25fec9f4bfcc6617d6a2 -Author: Mark Benvenuto -Date: Mon Mar 9 15:40:59 2015 -0400 - - SERVER-17471: Use uint64_t instead of long since long is compiler specific - -commit 105903c1beb0d16b40e243ed3624417c4fa0702c -Author: Keith Bostic -Date: Mon Mar 9 12:19:50 2015 -0400 - - Remove pockets of case-sensitivity for configuration strings. - -commit 1e8c2b89de09462253231213329674b8769d11fe -Author: Keith Bostic -Date: Sun Mar 8 13:05:51 2015 -0400 - - Change the statistics cursor WT_CURSOR.reset method to re-load statistics values. - - refs #1533 - -commit 769dc5976fe0d2448fb4fde511c5bc29eea39bfb -Author: Keith Bostic -Date: Tue Mar 3 16:45:09 2015 -0500 - - Change checkpoints to do first-fit allocation when we start writing the actual checkpoint blocks, that way if we delete significant space, the checkpoint blocks won't prevent file truncation. - - -3.0.1, Mar 9 2015 ------------------ - -commit d654795bb763b95d14604b9b65d09ae79b8ee5b6 -Author: Alex Gorrod -Date: Mon Mar 9 05:35:33 2015 +0000 - - Add a test case for checkpoint consistency. - - We recently fixed a bug where updating a page after the write leaves phase of a checkpoint had completed, then either checkpointed again or closed the database. We would end up with inconsistent data. That was the case because we weren't marking trees dirty all the time. - - This test case reproduces the scenario so we don't reintroduce the failure. - - refs #1735 SUPPORT-1248, SERVER-17506 - -commit 0315ee75f712ed0ccddca0616339de93b17835a3 -Author: Alex Gorrod -Date: Mon Mar 9 11:21:06 2015 +1100 - - Bump release version to 2.5.2 - -commit 4235c69d37474fb4e14673e0ea99337659db948d -Author: Michael Cahill -Date: Mon Mar 9 08:32:50 2015 +1100 - - When skipping a dirty page during a checkpoint, make sure the tree is marked dirty. 
- - refs SUPPORT-1248, SERVER-17506 - -commit 8382d14f32efc53b19aecd596cab3ba0d682b22d -Author: Keith Bostic -Date: Fri Mar 6 16:10:37 2015 -0500 - - Only increment/decrement from the current position on the first position set by our caller, that is, the position passed-in from our caller is the only position the caller has already seen. - - refs SERVER-17345. - -commit ed9c48c7b8fa5dd3362e417fda8337f1690585ed -Author: Michael Cahill -Date: Fri Mar 6 22:48:45 2015 +1100 - - Ignore empty child pages in column stores. - -commit 553a351ca6a81179c7f1db9c04d6f96aef0545ac -Author: Alex Gorrod -Date: Thu Mar 5 05:46:15 2015 +0000 - - Don't allow LSM bloom create to block waiting for space in the cache. - - We are only accessing one page at a time, and allowing the bloom create to block can lead to LSM not making progress. - - Refs: #1720 - -commit 5285b768de3237b4379f1526176efdeb55860971 -Author: Michael Cahill -Date: Thu Mar 5 16:15:08 2015 +1100 - - Remove the special "discard" path for trees marked clean: always go through eviction, which checks page modify flags. This may mean additional writes to internal pages when discarding a tree, but means that if a discard stops part-way through, the remaining in-memory tree has not lost any context. - - refs SUPPORT-1248, SERVER-17510 - -commit 00edf7a47115923bc9f3eaa7eee84c9f7d6b0d77 -Author: Michael Cahill -Date: Thu Mar 5 16:15:07 2015 +1100 - - If a page is split by eviction, mark the tree dirty to avoid having dirty pages in a tree marked clean. - - refs SUPPORT-1248 - -commit 57d3eba53fb91a0287374b9642b7cd4ef644854a -Author: Susan LoVerso -Date: Wed Mar 4 10:00:59 2015 -0500 - - Add fsync before closing log files and after header. #1717 - - refs SERVER-17451 - -commit d970bfe6b1bed7d1919b800bf2d65a3789b74d6f -Author: Keith Bostic -Date: Tue Mar 3 09:38:59 2015 -0500 - - Don't set eviction_workers_min/eviction_workers_max in the connection structure before checking the values are OK. 
- -commit 90f3f34a97440b6788a1a558e560a33fd116f166 -Author: Susan LoVerso -Date: Tue Mar 3 09:26:38 2015 -0500 - - Move writing into log worker thread. #1683 - -commit 1266bbb1143b22fec6b1c255b3aade5d0506477e -Author: Alex Gorrod -Date: Tue Mar 3 03:37:18 2015 +0000 - - Fix a bug in the reconfigure API related to shared cache quotas. - - While fixing the bug simplify the code flow for reconfigure and caches/shared caches. - - refs #1712 - -commit 1ebd617e6dfcf542983d62d9666c5a328dd41bb8 -Author: Keith Bostic -Date: Mon Mar 2 15:31:05 2015 -0500 - - Add overflow key/value counts to the statistics code - - refs #1520, #1703 - -commit a2166ead528ef61da478db67e3c0209a6bef5ac6 -Author: Alex Gorrod -Date: Mon Mar 2 14:18:37 2015 +1100 - - Allow memory_page_max to be at most 1/4 of the cache size not 1/2. - - If we let a single page grow to half the cache size, it's too easy for the cache to get pinned full if it is tiny to start with. - -commit b036921625e415bb66ac458922b81a9fae07740b -Author: Alex Gorrod -Date: Mon Mar 2 14:17:12 2015 +1100 - - Take all eviction candidates if we are aggressive. - -commit 0ddd3face0b99f5653001825bf3df0662ffcdc10 -Author: Michael Cahill -Date: Mon Mar 2 13:07:08 2015 +1100 - - Reverse the direction of the LRU walk each time the eviction server goes to sleep. Keep looking for candidates if eviction is stuck. Don't give up our walk position if eviction is stuck. - -commit cff10ba30f2eac379197e5d7dea49da2b8159890 -Author: Don Anderson -Date: Fri Feb 27 15:05:24 2015 -0500 - - Added test for wt dump on an index. - - refs #1709 - -commit a9f0e3ac769a060a8c3b06bac71fbed0e5f46cc6 -Author: Michael Cahill -Date: Fri Feb 27 16:59:35 2015 +1100 - - If we encounter a dirty page when closing a clean tree, switch to checkpointing. - - refs SERVER-17319, #1643?, #1404? 
- -commit bfcf5987b2b6f08d931d620330aed46837e3a8c2 -Author: Michael Cahill -Date: Thu Feb 26 16:32:09 2015 +1100 - - Add some paranoia to LSM around transaction checks: make sure we have allocated an ID before using it for anything. - - Recheck switch transactions before doing update checks in old chunks. - - refs #1641, #1701, #1702 - -commit da0bc67c821282e9fd0da725279811b59b25a675 -Author: Michael Cahill -Date: Thu Feb 26 16:32:09 2015 +1100 - - Add some paranoia around setting row-store internal keys. - - refs #1582 - -commit 5ea91f6ed0e0677530c5ab8215f81eb48ea307b8 -Author: Alex Gorrod -Date: Thu Feb 26 04:51:25 2015 +0000 - - Several optimizations to large page eviction: - - * Don't update the read generation on page in if it's set to oldest. - * Clear the walk positions before the eviction server sleeps. - * If only looking for pages that would block add them all to the queue. - * If evicting dirty pages use the worker threads, not the server. - -commit 0eecd0a2d97771380ecbd7fd27bd44988db1148c -Author: Alex Gorrod -Date: Thu Feb 26 02:08:36 2015 +0000 - - Fix a bug in checkpoint, where it could get an EBUSY return. - - The case that could return EBUSY was when checkpointing with a specific target, while that target was open exclusively or for a bulk load. - - Refs #1404 #1589 - -commit 392a540deec817c5d6738b8e848a68882df3ac8a -Author: Susan LoVerso -Date: Wed Feb 25 15:03:50 2015 -0500 - - If the LSN given doesn't exist, don't return an error, but do force recovery. Fix recover.sh to grep the CONFIG. #1700 - -commit 59b699b7085868d1b12a41ae4cd7a01f25f6e865 -Author: Alex Gorrod -Date: Tue Feb 24 23:55:20 2015 +0000 - - Handle the case where a large record pushes us over a page boundary and we spill across two pages correctly. The previous fix could let us span additional space. - - Fix another bug in the fixup code - ensure there is enough space in the temporary buffer when fixing up after the fist page is full. 
- - refs #1697 - -commit ad8b58188961943e74c57b85e3b976aa03b79617 -Author: Susan LoVerso -Date: Tue Feb 24 18:54:18 2015 -0500 - - Set flags to SLOT_INIT_FLAGS on free. refs #1683 - -commit 45e4c049044efc96c682f390466a35d22dac555f -Author: Keith Bostic -Date: Tue Feb 24 16:17:32 2015 -0500 - - Move WT_PAGE.u.row.d next to WT_PAGE.u.row.entries, it reduces cache misses inside of row-search. - - refs #1665 - -commit 3c6d7adf422f432ae117e2292dcce00cc3b531a3 -Author: Keith Bostic -Date: Tue Feb 24 16:07:16 2015 -0500 - - Turn off key prefix-compression and rebuild the key before doing the boundary split. The size of the key is likely to increase by a few bytes, and if the value is large enough to consume almost all of the buffer, we overflow the space available. - - refs #1697 - -commit ca9ab16c320f6f154ff1fd3d0b65316f87e8bddc -Author: Alex Gorrod -Date: Tue Feb 24 06:51:14 2015 +0000 - - Fix a bug in reconciliation. In cases where we decide to squeeze one more large entry onto a page, we could have attempted to decrement space available negative. - -commit 777c35b074fce3656c14ca5770b424a65c719134 -Author: Susan LoVerso -Date: Thu Feb 19 12:32:16 2015 -0500 - - Add log worker thread to advance write_lsn. - - refs #1683 - -commit ae686a225a011bac07119e2d66e837e08d5a3a0e -Author: Keith Bostic -Date: Tue Feb 10 12:46:04 2015 -0500 - - Sasha says (my paraphrasing): I simply rearranged the fields in the WT_PAGE struct, so that u.row.d and u.row.entries are close together at the very end of the definition of union u, and the "uint8_t type;" immediately follows that. - - -3.0-RC10, Feb 24 2015 ---------------------- - -commit 2fdfb2bbed56e42e1717e567828c68d0b2eb868d -Author: Michael Cahill -Date: Tue Feb 24 14:44:48 2015 +1100 - - Review places that set/clear session->dhandle, replace with macros. Change callers to save/restore if they need to release a handle after a call. 
- -commit ce89b608835561b11ce4e525a5ebdad86558f115 -Author: Keith Bostic -Date: Mon Feb 23 19:55:56 2015 -0500 - - The statistics server has open handles and may be opening underlying handles, make sure we don't overwrite them, reference #1694. - -commit 8827d909d7f14e9ad767d909d65598508ff0025c -Author: Keith Bostic -Date: Mon Feb 23 09:39:21 2015 -0500 - - Add an explicit barrier after setting the checkpointing value. (I'm pretty sure it's not actually necessary to have a barrier in the current code, but the barrier we rely on is in a different function and isn't always called, depending the eviction configuration of this file, I'd just rather be safe than try and debug this some time in the future.) - -commit 10abb2c47cffbcc215c3507256cee3e2cae2dd5f -Author: Michael Cahill -Date: Mon Feb 23 15:23:54 2015 +1100 - - If eviction is walking a file with only one live page (e.g., the last page), be careful to step over it, or eviction's hazard reference can make forced eviction stall. - - SERVER-17344 - -commit f72367f97357cf3e77f0d57d39992686b400ebe7 -Author: Keith Bostic -Date: Sun Feb 22 15:40:37 2015 -0500 - - Make sure we have a local copy of the start/stop cursor keys before calling into the underlying Btree range truncate function - - SERVER-17141. - -commit 09d345951d43e7ed928980048e5c5c927406a66b -Author: Keith Bostic -Date: Fri Feb 20 11:25:22 2015 -0500 - - If WT_SESSION.truncate isn't given a start cursor, instantiate one, it's always faster to traverse Btree objects in a forward direction. - -commit ce38bc5b40f1aa3ffc07cdd2cf993e32143135b5 -Author: Keith Bostic -Date: Fri Feb 20 08:57:57 2015 -0500 - - We don't need to do a search in the row-store cursor truncate setup code, the WT_SESSION.truncate API code already did one. 
- - This does not mean WT_SESSION.truncate can't return WT_NOTFOUND in some cases, the first thing cursor truncate does is a cursor remove call in order to acquire the page's write generation information -- that remove call does the usual "discard my current reference and get a new one" work, which allows another deleting thread to race and remove one of the truncate's start/stop keys. - - refs SERVER-17141 - - -3.0-RC9, Feb 18 2015 --------------------- - -commit bf3ee2cd064b46cf0175d75950c825aa1f42c694 -Author: Michael Cahill -Date: Wed Feb 18 12:53:41 2015 +1100 - - Flip cache overhead to apply to the allocated bytes rather than the total size. Include the overhead in stats so that tools (e.g., mongostat) report accurate cache full and dirty percentages. This also makes eviction triggers and targets meaningful: with the default trigger of 95% and overhead 8%, eviction was previously never triggered until the cache was completely full. - -commit f9e6f942cf73c8a53aaadbc587c1b7efad6cc832 -Author: Keith Bostic -Date: Tue Feb 17 11:33:13 2015 -0500 - - Coverity notes the TXN_API_END_RETRY macro has an unnecessary test for "(ret == 0)" at the end of the do {} while loop. - -commit c34a56f357e21d134a2d9d0fefc032544069d8d7 -Author: Michael Cahill -Date: Tue Feb 17 21:56:34 2015 +1100 - - Allow the maximum number of eviction threads to be reconfigured. This was previously permitted by the API, but the array of thread contexts was not correctly resized, leading to segfaults. - - refs SERVER-17293 - -commit 67527fc235406469e69dbaec3dcd571469e660c0 -Author: Michael Cahill -Date: Tue Feb 17 21:50:53 2015 +1100 - - Make the eviction walk incremental: don't spend too long in any one file, fix tracking of whether we are making progress. - -commit 788265ed273c63183053e6325a9aa03c89c02860 -Author: Michael Cahill -Date: Tue Feb 17 21:48:44 2015 +1100 - - Combine the various checks for whether a page can be evicted into one place. 
- -commit 748e7b0c58b358b14340bacae41f9c46f3c06f7e -Author: Michael Cahill -Date: Tue Feb 17 21:47:57 2015 +1100 - - Skip hot pages during write leaves: checkpoint will have to visit them anyway. - -commit a9de0f7ac8ad373d7aef6a480c69a2a7e0b55c59 -Author: Michael Cahill -Date: Tue Feb 17 21:18:41 2015 +1100 - - Run recovery after crashing test/format in the recovery test. - -commit 4733961a3c1fa37988178d1b1dd4eb44d83b63f6 -Author: Thomas Rueckstiess -Date: Mon Feb 16 12:50:26 2015 +1100 - - fixes and improvements for wtperf parsing - - - convert wtperf microsec to millisec - - don't skip monitor* files when parsing directory - - parsing code reorganization - - renamed wtperf stats fixture - - added tests - -commit dc396e1cd64871219b9e5a1b6558707feb70706e -Author: Keith Bostic -Date: Sun Feb 15 13:44:30 2015 -0500 - - Clear the btree object statistics we're about to count, otherwise each cursor gets a cumulative value. - -commit 195b144bb37814b31cfa413029cda0b28f13f261 -Author: Keith Bostic -Date: Fri Feb 13 12:06:36 2015 -0500 - - Don't map WT_NOTFOUND to ENOENT unless a uri was specified, that's the only interesting case. Reference SERVER-17141. - -commit e9d7fee2c2c08985b8e2d2716e899853c5198290 -Author: Thomas Rueckstiess -Date: Fri Feb 13 17:22:05 2015 +1100 - - added support to parse wtperf files. - - they go into a separate stats section named "wtperf". - -commit 29d0d26fd1cd76392ea8225c1c4022ca54443737 -Author: Keith Bostic -Date: Thu Feb 12 18:05:33 2015 -0500 - - Ignore unexpected information in the metadata entry, the metadata entry might have been created by a future release, with unknown options. - -commit 05f07753059a4fa7f0f1bab7a107a9e6d17bf4af -Author: Keith Bostic -Date: Thu Feb 12 15:42:35 2015 -0500 - - Remove the requirement of a HAVE_DIAGNOSTIC build for the verify commands to work (except for dump_offsets, that requires the btree debugging code and so won't work anywhere but a HAVE_DIAGNOSTIC build). 
- -commit 006ed9f17c7fc0fe65dc43717ed0239b3bac564c -Author: Keith Bostic -Date: Thu Feb 12 15:31:36 2015 -0500 - - Add support for a new verify debug option, "dump_shape", that reports the levels of the tree. - - __wt_config_gets() returns WT_NOTFOUND when there's no entry, don't fail every command when DIAGNOSTIC #defined and debug options aren't set. - -commit 46b7721215856d08ca3a37f7ffc27c57b1d4c1d7 -Author: Susan LoVerso -Date: Thu Feb 12 13:27:32 2015 -0500 - - Add recover config setting and use it in the wt command. #1651 - -commit 0305a51ffba383af13d6078d409a03b249c502c5 -Author: Don Anderson -Date: Wed Feb 11 10:21:04 2015 -0500 - - Add test to detect file ID problems in recovery. Refs #1622. - -commit fc0ff5a9ea09e54512353d2275126cb54dbc5451 -Author: Susan LoVerso -Date: Tue Feb 10 13:02:28 2015 -0500 - - Allow 'wt' command to run with or without recovery. #1651 - -commit a26d87a53eb2ac2dcae9312b7979499c34c11613 -Author: Keith Bostic -Date: Mon Feb 2 19:20:33 2015 -0500 - - Replace wiredtiger_strerror_r with WT_SESSION.strerror, reference #1516. - -commit 33c146b51fdac86999e2eaa67f5636490eb441fb -Author: Michael Cahill -Date: Thu Feb 12 13:44:35 2015 +1100 - - Disable aggregation across all open checkpoints if statistics cursors don't specify a checkpoint. - -commit 04ec3d021d2f8b08b69d3ea5d0f243f468c71f2e -Author: Michael Cahill -Date: Thu Feb 12 13:00:49 2015 +1100 - - Move server thread waits to the beginning of their loops: check that we're still running before waiting. This makes more sense to me, but also fixes a problem introduced recently where the checkpoint server could hang on shutdown if the signal from the closing thread got lost. - -commit 85aae87cb1e019b0bcac4854e6508f11104f5339 -Author: Keith Bostic -Date: Wed Feb 11 12:34:03 2015 -0500 - - Mimic Alex's fix in 152a0ef, to fsync created files to disk, for truncated - files, never surprise the upper-layer. 
- -commit f445f3bf63e3fa096479c5963f75d91e02f9b616 -Author: Michael Cahill -Date: Wed Feb 11 17:49:04 2015 +1100 - - If logs crossed the threshold size while we were taking a checkpoint, don't take another one immediately. - - Should help with SERVER-17206, where we saw two checkpoints every iteration. - -commit 0d85a9716b786de5fc90c00fb31765ade8aefd1f -Author: Michael Cahill -Date: Wed Feb 11 17:48:03 2015 +1100 - - Check if a page was recently split before doing forced eviction. We used to do this, but it got lost in a recent reorg of __wt_page_release. This change should mean that after an in-memory split, application threads that are appending have time to move to the new page at the end of the tree, rather than getting stuck trying to force out a page. - - SERVER-16938, SERVER-17121 - -commit 545d064fd4cbb0b35dc536e772c60b26a193d3f2 -Author: Michael Cahill -Date: Wed Feb 11 17:45:26 2015 +1100 - - When doing truncates, if we see a clean page in memory, try to evict it before truncating. It should be cheap (just freeing memory), and if the eviction succeeds, the fast truncate code can kick in and mark the whole page deleted immediately. Otherwise, truncate will mark each record on the page deleted, and the next time through will try to force that page out, which has to go through reconciliation to figure out that all of the records are deleted. - - SERVER-17157 - -commit 9bbb8595abd6ac962a0debf20a6cdcef73d83855 -Author: Michael Cahill -Date: Tue Feb 10 16:09:36 2015 +1100 - - Allow size-limited LSM trees to have Bloom filters, based on the normal configuration. - -commit c040f84a765c7c39f03e173a555eb50f85e2e698 -Author: Michael Cahill -Date: Tue Feb 10 15:58:02 2015 +1100 - - Re-enable the global setting to disable LSM merges. - - refs #1657 - -commit 8f14899ba0ce5b1a8df689e3c68db9a68bfeee66 -Author: Alex Gorrod -Date: Tue Feb 10 04:36:57 2015 +0000 - - Fix a bug when re-opening an LSM tree. 
- - We could have attempted to update the last chunk that is already on disk. - -commit d8263d46c1aa136d24ef194a8f62f0b02b92b9b0 -Author: Michael Cahill -Date: Tue Feb 10 15:11:59 2015 +1100 - - Improve LRU eviction of large pages: don't give up because a large page has recent updates: push on and try to do eviction and restore updates. - -commit eb02caa2564a18e857d18ef4b3f25683b438111c -Author: Michael Cahill -Date: Tue Feb 10 15:01:37 2015 +1100 - - Fix a local variable read when looking for pages evict racing with a page becoming dirty. - - refs #1660 - -commit da4d99e7ad57057a1b8397629d59a3c83c28de21 -Author: Alex Gorrod -Date: Tue Feb 10 02:32:25 2015 +0000 - - Fix a bug in LSM cursor open. - - The bug caused us to re-open more cursors than necessary in open. - - Related to fix: 439a655e - -commit eec16c3052af107bbe57aaf547eb8e70d2de4966 -Author: Alex Gorrod -Date: Tue Feb 10 00:26:37 2015 +0000 - - Don't do LSM merge throttling if merges are disabled. - -commit fcee4c8ce0b5db9d3340169deb321601b81f4a1b -Author: Keith Bostic -Date: Mon Feb 9 14:04:02 2015 -0500 - - Track splits during eviction by data-source as well as by connection. - - Don't double-count in-memory splits (we're incrementing cache_eviction_split in the underlying split-parent routine, not in the caller, so it's counting both normal and in-memory splits). Instead, cache_eviction_split is normal eviction splits, cache_inmem_split is in-memory -splits, - -commit 3d1f9eace79b1aff84369d0caee245f9d6d96a60 -Author: Alex Gorrod -Date: Mon Feb 9 06:25:36 2015 +0000 - - Add a mode to LSM where we can limit the size of data in the tree. - - A feature request to allow for a high insert throughput into a table with a size limitation. - - Adds a new configuration option to WT_SESSION::create which is lsm=(chunk_count_limit=0), default to 0 which is disabled. 
- - Refs #1652 - -commit c63ba34c915d95c156aaf6c47a04fe6d361b91ad -Author: Michael Cahill -Date: Mon Feb 9 14:07:57 2015 +1100 - - Don't double-count the on-disk header size when setting split boundaries. - - refs #1655 - -commit 152a0efdbd3ea66b142f52eed3c9224437143eec -Author: Alex Gorrod -Date: Mon Feb 9 12:25:10 2015 +1100 - - Fix a bug in table create. A crash could cause recovery to break. - - Refs SERVER-17204 - - The bug is that we weren't doing an fsync of the file after it was created. Recovery assumes that if there are records for a particular file, then it will exist on disk. - -commit 4d50f5878073e582567848ae03ee506bb5058227 -Author: Alex Gorrod -Date: Mon Feb 9 00:43:27 2015 +0000 - - Remove obsolete updates every time we add a new update. - - We used to only do the check when the cache was full. That could lead to update chains growing immensely long, which is bad. - - Refs: #1647, SERVER-17195 - -commit e891a1f312850bcaaf5183f3fd2e091567044a96 -Author: Keith Bostic -Date: Sun Feb 8 17:59:49 2015 -0500 - - If we find a "removed" page, clean or dirty, leaf or internal, fast-path eviction, it helps with append-only workloads. - -commit ab2a7e9b397adf657081458e11f3dc472b10fd2b -Author: Keith Bostic -Date: Thu Feb 5 15:54:01 2015 -0500 - - There's a problem that went in in #1282, the key difference is that we are setting a split boundary at the end of the first page when there is more than a page worth of data. See also #1630 and #1631. This is an alternate approach to #1631: the real change is to fallthrough into the split case if the next item won't fit, callers of the split code can't handle failure from split, it has to create enough room for the next item to be entered into the buffer. - -commit 90a352717a45a40d047b33c9fb00e7174e1ae04f -Author: Susan LoVerso -Date: Thu Feb 5 12:59:08 2015 -0500 - - Initialize first_lsn if we have no logs. 
#1638 - -commit 7cc7efb75c90e778f9757b954ad3ec85912b58fd -Author: Don Anderson -Date: Thu Feb 5 12:20:55 2015 -0500 - - For wt printlog, make operations into a JSON array. Without that, any tool that parses JSON is almost certain to merge successive values of repeated fields. - Refs #1438. - -commit 5bf11d893548804b890836a3d9ef4335c4319bb7 -Author: Susan LoVerso -Date: Wed Feb 4 15:46:30 2015 -0500 - - Add name_hash and hash bucket queues for fh and block. #1643 - - SERVER-17078 - -commit 3b0c18f612c9cf4d61bc13785ff7125fa67b265a -Author: Alex Gorrod -Date: Wed Feb 4 06:27:07 2015 +0000 - - Keep filling pages in reconciliation until we hit a boundary. - - This reverts some of a change for #1282 (without reverting the functionality in that change, AFAI can tell). - - Refs #1630 - - -3.0-RC8 Feb 4 2015 ------------------- - -commit d8b7f0b8db92a2ad6d64b95cafeaf20f0a90c8ce -Author: Michael Cahill -Date: Wed Feb 4 16:00:11 2015 +1100 - - Updates should always mark pages dirty (before checking for obsolete updates to free). - -commit 0947f82e01587836277d911b147bc98eefb58507 -Author: Keith Bostic -Date: Tue Feb 3 10:28:00 2015 -0500 - - Fixes for split cache accounting: multi-page splits weren't correctly accounting for the allocated WT_REFs, insert splits weren't correctly handling the new right-page's instantiated key (the parent needs to be incremened by both the left- and right-hand page's keys, and cannot assume it's the same size as the original WT_REF's key), insert splits need to increment the parent page's WT_REF size by two, not one. - -commit df96addef5f3ffcb495b4bf54390cf3fd41ac924 -Author: Michael Cahill -Date: Mon Feb 2 16:45:46 2015 +1100 - - In recovery, track the maximum file ID in the metadata, regardless of whether there are any updates to roll forward. - - Previously, we tracked the largest file ID that was updated in the logs being rolled forward. 
It was usually the case that the most recently created file was also the most recently updated, so that calculation usually worked and wasn't detected until the repro in SERVER-17142 that created tables, did a clean shutdown and restart, then created more tables and did a dirty shutdown and restart, which was rolling forward updates into the wrong tables. - - refs SERVER-17142, SERVER-17131(?) - -commit 71f1559c91ed119082ebe42772da15e28915e1c8 -Author: Michael Cahill -Date: Tue Feb 3 10:40:27 2015 +1100 - - Start with clean trees so we can detect updates racing with sweep. - - Use a deleted ref to a leaf page that is created on first update, which is the same state the tree should be in if an empty leaf page is evicted. The only wrinkle is that bulk operations expect to find a leaf page in the tree: create it explicitly before the bulk insert starts. This was probably a bug before: if we had created a tree and kept it around for long enough with cache pressure before a bulk load started, the initial leaf page could have been evicted. - -commit 8545c4b3b7f5ed306215c82f1ad1cbe3664f0c50 -Author: Keith Bostic -Date: Mon Feb 2 17:13:09 2015 -0500 - - Make the "split to deepen the tree" configuration values real, stored - in the metadata file, but they remain undocumented for now. - -commit fb769dafee4aca91a60a28cd89317c268ac79d4f -Author: Keith Bostic -Date: Mon Feb 2 16:36:22 2015 -0500 - - WT_CELL_ADDR_DEL is 0, so we can't test vtype against 0 to know if it's - been set or not. Reference SERVER-16866. - -commit feca80738c1b9103b4faa04ddb0718344347f640 -Author: Susan LoVerso -Date: Mon Feb 2 13:53:21 2015 -0500 - - Wrap calls to functions using pindex with WT_WITH_PAGE_INDEX. - -commit 23f2e1ba0680a2e8fa7a081f1b46e1ae2ab220d4 -Author: Michael Cahill -Date: Mon Feb 2 17:18:03 2015 +1100 - - Once we decide to force-evict a page, do it directly rather than setting read_gen and hoping page release agrees. 
- -commit 5f00de07b5bad20a6ffb5ec7d412c4ca0b10c64f -Author: Michael Cahill -Date: Mon Feb 2 17:11:27 2015 +1100 - - split_gen paranoia: always increment split_gen once per split, use the allocated value to check for existing readers. Make sure that publishing a split_gen doesn't miss an update. - -commit 10a74d6af4f945e34368bc5754797ef1d684d8ab -Author: Michael Cahill -Date: Mon Feb 2 16:52:34 2015 +1100 - - If discarding a tree for sweep races with an update, give up the discard gracefully. - - refs #1618, SERVER-17048 - -commit a2d20dc49cac870977d91213a7fe6dabf362ce70 -Author: Michael Cahill -Date: Mon Feb 2 16:45:46 2015 +1100 - - In recovery, track the maximum file ID in the metadata, regardless of whether there are any updates to roll forward. - - Previously, we tracked the largest file ID that was updated in the logs being rolled forward. It was usually the case that the most recently created file was also the most recently updated, so that calculation usually worked and wasn't detected until the repro in SERVER-17142 that created tables, did a clean shutdown and restart, then created more tables and did a dirty shutdown and restart, which was rolling forward updates into the wrong tables. - - refs SERVER-17142, SERVER-17131(?) - -commit b0a828b262a2d0d3cf1361eed98aa25a1168a7a6 -Author: Keith Bostic -Date: Sat Jan 31 12:59:34 2015 -0500 - - We no longer calculate allocation overhead per allocation chunk, revert the workaround the problem with page memory size calculations during splits where we forced the new parent page memory size to 5% of its current value; reference #1564, #1565. This fixes a problem where 5% of a page's memory footprint isn't large enough to accommodate the cache decrements that will be done in the page's future, leading to page underflow. - - Minor cleanups: we no longer calculate allocation overhead per allocation chunk, the macro WT_MEMSIZE_ADD is no longer needed at all, and WT_MEMSIZE_TRANSFER is renamed to WT_MEM_TRANSFER. 
- -commit b640366c28fc66744e482c20c16973cb052aef8e -Author: Keith Bostic -Date: Fri Jan 30 10:19:31 2015 -0500 - - I believe we can race with pages being marked clean or dirty, which means we need to entirely divorce the page's dirty-byte count from page state: the page's dirty-byte count is just a value that tells us how many dirty bytes this page has contributed to the cache's total dirty-bytes count. Sync the cache's information to that value when possible, but don't worry if we can't. - -commit d02ea7246ec33e05b5fd60c499fea3ffe25c57d2 -Author: Michael Cahill -Date: Fri Jan 30 17:38:09 2015 +1100 - - Use reads to measure cache pressure with shared caches. We previously tracked writes, which were skewed by checkpoints. - - refs #1569 - -commit a326c3ba10e0d299944a650b890f8c2d851db34a -Author: Keith Bostic -Date: Thu Jan 29 17:19:06 2015 -0500 - - Simplify the cache calculation when a page is marked clean, use the page's dirty-byte count (which allows a race between the page being marked clean and being re-dirtied). - - This branch is still not correct, but appears to be able to run the CONFIG from #1582 without underflow for a much longer time). - - Reference #1605. - -commit 1c60c4966dd68ea2bf05ebe62e3f1d8de1a7d033 -Author: Michael Cahill -Date: Thu Jan 29 14:33:36 2015 +1100 - - Use a copy of the oldest transaction ID when sweeping cached overflow items. Otherwise, we could free structures that are still hooked into the skiplist. - - refs #1615 - -commit 42724267278c64f5af68b281c9ee5742d1a56d72 -Author: Susan LoVerso -Date: Wed Jan 28 10:31:51 2015 -0500 - - Adjust logging yield and timeout values. #1610 - -commit ae102f4fe604f7fd547dece8ee138e8292d4f07c -Author: Michael Cahill -Date: Wed Jan 28 17:40:06 2015 +1100 - - Cleanup accounting for update lists when restoring updates to evicted pages. Previously, we only accounted for the first update in a list. 
- - refs SERVER-16997 - -commit 4adf9c929b1b46f273239214b4e2757fcfdb8f96 -Author: Mark Benvenuto -Date: Fri Jan 23 18:29:15 2015 -0500 - - Windows Install Documentation - -commit 8faa218d27e7f21091f0b51a973f27047db1d950 -Author: Mark Benvenuto -Date: Tue Jan 27 13:47:24 2015 -0500 - - MCI configuration update - -commit 422cbb6cea5fa5be6829044215ae46dc10be5f70 -Author: Mark Benvenuto -Date: Mon Jan 26 16:11:29 2015 -0500 - - Add Install Target to SCons - -commit 41e7ab083d79a650e93a34d09e01e973ca4100d9 -Author: Mark Benvenuto -Date: Fri Jan 23 15:54:40 2015 -0500 - - WiredTiger DLL support - - Examples that only depend on public API use DLL now - -commit 23b2493e75cd166075eaccdaef75c8beee4576db -Author: Mark Benvenuto -Date: Fri Jan 23 15:50:16 2015 -0500 - - Scons Improvements - - Added --enable-attach, --enable-diagnostic, --enable-verbose - - Renamed --enable-swig to --enable-python for consistency + swig cleanup - - Renamed wiredtiger static library to libwiredtiger.lib - -commit 96ab0ef67eee20fa75fa6d52c97d98bc119b74ae -Author: Mark Benvenuto -Date: Thu Jan 15 15:07:01 2015 -0500 - - Struct alignment and packing for MSVC - -commit f3b65997ece52382eed91730416d5f919bea79cd -Author: Susan LoVerso -Date: Fri Jan 9 10:49:59 2015 -0500 - - Fix huffman config and add huffman tests. #1536 - -2.8-RC7 Jan 27 2015 -------------------- - -commit 2b4172f17008ff36dbeb50cadaf4fb97fc859e4e -Author: Michael Cahill -Date: Tue Jan 27 15:50:09 2015 +1100 - - Revert a workaround for splits during truncate. - - refs #1583, #1563, SERVER-16868 - -commit c2e108e2774ae79504579bcdca33f26fcff8cb07 -Author: Michael Cahill -Date: Tue Jan 27 09:58:32 2015 +1100 - - Change recovery to start from the checkpoint LSN in the metadata. Don't assert that we see a checkpoint complete in the available log: if the application crashes in between syncing the metadata and writing the final checkpoint record, there is no need to roll anything forward but we don't have the final checkpoint. 
- - refs #1529 - -commit 2555e80d2020ba9833c436a22d1031f6c5778a64 -Author: Keith Bostic -Date: Mon Jan 26 14:31:25 2015 -0500 - - Coverity CID 50796 (#1 of 1): Unintentional integer overflow (OVERFLOW_BEFORE_WIDEN) - SERVER-17001 - -commit 1ce3b94d6e40d37a77e62eda500f286bd3816eb9 -Author: Michael Cahill -Date: Mon Jan 26 15:56:25 2015 +1100 - - Grab the table list lock while building the list of handles to checkpoint. - - This avoids a potential deadlock during compact operations and/or checkpoints with a target list (and an assertion about lock ordering in diagnostic builds). - - Note that nested locking is not ideal: the medium-term fix here is #1598. - - refs #1589, SERVER-16967 - -commit db3943563a87c3e4c42445ae9f3a07efacfdf4ac -Author: Michael Cahill -Date: Mon Jan 26 14:54:47 2015 +1100 - - Free WT_REFs deleted by truncate. We were doing this when a page spontaneously became empty, but not if the "fast truncate" code kicked in. - - refs SERVER-16921 - -commit 2063efb22c3c29b980f86f7fee77b6d03ba63ec1 -Author: Keith Bostic -Date: Fri Jan 23 16:21:06 2015 -0500 - - Don't count pages evicted by a worker thread as an "application thread" eviction; add a new statistic to distinguish between the server itself evicting pages and the eviction worker threads evicting. - - Don't increment the eviction counters unless we find a page to evict, __evict_lru_pages() gets called a huge number of times in any workload where eviction is happening. - - Reference SERVER-16997, SERVER-17020. - -commit 3abb99d58aaa46b0b3fcd338293a668422e3fcaf -Author: Mark Benvenuto -Date: Fri Jan 23 15:05:16 2015 -0500 - - Close Thread Handle after thread join on Windows - -commit 7d677aedfdcaa5458e900e556b662def460d0281 -Author: Don Anderson -Date: Fri Jan 23 08:52:13 2015 -0500 - - Fix drop index on a newly opened session. - Fix __wt_schema_open_index to return WT_NOTFOUND when opening a single index. This fixes opening a cursor on a non-existant index. - Refs #1567. 
- -commit 3626081dff24e1448281d10658752b996897ca82 -Author: Keith Bostic -Date: Thu Jan 22 18:08:15 2015 -0500 - - Add the cache_overhead configuration string to allow applications to configure their cache overhead. - -commit 4843cd78e7f90937ebdb23f84fbd7c133a7e5256 -Author: Don Anderson -Date: Thu Jan 22 10:40:11 2015 -0500 - - Prepend underscores to SWIG methods that could have name conflicts - with WT internal names. refs #1574. - -commit ebb1d9402c0ce2911069b0437d71766b92c3dc12 -Author: Susan LoVerso -Date: Wed Jan 21 12:57:20 2015 -0500 - - Add log code to ensure write-no-sync. #1585 - -commit 44fa4fbff95d0689b20c3fe3f4a55202554f0d9f -Author: Keith Bostic -Date: Mon Jan 19 14:25:39 2015 -0500 - - Make compact more aggressive about finding blocks to move. - -2.8-RC6, Jan 20 2015 --------------------- - -commit ab1d63d3aa2371ce53287c6c6c77833eb281a38a -Author: Susan LoVerso -Date: Tue Jan 20 15:37:46 2015 -0500 - - Check for valid log_fh handle in wt_log_write. #1580 - -commit e2de971061abea9451e92d60f0870136c9c0af42 -Author: Keith Bostic -Date: Tue Jan 20 13:24:06 2015 -0500 - - Quit page eviction immediately if we're trying to evict a tree, that is, an internal page that has other internal pages as children. - -commit 6f3c5a933ef8ce79efc03a22a8c03526ffb2197b -Author: Keith Bostic -Date: Mon Jan 19 12:38:24 2015 -0500 - - The size of the file is decreasing each time, so compacting 10 times, at 10%, is not sufficient to drive a file to its smallest size. The right fix is probably to get better information from the block manager as to exactly how much the size of the file has decreased, but that's messy, especially when you consider the checkpoints requires to get to that smallest size. For now, do 100 compaction attempts instead of 10, and depend on the no-progress state and/or the compaction timeout to limit the amount of time we spend here. 
- -commit 72172b088fba6769866aecabba8176303140f5c4 -Author: Keith Bostic -Date: Mon Jan 19 10:25:13 2015 -0500 - - Coverity 1264611, memory leak (WT_RET that should have been a WT_ERR). - -commit f61f984cf5241ac54bc2ea368c8c15e0cdfa91aa -Author: Alex Gorrod -Date: Sat Jan 17 22:25:02 2015 +0000 - - Fix a deadlock opening statistics cursors. - - Refs #1575 and JIRA SERVER-16738 - -commit c5fa51a0f18e4117d9f7b841de86eb35af751264 -Author: Susan LoVerso -Date: Sat Jan 17 09:07:04 2015 -0500 - - Log close thread needs to wait for any outstanding writes. #1571 - -commit 9cd8120f491595ad6ac1c25c4b154ad6556b5fe7 -Author: Michael Cahill -Date: Sat Jan 17 09:49:09 2015 +1100 - - Close the session for the log close server thread. Fixes a leak detected by address sanitizer. - -commit bd7364ea9a0542bee61db0a89e771faf814f6f53 -Author: Alex Gorrod -Date: Fri Jan 16 21:03:55 2015 +0000 - - Fix a bug in raw compression, where we were overflowing memory. - - We weren't growing the buffer enough when adding new items in. - - Refs SERVER-16664 - -commit 76addf73581c53f24462ab5fd724048aec36eaf3 -Author: Michael Cahill -Date: Sat Jan 17 05:48:03 2015 +1100 - - Have WT_CURSOR::equals return 1 when cursors are equal, 0 when not. - -commit b2841dfc015d9502e1def870605968144b935570 -Author: Susan LoVerso -Date: Thu Jan 15 21:58:39 2015 -0500 - - Add log thread to fsync and close log files. #1560 - -commit ebb93969ebfb6b9bb9dc60621933f2fbeac4b472 -Author: Keith Bostic -Date: Thu Jan 15 22:47:52 2015 +0000 - - Don't do memory adjustments for the WT_REF's WT_ADDR structures, we don't do those adjustments in other places we set addresses. - - Workaround the problem with page memory size calculations during splits by forcing the new parent page memory size to 5% of its current value; reference #1564. - - Minor cleanups/renaming of the code instantiating the WT_REF structures during a tree-deepening split to clarify what's going on there. 
- -commit e0031209183c880fb1a1b99399013e7675a75e88 -Author: Alex Gorrod -Date: Fri Jan 16 09:26:34 2015 +1100 - - Don't look at a page after it may be freed during split. - - During the process of doing a split we switch the ref to WT_REF_MEM - after which it's no longer safe to refer to the page. Shuffle the code so that we don't. - - SERVER-16868 - -commit b6d7532cbf823d537b8f1733169fe4de08173c09 -Author: Susan LoVerso -Date: Thu Jan 15 16:55:00 2015 -0500 - - Only advance sync_lsn to the end of our write. We waited until the log->sync_lsn is advanced into our file. It was a bug to set the sync_lsn to the current write_lsn as that can be too far ahead in a new log file when earlier log files aren't done yet. - -commit 85851933a938c53dfa57d1621cab1a959db672eb -Author: Thomas Rueckstiess -Date: Mon Dec 15 11:04:43 2014 +1100 - - wtstats.py: removed python-nvd3 dependencies, rewriting with HTML template - -commit 4c26d2324bae1d7030b0142d50dbd2ccf11ddeb6 -Author: Keith Bostic -Date: Thu Dec 11 19:32:50 2014 -0500 - - Add support for a WT_CURSOR.reconfigure method, reference #1381. - -2.8-RC5 Jan 15 2015 -------------------- - -commit 2e54a27683c5e2fd88918575383c76d3f60c3c78 -Author: Michael Cahill -Date: Thu Jan 15 07:17:21 2015 +1100 - - Workaround a read-after-free involving eviction during truncates. We were implicitly relying on first_dirty_txn to prevent pages being immediately force-evicted by truncate. The bug is not fully understood, but this change restores the previous window where reads can complete before the page is evicted for real. - - refs BF-759 - -commit 8a1bfe3c35f0c1d90ea3e8e70c2aae8dff1fdbb3 -Author: Susan LoVerso -Date: Wed Jan 14 15:07:39 2015 -0500 - - Force log file closes to go in sequence. #1555 - - Update the sync_lsn after sync'ing and closing an earlier log file and make sure archive doesn't try to remove a file that is still in use. 
- -2.8-RC4 Dec 22 2014 -------------------- - -commit fbb96d94cdba9a28f5c5d737ce6c96543f3289f4 -Author: Michael Cahill -Date: Mon Dec 22 15:59:46 2014 +1100 - - Use the original page's first_dirty_txn when restoring updates to match what we do for in-memory splits. - - refs #1475 - -commit 4df72e8e20139ddf667e1f0d3b6b7dcf91deb006 -Author: Michael Cahill -Date: Mon Dec 22 13:12:07 2014 +1100 - - Avoid EBUSY returns to verify and salvage caused by checkpoints. The "fix" involves blocking checkpoints while salvage or verify are in progress. - - refs #1404, SERVER-16457 - -commit 864f3495721b1311b49df19ee241bfca9adf0863 -Author: Keith Bostic -Date: Sun Dec 21 20:47:52 2014 -0500 - - Make the cache bytes-written and bytes-read match, both should - ignore compression. Reference #1505. - -commit 995d6f8c26ae19013a1eb921fd871481ca643f47 -Author: Michael Cahill -Date: Mon Dec 22 12:42:46 2014 +1100 - - Eviction should do update-restore if upper layers are trying to force out a page, regardless of its size. Also, only look at ref->page after checking for exclusive access. It is possible (but very unlikely) that a child page pointer could be replaced in the window where we are checking hazard pointers. - -commit d4abc51ea61211f90f4b70a0486442264ededc27 -Author: Alex Gorrod -Date: Mon Dec 22 10:43:04 2014 +1100 - - Fix a bug where a custom extractor terminate was being called twice. - - Resolves issue #1503. - - Clarify the custom extractor and collator terminate documentation while I'm here. - -commit 16972ef63de1283d85146530c35f522b053e2c1e -Author: Don Anderson -Date: Fri Dec 19 09:56:47 2014 -0500 - - Remove version numbering from the pkg-config file. We don't create include files that are named by version. Programs linked using -lwiredtiger will follow the symlink to wiredtiger-a.b.c.so, so their referred library name is forever stamped as wiredtiger-a.b.c.so, which won't conflict even when we ship wiredtiger-a.b.d.so. - Refs #1458. 
- -commit e913b0811114d65b543cd78824e809eb487fd330 -Author: Michael Cahill -Date: Fri Dec 19 17:19:58 2014 +1100 - - Check that handles are not being walked by eviction before discarding. - - refs #1497 - -commit 0d21e437917bc7cf08393852a3074957431ea30e -Author: Alex Gorrod -Date: Fri Dec 19 15:44:15 2014 +1100 - - Use the eviction server to write pages with READGEN_OLDEST set. - - Even before the eviction trigger has been reached. This should mean that we clear those pages out of cache earlier, and hopefully will save application threads from doing the evictions (at least sometimes). - -commit e0adfba3c4011c49b73ff3e4a165a4a938f69cb3 -Author: Michael Cahill -Date: Fri Dec 19 12:23:21 2014 +1100 - - Don't try to write leaves from the sweep server. Previously, this was done without locking the handle, and so could race with LSM discarding a handle. - - We know the handle has been idle, so there is a good chance that a checkpoint has run since the last update and the write leaves was wasted effort. If not, this change will keep the handle locked for longer preventing new opens, but it has been idle for a while anyway. - - refs #1495, #1497 (maybe) - -commit 87328a8d5c1d4a201a1df604ba32a87863948bbb -Author: Mark Benvenuto -Date: Thu Dec 18 14:51:51 2014 -0500 - - fix test_salvage on Windows - -commit 1953776ada137f3deae50169bf889d2063b353d3 -Author: Michael Cahill -Date: Thu Dec 18 16:45:06 2014 +1100 - - Don't try to set and clear session->split_gen in WT_INTL_FOREACH_BEGIN: there are too many of those loops to ensure that none of them skip clearing it. Instead, make sure all calls are wrapped in WT_WITH_APAGE_INDEX. - - refs SERVER-16546 - -commit 235f747e2df80d9899497595a2b649e7d6df8601 -Author: Mark Benvenuto -Date: Wed Dec 17 14:11:41 2014 -0500 - - snprintf - Implement a custom version of snprintf match the truncation behavior of C99 standard snprintf until MSVC supports snprintf. 
- -commit 857a6fd0c4b6b001c78cbbc507674e2129029dff -Author: Keith Bostic -Date: Wed Dec 17 15:51:41 2014 -0500 - - Check the block header checksum before we clear it, it should be the same as the passed-in checksum, and if just those 4 bytes are corrupted, we wouldn't noticed. SERVER-16457. - -commit dfa706056c4a359f7f894047bc9e5399efcec776 -Author: Don Anderson -Date: Tue Dec 16 15:58:34 2014 -0500 - - Some refactoring of python packing. More checks for standalone unit tests. - Refs #1429. - -2.8-RC3 Dec 17 2014 -------------------- - -commit bb064847e1c45f2b396d3f65f4e08cd10f33ed6e -Author: Michael Cahill -Date: Wed Dec 17 15:45:23 2014 +1100 - - Detect write-write conflicts before no-overwrite cursors decide to skip an update. - - refs SERVER-16351 - -commit 91abf8e35d5246a653bd9615ffd9723d87999c38 -Author: Alex Gorrod -Date: Wed Dec 17 13:36:02 2014 +1100 - - Add support for none configuration string to log compressor. - - To be consistent with block compression configuration. - -commit 5438fee4942b4dbf484799dad6e12e042d253e99 -Author: Alex Gorrod -Date: Wed Dec 17 12:09:33 2014 +1100 - - Return an error if a shared cache configuration is set, but not enabled. - - Check for a configuration via shared_cache=(size=). - - Refs #1487 - -commit 390a5b71b25492dc3030e908a65a11a04401852b -Author: Keith Bostic -Date: Tue Dec 16 15:32:09 2014 -0500 - - We documented that huffman_key and huffman_value took "none" as an argument, but they didn't. - - Worse, if key was set but not value, we'd set value anyway, free of charge, and if value was set but not key, we'd fail. I doubt this is a problem (it's pretty surprising if anyone would set key but not also set value). - - Use __wt_config_gets_none() to support the "none" setting, re-work the logic to ignore keys or values that are zero-length. - - Reference #1417. 
- -commit 662e26eeb31f76f2c4aeebf6690c9056612de32e -Author: Susan LoVerso -Date: Tue Dec 16 14:20:01 2014 -0500 - - Atomically create all log files and move them into place. #1482 - -commit 5c30d62dbf7ec0976d6ec4d2aed4ba272aadd499 -Author: Keith Bostic -Date: Mon Dec 15 19:41:05 2014 -0500 - - If we race with the logging thread and get to __wt_logmgr_destroy() while __log_archive_once() is still using conn->log_path, we can free it out from under the running server. If there's a logging thread running, don't free conn->log_path until we've joined that thread. - Reference #1480. - -commit d77d35db407fd74c266bdb728b12c74fdab26ba2 -Author: Keith Bostic -Date: Mon Dec 15 18:33:54 2014 -0500 - - Even if we don't track any overflow pages during our read of the file, we still have to process the list of pages looking for leaf pages that reference unavailable overflow pages, no overflow pages doesn't imply there are no references to overflow pages. - -commit cedf8cfe69bf964629aab498feb20a0b1ab77bc0 -Author: Don Anderson -Date: Mon Dec 15 11:54:15 2014 -0500 - - Fix use of 'compressed' flag for printlog. - Added printlog call to test case for log compression. - Refs #1472. - -commit 3210b11cf7bfb79f3ed52cd1c17a13c644a82e7a -Author: Susan LoVerso -Date: Mon Dec 15 11:02:10 2014 -0500 - - Fix memory leak. Always free log_path. #1473 - -commit 097c61e5f3326bc71f2d645b3f539c5c6d1ae3fb -Author: Alex Gorrod -Date: Mon Dec 15 05:58:06 2014 +0000 - - Allow printlog to work without recovery. - - It now works even if compression or a custom path are setup. - -commit b8921272755ce66d09ab2a001745573420bd41ac -Author: Keith Bostic -Date: Wed Dec 10 15:40:06 2014 -0500 - - We can't use the corrected page size to calculate the buffer's space available, but we don't have a page size either. We do know how much space we added, so use that to increment the space available. 
- - The raw compression handler can no longer pass a page size buffer to the underlying compression function, because a single key/value object could be larger than the page size, instead, pass a buffer of the same length as the source to compress, whatever that is. - -commit 402041727de02931be1dd385f3c970f31a53341c -Author: Michael Cahill -Date: Wed Dec 10 10:53:27 2014 +1100 - - Remove the min / max bounds on overflow sizes. - -commit 5088ee53fce569915e8de8c168da50cff7991ec1 -Author: Keith Bostic -Date: Tue Dec 9 16:22:20 2014 -0500 - - Separate the btree maximum key/value sizes from the underlying page size, reference #1282. - - Deprecate the internal_item_max and leaf_item_max configuration strings, replace with internal_key_max, leaf_key_max and leaf_value_max. - - Remove examples/c/ex_file.c (there's no real need for a "file" URI example, and it's easy to replace the one place the documentation used it). - -commit e1e187e8fdfb48526f2a62e3f0f48072c30db53e -Author: Michael Cahill -Date: Tue Dec 16 10:29:08 2014 +1100 - - Update the swept handles statistic any time we close the underlying handle, regardless of whether a session still references the data handle. - - reds #1460, #1466 - -commit 080b34fde5de97459c383c67ba93d9fdc88090a8 -Author: Don Anderson -Date: Fri Dec 12 11:13:22 2014 -0500 - - Allow pruning scenarios with different limits for default vs. long runs. The pruned scenario list now matches the original ordering. Tests now print with both the scenario number and the scenario args. This should make it easier to diagnose and debug problems that effect only certain scenarios. Refs #1461. - -commit d5b88e08e2f6e39d098cfff3c013f4aa035c88bc -Author: Don Anderson -Date: Thu Dec 11 15:13:11 2014 -0500 - - Changed python test suite to allow for shorter runs by default, with a --long (or -l) option for the complete runs. Txn02 in particular now runs only a small number of scenarios by default. 
Also added a @longtest("description") decorator for individual tests that can be marked to be run only under --long. Refs #1461. - -commit 2f37332e5bbd14823f0c78ad38672dbce074e87f -Author: Alex Gorrod -Date: Fri Dec 12 17:21:50 2014 +1100 - - Ensure metadata table is open at start of checkpoints. - -commit 2cb10882f4f7189a3c2de4d7e187117873fded32 -Author: Alex Gorrod -Date: Fri Dec 12 17:07:11 2014 +1100 - - Switch to stashing the metadata dhandle, not the btree. - - Also update checkpoint to use the saved handle instead of - searching for it again. - -commit 42c05161cf8cc74606b259ceeeb41dd38ea7fb4e -Author: Susan LoVerso -Date: Thu Dec 11 15:24:14 2014 -0500 - - Use dhandle hash lists in more places. Adjust sweep timings. #1460 - -commit 7fb6315e45e74f0bef0a04505018e1ab0b68d144 -Author: Michael Cahill -Date: Thu Dec 11 23:06:19 2014 +1100 - - If LSM search_near finds a matching tombstone, step the whole LSM cursor next to find the closest key. We can't step individual chunk cursors, or we could return a record that is deleted in a more recent chunk. - - MongoDB BF-694, BF-700 - -commit 5f6bbc898564aefb312255555abd34202cb98815 -Author: Michael Cahill -Date: Wed Dec 10 14:39:25 2014 +1100 - - Track whether eviction is making progress regardless of whether the cache is 100% full. Otherwise we can get into a tight loop. Use the count of pages evicted rather than a flag, now that there are multiple eviction threads. - -commit cbe9e9bdbc508f95076b8097d41bb4cc799eab1c -Author: Don Anderson -Date: Tue Dec 9 19:11:52 2014 -0500 - - Change timing to allow archive thread to complete on tests that - do archive. Since this can make each test run substantially longer, - reduce the number of tests that are doing archive from ~4000 to - something under 100. Refs #1452. 
- -commit 62af85890179abb9fda17a619fcd5ae69fb369e0 -Merge: b83bf08 b24c7af -Author: Michael Cahill -Date: Wed Dec 10 11:03:48 2014 +1100 - - Merge pull request #1449 from wiredtiger/lsm-switch-simplify - - Improve and simplify the LSM switch logic - -commit 7e0f7d7b803f9af04ad10b2bec6ef5073aa79248 -Author: Don Anderson -Date: Tue Dec 9 16:04:48 2014 -0500 - - SESSION->drop with "force" of nonexistant index/colgroup should be silent. - Refs #1436. - -commit be364821d75c0c42169d79c486fa582c777f7082 -Author: Michael Cahill -Date: Tue Dec 9 15:58:28 2014 +1100 - - Sweep old handles more aggressively: - - 1. don't have checkpoint or other periodic operations like statistics logging keep old handles alive; - 2. don't wait for all sessions to empty the file from their cache before closing; - 3. only update the time of death from the sweep thread. - -commit c96a4c954ccc73744f8a1fbcf2fea6debdfca018 -Merge: cc8eb0b 2e332b9 -Author: Alex Gorrod -Date: Tue Dec 9 14:41:36 2014 +1100 - - Merge pull request #1443 from wiredtiger/cursor-open-optimize - - Cursor open optimize - -commit 993c8ede8ff64eac9e87d1adcd39f8575039222b -Author: Michael Cahill -Date: Tue Dec 9 13:21:13 2014 +1100 - - Improve and simplify the LSM switch logic: it is fine to keep writing into a chunk while it is being switched, until either it hits the hard chunk size limit or a switch transaction ID is chosen that is larger than the writer's. Fixes an assertion failure introduced by #1432 that could write to an old chunk after the switch completed. - - refs #1432, #1418 - -commit 5551461cd5f26249e4330c9f87b4945d7ec2bb34 -Author: Alex Gorrod -Date: Mon Dec 8 18:01:31 2014 +1100 - - If there are only two LSM worker threads, don't let the cache get full. - - Allow the first thread to do flushes as well as switches and drops if there are only two threads. - - Refs #1441, but this is a hang seen from that test/format configuration, not a segfault. 
- -commit 8f06d6b79dabed54ad1e05515bbdb31e23c4e991 -Author: Don Anderson -Date: Fri Dec 5 14:22:41 2014 -0500 - - Modify printlog output so that arbitrary strings are shown as using the JSON Unicode standard. refs #1438. - -commit 68090796dea07e7b2d3d5bee8d69aafcd8febe16 -Author: Alex Gorrod -Date: Fri Dec 5 04:36:25 2014 +0000 - - Avoid string comparisons when looking up tables. - - Save a hash value in the table so we can do integer comparisons rather than string comparisons. - -commit 4de5e3a71bfad1c2a9ef1eccccdd45ec02fecba9 -Author: Michael Cahill -Date: Fri Dec 5 13:01:51 2014 +1100 - - Force eviction if we see many consecutive deletes when scanning through a page. This fixes quadratic behavior in find-first+delete workloads. - -commit bbced52c939e16ad5662b3a177cef3e52abddd6e -Author: Keith Bostic -Date: Thu Dec 4 07:57:13 2014 -0500 - - In the final close, continue and remove the handle no matter what errors we see, otherwise the handle-close code in __wt_conn_dhandle_discard() can become infinite loops, where we repeatedly attempt to close the same file handles. Reference #1434. - -commit 46fa7f0b6397fe765c5e8c2853f9cd0b067bc808 -Author: Keith Bostic -Date: Wed Dec 3 13:46:01 2014 -0500 - - Changes in #1204, #1288 mistakenly changed the values for some error defines, which breaks backward compatibility. (The WT_DEADLOCK error name sorted differently from WT_ROLLBACK, and we were assigning error values based on the sort order in a script.) Revert the change, and make sure it doesn't happen again. - -commit 249e88485c75951a0584a7c7a8dd4b8f8b6a3382 -Author: Keith Bostic -Date: Wed Dec 3 06:25:16 2014 -0500 - - Support "none" in all configuration strings as an alternative to an empty string. Reference #1417. - -commit 63d7c7869f8c2ab5a3e6ee935d1e37f21d40755f -Author: Don Anderson -Date: Tue Dec 2 14:00:11 2014 -0500 - - Added log compression. When configured, we attempt to compress each log record. Added printlog output to show before/after compression sizes. 
Refs #1359. - diff --git a/src/third_party/wiredtiger/README b/src/third_party/wiredtiger/README index 5056431c95b..07dde47feaf 100644 --- a/src/third_party/wiredtiger/README +++ b/src/third_party/wiredtiger/README @@ -1,6 +1,6 @@ -WiredTiger 2.7.0: (November 19, 2015) +WiredTiger 2.7.1: (December 8, 2015) -This is version 2.7.0 of WiredTiger. +This is version 2.7.1 of WiredTiger. WiredTiger release packages and documentation can be found at: @@ -8,7 +8,7 @@ WiredTiger release packages and documentation can be found at: The documentation for this specific release can be found at: - http://source.wiredtiger.com/2.7.0/index.html + http://source.wiredtiger.com/2.7.1/index.html The WiredTiger source code can be found at: diff --git a/src/third_party/wiredtiger/RELEASE_INFO b/src/third_party/wiredtiger/RELEASE_INFO index 1204e262af2..d2c7995910e 100644 --- a/src/third_party/wiredtiger/RELEASE_INFO +++ b/src/third_party/wiredtiger/RELEASE_INFO @@ -1,6 +1,6 @@ WIREDTIGER_VERSION_MAJOR=2 WIREDTIGER_VERSION_MINOR=7 -WIREDTIGER_VERSION_PATCH=0 +WIREDTIGER_VERSION_PATCH=1 WIREDTIGER_VERSION="$WIREDTIGER_VERSION_MAJOR.$WIREDTIGER_VERSION_MINOR.$WIREDTIGER_VERSION_PATCH" WIREDTIGER_RELEASE_DATE=`date "+%B %e, %Y"` diff --git a/src/third_party/wiredtiger/bench/wtperf/config.c b/src/third_party/wiredtiger/bench/wtperf/config.c index 6b0ce47ef3f..808e85eedae 100644 --- a/src/third_party/wiredtiger/bench/wtperf/config.c +++ b/src/third_party/wiredtiger/bench/wtperf/config.c @@ -46,7 +46,6 @@ static void config_opt_usage(void); #define STRING_MATCH(str, bytes, len) \ (strncmp(str, bytes, len) == 0 && (str)[(len)] == '\0') - /* * config_assign -- * Assign the src config to the dest, any storage allocated in dest is @@ -55,6 +54,7 @@ static void config_opt_usage(void); int config_assign(CONFIG *dest, const CONFIG *src) { + CONFIG_QUEUE_ENTRY *conf_line, *tmp_line; size_t i, len; char *newstr, **pstr; @@ -97,6 +97,18 @@ config_assign(CONFIG *dest, const CONFIG *src) } 
TAILQ_INIT(&dest->stone_head); + TAILQ_INIT(&dest->config_head); + + /* Clone the config string information into the new cfg object */ + TAILQ_FOREACH(conf_line, &src->config_head, c) { + len = strlen(conf_line->string); + if ((tmp_line = calloc(sizeof(CONFIG_QUEUE_ENTRY), 1)) == NULL) + return (enomem(src)); + if ((tmp_line->string = calloc(len + 1, 1)) == NULL) + return (enomem(src)); + strncpy(tmp_line->string, conf_line->string, len); + TAILQ_INSERT_TAIL(&dest->config_head, tmp_line, c); + } return (0); } @@ -107,9 +119,17 @@ config_assign(CONFIG *dest, const CONFIG *src) void config_free(CONFIG *cfg) { + CONFIG_QUEUE_ENTRY *config_line; size_t i; char **pstr; + while (!TAILQ_EMPTY(&cfg->config_head)) { + config_line = TAILQ_FIRST(&cfg->config_head); + TAILQ_REMOVE(&cfg->config_head, config_line, c); + free(config_line->string); + free(config_line); + } + for (i = 0; i < sizeof(config_opts) / sizeof(config_opts[0]); i++) if (config_opts[i].type == STRING_TYPE || config_opts[i].type == CONFIG_STRING_TYPE) { @@ -181,6 +201,16 @@ config_threads(CONFIG *cfg, const char *config, size_t len) int ret; group = scan = NULL; + if (cfg->workload != NULL) { + /* + * This call overrides an earlier call. Free and + * reset everything. + */ + free(cfg->workload); + cfg->workload = NULL; + cfg->workload_cnt = 0; + cfg->workers_cnt = 0; + } /* Allocate the workload array. */ if ((cfg->workload = calloc(WORKLOAD_MAX, sizeof(WORKLOAD))) == NULL) return (enomem(cfg)); @@ -201,7 +231,7 @@ config_threads(CONFIG *cfg, const char *config, size_t len) if ((ret = wiredtiger_config_parser_open( NULL, groupk.str, groupk.len, &scan)) != 0) goto err; - + /* Move to the next workload slot. 
*/ if (cfg->workload_cnt == WORKLOAD_MAX) { fprintf(stderr, @@ -308,7 +338,7 @@ err: if (group != NULL) (void)group->close(group); if (scan != NULL) (void)scan->close(scan); - + fprintf(stderr, "invalid thread configuration or scan error: %.*s\n", (int)len, config); @@ -560,16 +590,34 @@ err: if (fd != -1) int config_opt_line(CONFIG *cfg, const char *optstr) { + CONFIG_QUEUE_ENTRY *config_line; WT_CONFIG_ITEM k, v; WT_CONFIG_PARSER *scan; + size_t len; int ret, t_ret; + char *string_copy; + len = strlen(optstr); if ((ret = wiredtiger_config_parser_open( - NULL, optstr, strlen(optstr), &scan)) != 0) { + NULL, optstr, len, &scan)) != 0) { lprintf(cfg, ret, 0, "Error in config_scan_begin"); return (ret); } + /* + * Append the current line to our copy of the config. The config is + * stored in the order it is processed, so added options will be after + * any parsed from the original config. We allocate len + 1 to allow for + * a null byte to be added. + */ + if ((string_copy = calloc(len + 1, 1)) == NULL) + return (enomem(cfg)); + + strncpy(string_copy, optstr, len); + config_line = calloc(sizeof(CONFIG_QUEUE_ENTRY), 1); + config_line->string = string_copy; + TAILQ_INSERT_TAIL(&cfg->config_head, config_line, c); + while (ret == 0) { if ((ret = scan->next(scan, &k, &v)) != 0) { /* Any parse error has already been reported. */ @@ -643,6 +691,90 @@ config_sanity(CONFIG *cfg) return (0); } +/* + * config_consolidate -- + * Consolidate repeated configuration settings so that it only appears + * once in the configuration output file. + */ +void +config_consolidate(CONFIG *cfg) +{ + CONFIG_QUEUE_ENTRY *conf_line, *test_line, *tmp; + char *string_key; + + /* + * This loop iterates over the config queue and for entry checks if an + * entry later in the queue has the same key. If a match is found then + * the current queue entry is removed and we continue. 
+ */ + conf_line = TAILQ_FIRST(&cfg->config_head); + while (conf_line != NULL) { + string_key = strchr(conf_line->string, '='); + tmp = test_line = TAILQ_NEXT(conf_line, c); + while (test_line != NULL) { + /* + * The + 1 here forces the '=' sign to be matched + * ensuring we don't match keys that have a common + * prefix such as "table_count" and "table_count_idle" + * as being the same key. + */ + if (strncmp(conf_line->string, test_line->string, + (size_t)(string_key - conf_line->string + 1)) + == 0) { + TAILQ_REMOVE(&cfg->config_head, conf_line, c); + free(conf_line->string); + free(conf_line); + break; + } + test_line = TAILQ_NEXT(test_line, c); + } + conf_line = tmp; + } +} + +/* + * config_to_file -- + * Write the final config used in this execution to a file. + */ +void +config_to_file(CONFIG *cfg) +{ + CONFIG_QUEUE_ENTRY *config_line; + FILE *fp; + size_t req_len; + char *path; + + fp = NULL; + + /* Backup the config */ + req_len = strlen(cfg->home) + 100; + if ((path = calloc(req_len, 1)) == NULL) { + (void)enomem(cfg); + goto err; + } + + snprintf(path, req_len + 14, "%s/CONFIG.wtperf", cfg->home); + if ((fp = fopen(path, "w")) == NULL) { + lprintf(cfg, errno, 0, "%s", path); + goto err; + } + + /* Print the config dump */ + fprintf(fp,"# Warning. This config includes " + "unwritten, implicit configuration defaults.\n" + "# Changes to those values may cause differences in behavior.\n"); + config_consolidate(cfg); + config_line = TAILQ_FIRST(&cfg->config_head); + while (config_line != NULL) { + fprintf(fp, "%s\n", config_line->string); + config_line = TAILQ_NEXT(config_line, c); + } + +err: free(path); + if (fp != NULL) + (void)fclose(fp); +} + /* * config_print -- * Print out the configuration in verbose mode. 
@@ -677,7 +809,7 @@ config_print(CONFIG *cfg) for (i = 0, workp = cfg->workload; i < cfg->workload_cnt; ++i, ++workp) printf("\t\t%" PRId64 " threads (inserts=%" PRId64 - ", reads=%" PRId64 ", updates=%" PRId64 + ", reads=%" PRId64 ", updates=%" PRId64 ", truncates=% " PRId64 ")\n", workp->threads, workp->insert, workp->read, diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/btree-split-stress.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/btree-split-stress.wtperf new file mode 100644 index 00000000000..deb8c70d12f --- /dev/null +++ b/src/third_party/wiredtiger/bench/wtperf/runners/btree-split-stress.wtperf @@ -0,0 +1,10 @@ +conn_config="cache_size=2GB,statistics=[fast,clear],statistics_log=(wait=10),eviction=(threads_max=4,threads_min=4)" +table_config="type=file,leaf_page_max=8k,internal_page_max=8k,memory_page_max=2MB,split_deepen_min_child=250" +icount=200000 +report_interval=5 +run_time=300 +reopen_connection=false +populate_threads=2 +value_sz=256 +read_range=100 +threads=((count=4,inserts=1,throttle=100000),(count=8,reads=1)) diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/mongodb-oplog.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/mongodb-oplog.wtperf index 34235f04518..105e8c4f5bb 100644 --- a/src/third_party/wiredtiger/bench/wtperf/runners/mongodb-oplog.wtperf +++ b/src/third_party/wiredtiger/bench/wtperf/runners/mongodb-oplog.wtperf @@ -8,4 +8,4 @@ run_time=500 populate_threads=1 # Setup three threads to insert into the oplog # Setup one thread to be doing truncates from the oplog -threads=((count=3,inserts=1,throttle=2000),(count=1,truncate=1,truncate_pct=10,truncate_count=50000)) +threads=((count=3,inserts=1,throttle=4000),(count=1,truncate=1,truncate_pct=10,truncate_count=50000)) diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/wtperf_run.sh b/src/third_party/wiredtiger/bench/wtperf/runners/wtperf_run.sh index ac31c2a2e78..7a1ad44f39c 100755 --- 
a/src/third_party/wiredtiger/bench/wtperf/runners/wtperf_run.sh +++ b/src/third_party/wiredtiger/bench/wtperf/runners/wtperf_run.sh @@ -12,16 +12,27 @@ # This script should be invoked with the pathname of the wtperf test # config to run and the number of runs. # -if test "$#" -ne "2"; then +if test "$#" -lt "2"; then echo "Must specify wtperf test to run and number of runs" exit 1 fi wttest=$1 runmax=$2 +# Jenkins removes the quotes from the passed in arg so we may +# have 3 or 4 args. +wtarg="" +wtarg2="" +if test "$#" -gt "2"; then + wtarg=$3 + if test "$#" -eq "4"; then + wtarg2=$4 + fi +fi home=./WT_TEST outfile=./wtperf.out rm -f $outfile +echo "Parsed $# args: test: $wttest runmax: $runmax args: $wtarg $wtarg2" >> $outfile # Each of these has an entry for each op in ops below. avg=(0 0 0 0) @@ -77,7 +88,7 @@ run=1 while test "$run" -le "$runmax"; do rm -rf $home mkdir $home - LD_PRELOAD=/usr/lib64/libjemalloc.so.1 LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib ./wtperf -O $wttest + LD_PRELOAD=/usr/lib64/libjemalloc.so.1 LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib ./wtperf -O $wttest $wtarg $wtarg2 if test "$?" -ne "0"; then exit 1 fi diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf.c b/src/third_party/wiredtiger/bench/wtperf/wtperf.c index 9ac96862fa1..5386096d9b7 100644 --- a/src/third_party/wiredtiger/bench/wtperf/wtperf.c +++ b/src/third_party/wiredtiger/bench/wtperf/wtperf.c @@ -60,6 +60,7 @@ static const CONFIG default_cfg = { 0, /* total seconds running */ 0, /* has truncate */ {NULL, NULL}, /* the truncate queue */ + {NULL, NULL}, /* the config queue */ #define OPT_DEFINE_DEFAULT #include "wtperf_opt.i" @@ -371,6 +372,53 @@ err: cfg->error = cfg->stop = 1; return (NULL); } +/* + * do_range_reads -- + * If configured to execute a sequence of next operations after each + * search do them. Ensuring the keys we see are always in order. 
+ */ +static int +do_range_reads(CONFIG *cfg, WT_CURSOR *cursor) +{ + size_t range; + uint64_t next_val, prev_val; + char *range_key_buf; + char buf[512]; + int ret; + + ret = 0; + + if (cfg->read_range == 0) + return (0); + + memset(&buf[0], 0, 512 * sizeof(char)); + range_key_buf = &buf[0]; + + /* Save where the first key is for comparisons. */ + cursor->get_key(cursor, &range_key_buf); + extract_key(range_key_buf, &next_val); + + for (range = 0; range < cfg->read_range; ++range) { + prev_val = next_val; + ret = cursor->next(cursor); + /* We are done if we reach the end. */ + if (ret != 0) + break; + + /* Retrieve and decode the key */ + cursor->get_key(cursor, &range_key_buf); + extract_key(range_key_buf, &next_val); + if (next_val < prev_val) { + lprintf(cfg, EINVAL, 0, + "Out of order keys %" PRIu64 + " came before %" PRIu64, + prev_val, next_val); + return (EINVAL); + } + } + return (0); +} + static void * worker(void *arg) { @@ -381,8 +429,8 @@ worker(void *arg) WT_CONNECTION *conn; WT_CURSOR **cursors, *cursor, *tmp_cursor; WT_SESSION *session; - int64_t ops, ops_per_txn, throttle_ops; size_t i; + int64_t ops, ops_per_txn, throttle_ops; uint64_t next_val, usecs; uint8_t *op, *op_end; int measure_latency, ret, truncated; @@ -533,7 +581,14 @@ worker(void *arg) "get_value in read."); goto err; } + /* + * If we want to read a range, then call next + * for several operations, confirming that the + * next key is in the correct order. 
+ */ + ret = do_range_reads(cfg, cursor); } + if (ret == 0 || ret == WT_NOTFOUND) break; goto op_err; @@ -1097,9 +1152,10 @@ monitor(void *arg) uint32_t read_avg, read_min, read_max; uint32_t insert_avg, insert_min, insert_max; uint32_t update_avg, update_min, update_max; - uint32_t latency_max; + uint32_t latency_max, level; u_int i; - int ret; + int msg_err, ret; + const char *str; char buf[64], *path; cfg = (CONFIG *)arg; @@ -1197,25 +1253,41 @@ monitor(void *arg) if (latency_max != 0 && (read_max > latency_max || insert_max > latency_max || - update_max > latency_max)) - /* - * Make this a non-fatal error and print WARNING in - * the output so Jenkins can flag it as unstable. - */ - lprintf(cfg, 0, 0, - "WARNING: max latency exceeded: threshold %" PRIu32 + update_max > latency_max)) { + if (cfg->max_latency_fatal) { + level = 1; + msg_err = WT_PANIC; + str = "ERROR"; + } else { + level = 0; + msg_err = 0; + str = "WARNING"; + } + lprintf(cfg, msg_err, level, + "%s: max latency exceeded: threshold %" PRIu32 " read max %" PRIu32 " insert max %" PRIu32 - " update max %" PRIu32, latency_max, + " update max %" PRIu32, str, latency_max, read_max, insert_max, update_max); + } if (min_thr != 0 && ((cur_reads != 0 && cur_reads < min_thr) || (cur_inserts != 0 && cur_inserts < min_thr) || - (cur_updates != 0 && cur_updates < min_thr))) - lprintf(cfg, WT_PANIC, 0, - "minimum throughput not met: threshold %" PRIu64 + (cur_updates != 0 && cur_updates < min_thr))) { + if (cfg->min_throughput_fatal) { + level = 1; + msg_err = WT_PANIC; + str = "ERROR"; + } else { + level = 0; + msg_err = 0; + str = "WARNING"; + } + lprintf(cfg, msg_err, level, + "%s: minimum throughput not met: threshold %" PRIu64 " reads %" PRIu64 " inserts %" PRIu64 - " updates %" PRIu64, min_thr, cur_reads, + " updates %" PRIu64, str, min_thr, cur_reads, cur_inserts, cur_updates); + } last_reads = reads; last_inserts = inserts; last_updates = updates; @@ -1534,8 +1606,10 @@ execute_workload(CONFIG *cfg) 
lprintf(cfg, 0, 1, "Starting workload #%d: %" PRId64 " threads, inserts=%" PRId64 ", reads=%" PRId64 ", updates=%" PRId64 - ", truncate=%" PRId64, i + 1, workp->threads, workp->insert, - workp->read, workp->update, workp->truncate); + ", truncate=%" PRId64 ", throttle=%" PRId64, + i + 1, workp->threads, workp->insert, + workp->read, workp->update, workp->truncate, + workp->throttle); /* Figure out the workload's schedule. */ if ((ret = run_mix_schedule(cfg, workp)) != 0) @@ -1906,7 +1980,7 @@ start_run(CONFIG *cfg) monitor_created = ret = 0; /* [-Wconditional-uninitialized] */ memset(&monitor_thread, 0, sizeof(monitor_thread)); - + if ((ret = setup_log_file(cfg)) != 0) goto err; @@ -2084,6 +2158,8 @@ main(int argc, char *argv[]) if (config_assign(cfg, &default_cfg)) goto err; + TAILQ_INIT(&cfg->config_head); + /* Do a basic validation of options, and home is needed before open. */ while ((ch = __wt_getopt("wtperf", argc, argv, opts)) != EOF) switch (ch) { @@ -2289,6 +2365,9 @@ main(int argc, char *argv[]) if ((ret = config_sanity(cfg)) != 0) goto err; + /* Write a copy of the config. */ + config_to_file(cfg); + /* Display the configuration. */ if (cfg->verbose > 1) config_print(cfg); @@ -2314,7 +2393,7 @@ start_threads(CONFIG *cfg, WORKLOAD *workp, CONFIG_THREAD *base, u_int num, void *(*func)(void *)) { CONFIG_THREAD *thread; - u_int i, j; + u_int i; int ret; /* Initialize the threads. */ @@ -2323,15 +2402,13 @@ start_threads(CONFIG *cfg, thread->workload = workp; /* - * We don't want the threads executing in lock-step, move each - * new RNG state further along in the sequence. + * We don't want the threads executing in lock-step, seed each + * one differently. 
*/ - if (i == 0) - __wt_random_init(&thread->rnd); - else - thread->rnd = (thread - 1)->rnd; - for (j = 0; j < 1000; ++j) - (void)__wt_random(&thread->rnd); + if ((ret = __wt_random_init_seed(NULL, &thread->rnd)) != 0) { + lprintf(cfg, ret, 0, "Error initializing RNG"); + return (ret); + } /* * Every thread gets a key/data buffer because we don't bother @@ -2427,6 +2504,11 @@ worker_throttle(int64_t throttle, int64_t *ops, struct timespec *interval) if (usecs_to_complete < USEC_PER_SEC) (void)usleep((useconds_t)(USEC_PER_SEC - usecs_to_complete)); + /* + * After sleeping, set the interval to the current time. + */ + if (__wt_epoch(NULL, &now) != 0) + return; *ops = 0; *interval = now; } diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf.h b/src/third_party/wiredtiger/bench/wtperf/wtperf.h index e4b9fc00798..361b135ced7 100644 --- a/src/third_party/wiredtiger/bench/wtperf/wtperf.h +++ b/src/third_party/wiredtiger/bench/wtperf/wtperf.h @@ -116,6 +116,7 @@ struct __truncate_struct { uint64_t last_total_inserts; uint64_t num_stones; uint64_t last_key; + uint64_t catchup_multiplier; }; /* Queue entry for use with the Truncate Logic */ @@ -126,6 +127,12 @@ struct __truncate_queue_entry { }; typedef struct __truncate_queue_entry TRUNCATE_QUEUE_ENTRY; +struct __config_queue_entry { + char *string; + TAILQ_ENTRY(__config_queue_entry) c; +}; +typedef struct __config_queue_entry CONFIG_QUEUE_ENTRY; + #define LOG_PARTIAL_CONFIG ",log=(enabled=false)" /* * NOTE: If you add any fields to this structure here, you must also add @@ -180,6 +187,9 @@ struct __config { /* Configuration structure */ /* Queue head for use with the Truncate Logic */ TAILQ_HEAD(__truncate_qh, __truncate_queue_entry) stone_head; + /* Queue head to save a copy of the config to be output */ + TAILQ_HEAD(__config_qh, __config_queue_entry) config_head; + /* Fields changeable on command line are listed in wtperf_opt.i */ #define OPT_DECLARE_STRUCT #include "wtperf_opt.i" @@ -188,6 +198,7 @@ struct 
__config { /* Configuration structure */ #define ELEMENTS(a) (sizeof(a) / sizeof(a[0])) +#define READ_RANGE_OPS 10 #define THROTTLE_OPS 100 #define THOUSAND (1000ULL) @@ -270,6 +281,8 @@ void config_free(CONFIG *); int config_opt_file(CONFIG *, const char *); int config_opt_line(CONFIG *, const char *); int config_opt_str(CONFIG *, const char *, const char *); +void config_to_file(CONFIG *); +void config_consolidate(CONFIG *); void config_print(CONFIG *); int config_sanity(CONFIG *); void latency_insert(CONFIG *, uint32_t *, uint32_t *, uint32_t *); @@ -304,4 +317,10 @@ generate_key(CONFIG *cfg, char *key_buf, uint64_t keyno) sprintf(key_buf, "%0*" PRIu64, cfg->key_sz - 1, keyno); } +static inline void +extract_key(char *key_buf, uint64_t *keynop) +{ + sscanf(key_buf, "%" SCNu64, keynop); +} + #endif diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf_opt.i b/src/third_party/wiredtiger/bench/wtperf/wtperf_opt.i index be3ba462e0c..3c122e4d186 100644 --- a/src/third_party/wiredtiger/bench/wtperf/wtperf_opt.i +++ b/src/third_party/wiredtiger/bench/wtperf/wtperf_opt.i @@ -117,11 +117,17 @@ DEF_OPT_AS_BOOL(insert_rmw, 0, DEF_OPT_AS_UINT32(key_sz, 20, "key size") DEF_OPT_AS_BOOL(log_partial, 0, "perform partial logging on first table only.") DEF_OPT_AS_UINT32(min_throughput, 0, - "abort if any throughput measured is less than this amount. Requires " - "sample_interval to be configured") + "notify if any throughput measured is less than this amount. " + "Aborts or prints warning based on min_throughput_fatal setting. " + "Requires sample_interval to be configured") +DEF_OPT_AS_BOOL(min_throughput_fatal, 0, + "print warning (false) or abort (true) of min_throughput failure.") DEF_OPT_AS_UINT32(max_latency, 0, - "abort if any latency measured exceeds this number of milliseconds." + "notify if any latency measured exceeds this number of milliseconds." + "Aborts or prints warning based on min_throughput_fatal setting. 
" "Requires sample_interval to be configured") +DEF_OPT_AS_BOOL(max_latency_fatal, 0, + "print warning (false) or abort (true) of max_latency failure.") DEF_OPT_AS_UINT32(pareto, 0, "use pareto distribution for random numbers. Zero " "to disable, otherwise a percentage indicating how aggressive the " "distribution should be.") @@ -134,6 +140,7 @@ DEF_OPT_AS_UINT32(random_range, 0, "if non zero choose a value from within this range as the key for " "insert operations") DEF_OPT_AS_BOOL(random_value, 0, "generate random content for the value") +DEF_OPT_AS_UINT32(read_range, 0, "scan a range of keys after each search") DEF_OPT_AS_BOOL(reopen_connection, 1, "close and reopen the connection between populate and workload phases") DEF_OPT_AS_UINT32(report_interval, 2, @@ -164,8 +171,8 @@ DEF_OPT_AS_STRING(threads, "", "workload configuration: each 'count' " "'update' entries are the ratios of insert, read and update operations " "done by each worker thread; If a throttle value is provided each thread " "will do a maximum of that number of operations per second; multiple " - "workload configurations may be " - "specified; for example, a more complex threads configuration might be " + "workload configurations may be specified per threads configuration; " + "for example, a more complex threads configuration might be " "'threads=((count=2,reads=1)(count=8,reads=1,inserts=2,updates=1))' " "which would create 2 threads doing nothing but reads and 8 threads " "each doing 50% inserts and 25% reads and updates. 
Allowed configuration " diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf_truncate.c b/src/third_party/wiredtiger/bench/wtperf/wtperf_truncate.c index 581d1987947..2aec122875e 100644 --- a/src/third_party/wiredtiger/bench/wtperf/wtperf_truncate.c +++ b/src/third_party/wiredtiger/bench/wtperf/wtperf_truncate.c @@ -54,6 +54,12 @@ setup_truncate(CONFIG *cfg, CONFIG_THREAD *thread, WT_SESSION *session) { session, cfg->uris[0], NULL, NULL, &cursor)) != 0) goto err; + /* + * If we find the workload getting behind we multiply the number of + * records to be truncated. + */ + trunc_cfg->catchup_multiplier = 1; + /* How many entries between each stone. */ trunc_cfg->stone_gap = (workload->truncate_count * workload->truncate_pct) / 100; @@ -133,6 +139,7 @@ run_truncate(CONFIG *cfg, CONFIG_THREAD *thread, TRUNCATE_QUEUE_ENTRY *truncate_item; char *truncate_key; int ret, t_ret; + uint64_t used_stone_gap; ret = 0; trunc_cfg = &thread->trunc_cfg; @@ -145,11 +152,32 @@ run_truncate(CONFIG *cfg, CONFIG_THREAD *thread, trunc_cfg->last_total_inserts = trunc_cfg->total_inserts; /* We are done if there isn't enough data to trigger a new milestone. */ - if (trunc_cfg->expected_total <= trunc_cfg->needed_stones) + if (trunc_cfg->expected_total <= thread->workload->truncate_count) return (0); + /* + * If we are falling behind and using more than one stone per lap we + * should widen the stone gap for this lap to try and catch up quicker. + */ + if (trunc_cfg->expected_total > + thread->workload->truncate_count + trunc_cfg->stone_gap) { + /* + * Increase the multiplier until we create stones that are + * almost large enough to truncate the whole expected table size + * in one operation. 
+ */ + trunc_cfg->catchup_multiplier = + WT_MIN(trunc_cfg->catchup_multiplier + 1, + trunc_cfg->needed_stones - 1); + } else { + /* Back off if we start seeing an improvement */ + trunc_cfg->catchup_multiplier = + WT_MAX(trunc_cfg->catchup_multiplier - 1, 1); + } + used_stone_gap = trunc_cfg->stone_gap * trunc_cfg->catchup_multiplier; + while (trunc_cfg->num_stones < trunc_cfg->needed_stones) { - trunc_cfg->last_key += trunc_cfg->stone_gap; + trunc_cfg->last_key += used_stone_gap; truncate_key = calloc(cfg->key_sz, 1); if (truncate_key == NULL) { lprintf(cfg, ENOMEM, 0, @@ -165,7 +193,7 @@ run_truncate(CONFIG *cfg, CONFIG_THREAD *thread, } generate_key(cfg, truncate_key, trunc_cfg->last_key); truncate_item->key = truncate_key; - truncate_item->diff = trunc_cfg->stone_gap; + truncate_item->diff = used_stone_gap; TAILQ_INSERT_TAIL(&cfg->stone_head, truncate_item, q); trunc_cfg->num_stones++; } @@ -189,7 +217,6 @@ run_truncate(CONFIG *cfg, CONFIG_THREAD *thread, goto err; } - *truncatedp = 1; trunc_cfg->expected_total -= truncate_item->diff; diff --git a/src/third_party/wiredtiger/build_posix/aclocal/version-set.m4 b/src/third_party/wiredtiger/build_posix/aclocal/version-set.m4 index c8b89b7842b..6c7c6eed9cf 100644 --- a/src/third_party/wiredtiger/build_posix/aclocal/version-set.m4 +++ b/src/third_party/wiredtiger/build_posix/aclocal/version-set.m4 @@ -2,8 +2,8 @@ dnl build by dist/s_version VERSION_MAJOR=2 VERSION_MINOR=7 -VERSION_PATCH=0 -VERSION_STRING='"WiredTiger 2.7.0: (November 19, 2015)"' +VERSION_PATCH=1 +VERSION_STRING='"WiredTiger 2.7.1: (December 8, 2015)"' AC_SUBST(VERSION_MAJOR) AC_SUBST(VERSION_MINOR) diff --git a/src/third_party/wiredtiger/build_posix/aclocal/version.m4 b/src/third_party/wiredtiger/build_posix/aclocal/version.m4 index 2ebe4516695..3b690982f9d 100644 --- a/src/third_party/wiredtiger/build_posix/aclocal/version.m4 +++ b/src/third_party/wiredtiger/build_posix/aclocal/version.m4 @@ -1,2 +1,2 @@ dnl WiredTiger product version for AC_INIT. 
Maintained by dist/s_version -2.7.0 +2.7.1 diff --git a/src/third_party/wiredtiger/build_posix/configure.ac.in b/src/third_party/wiredtiger/build_posix/configure.ac.in index de2f8963629..5949fb0509c 100644 --- a/src/third_party/wiredtiger/build_posix/configure.ac.in +++ b/src/third_party/wiredtiger/build_posix/configure.ac.in @@ -103,7 +103,7 @@ esac # Linux requires buffers aligned to 4KB boundaries for O_DIRECT to work. BUFFER_ALIGNMENT=0 -if test "$ac_cv_func_posix_memalign" = "yes" ; then +if test "$ax_cv_func_posix_memalign_works" = "yes" ; then case "$host_os" in linux*) BUFFER_ALIGNMENT=4096 ;; esac diff --git a/src/third_party/wiredtiger/build_win/filelist.win b/src/third_party/wiredtiger/build_win/filelist.win index af6ddf98da9..b845c45823e 100644 --- a/src/third_party/wiredtiger/build_win/filelist.win +++ b/src/third_party/wiredtiger/build_win/filelist.win @@ -121,6 +121,7 @@ src/os_win/os_map.c src/os_win/os_mtx_cond.c src/os_win/os_once.c src/os_win/os_open.c +src/os_win/os_pagesize.c src/os_win/os_path.c src/os_win/os_priv.c src/os_win/os_remove.c diff --git a/src/third_party/wiredtiger/dist/filelist b/src/third_party/wiredtiger/dist/filelist index 52af87c2a68..dde090e5a85 100644 --- a/src/third_party/wiredtiger/dist/filelist +++ b/src/third_party/wiredtiger/dist/filelist @@ -119,6 +119,7 @@ src/os_posix/os_mtx_cond.c src/os_posix/os_mtx_rw.c src/os_posix/os_once.c src/os_posix/os_open.c +src/os_posix/os_pagesize.c src/os_posix/os_path.c src/os_posix/os_priv.c src/os_posix/os_remove.c diff --git a/src/third_party/wiredtiger/dist/log.py b/src/third_party/wiredtiger/dist/log.py index feeb053db3e..6d35bf2e718 100644 --- a/src/third_party/wiredtiger/dist/log.py +++ b/src/third_party/wiredtiger/dist/log.py @@ -8,14 +8,15 @@ import log_data tmp_file = '__tmp' # Map log record types to: -# (C type, pack type, printf format, printf arg(s), printf setup) +# (C type, pack type, printf format, printf arg(s), list of setup functions) field_types = { - 'string' : 
('const char *', 'S', '%s', 'arg', ''), + 'string' : ('const char *', 'S', '%s', 'arg', [ '' ]), 'item' : ('WT_ITEM *', 'u', '%s', 'escaped', - 'WT_ERR(__logrec_jsonify_str(session, &escaped, &arg));'), - 'recno' : ('uint64_t', 'r', '%" PRIu64 "', 'arg', ''), - 'uint32' : ('uint32_t', 'I', '%" PRIu32 "', 'arg', ''), - 'uint64' : ('uint64_t', 'Q', '%" PRIu64 "', 'arg', ''), + [ 'WT_ERR(__logrec_make_json_str(session, &escaped, &arg));', + 'WT_ERR(__logrec_make_hex_str(session, &escaped, &arg));']), + 'recno' : ('uint64_t', 'r', '%" PRIu64 "', 'arg', [ '' ]), + 'uint32' : ('uint32_t', 'I', '%" PRIu32 "', 'arg', [ '' ]), + 'uint64' : ('uint64_t', 'Q', '%" PRIu64 "', 'arg', [ '' ]), } def cintype(f): @@ -38,15 +39,13 @@ def clocaltype(f): return type def escape_decl(fields): - for f in fields: - if 'escaped' in field_types[f[0]][4]: - return '\n\tchar *escaped;' - return '' + return '\n\tchar *escaped;' if has_escape(fields) else '' def has_escape(fields): for f in fields: - if 'escaped' in field_types[f[0]][4]: - return True + for setup in field_types[f[0]][4]: + if 'escaped' in setup: + return True return False def pack_fmt(fields): @@ -65,10 +64,38 @@ def printf_arg(f): arg = field_types[f[0]][3].replace('arg', f[1]) return ' ' + arg -def printf_setup(f): - stmt = field_types[f[0]][4].replace('arg', f[1]) - return '' if stmt == '' else stmt + '\n\t' - +def printf_setup(f, i, nl_indent): + stmt = field_types[f[0]][4][i].replace('arg', f[1]) + return '' if stmt == '' else stmt + nl_indent + +def n_setup(f): + return len(field_types[f[0]][4]) + +# Create a printf line, with an optional setup function. +# ishex indicates that the the field name in the output is modified +# (to add "-hex"), and that the setup and printf are conditional +# in the generated code. 
+def printf_line(f, optype, i, ishex): + ifbegin = '' + ifend = '' + nl_indent = '\n\t' + name = f[1] + postcomma = '' if i + 1 == len(optype.fields) else ',\\n' + precomma = '' + if ishex > 0: + name += '-hex' + ifend = nl_indent + '}' + nl_indent += '\t' + ifbegin = 'if (LF_ISSET(WT_TXN_PRINTLOG_HEX)) {' + nl_indent + if postcomma == '': + precomma = ',\\n' + body = '%s%s(__wt_fprintf(out,' % ( + printf_setup(f, ishex, nl_indent), + 'WT_ERR' if has_escape(optype.fields) else 'WT_RET') + \ + '%s "%s \\"%s\\": \\"%s\\"%s",%s));' % ( + nl_indent, precomma, name, printf_fmt(f), postcomma, + printf_arg(f)) + return ifbegin + body + ifend ##################################################################### # Update log.h with #defines for types @@ -176,7 +203,7 @@ __logrec_json_unpack_str(char *dest, size_t destlen, const char *src, } static int -__logrec_jsonify_str(WT_SESSION_IMPL *session, char **destp, WT_ITEM *item) +__logrec_make_json_str(WT_SESSION_IMPL *session, char **destp, WT_ITEM *item) { \tsize_t needed; @@ -185,6 +212,17 @@ __logrec_jsonify_str(WT_SESSION_IMPL *session, char **destp, WT_ITEM *item) \t(void)__logrec_json_unpack_str(*destp, needed, item->data, item->size); \treturn (0); } + +static int +__logrec_make_hex_str(WT_SESSION_IMPL *session, char **destp, WT_ITEM *item) +{ +\tsize_t needed; + +\tneeded = item->size * 2 + 1; +\tWT_RET(__wt_realloc(session, NULL, needed, destp)); +\t__wt_fill_hex(item->data, item->size, (uint8_t *)*destp, needed, NULL); +\treturn (0); +} ''') # Emit code to read, write and print log operations (within a log record) @@ -255,11 +293,12 @@ __wt_logop_%(name)s_unpack( tfile.write(''' int __wt_logop_%(name)s_print( - WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out) + WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, + FILE *out, uint32_t flags) { %(arg_ret)s\t%(arg_decls)s -\t%(arg_init)sWT_RET(__wt_logop_%(name)s_unpack( 
+\t%(arg_unused)s%(arg_init)sWT_RET(__wt_logop_%(name)s_unpack( \t session, pp, end%(arg_addrs)s)); \tWT_RET(__wt_fprintf(out, " \\"optype\\": \\"%(name)s\\",\\n")); @@ -272,27 +311,22 @@ __wt_logop_%(name)s_print( 'arg_decls' : ('\n\t'.join('%s%s%s;' % (clocaltype(f), '' if clocaltype(f)[-1] == '*' else ' ', f[1]) for f in optype.fields)) + escape_decl(optype.fields), + 'arg_unused' : ('' if has_escape(optype.fields) + else 'WT_UNUSED(flags);\n\t'), 'arg_init' : ('escaped = NULL;\n\t' if has_escape(optype.fields) else ''), 'arg_fini' : ('\nerr:\t__wt_free(session, escaped);\n\treturn (ret);' if has_escape(optype.fields) else '\treturn (0);'), 'arg_addrs' : ''.join(', &%s' % f[1] for f in optype.fields), - 'print_args' : '\n\t'.join( - '%s%s(__wt_fprintf(out,\n\t " \\"%s\\": \\"%s\\",\\n",%s));' % - (printf_setup(f), - 'WT_ERR' if has_escape(optype.fields) else 'WT_RET', - f[1], printf_fmt(f), printf_arg(f)) - for f in optype.fields[:-1]) + str( - '\n\t%s%s(__wt_fprintf(out,\n\t " \\"%s\\": \\"%s\\"",%s));' % - (printf_setup(last_field), - 'WT_ERR' if has_escape(optype.fields) else 'WT_RET', - last_field[1], printf_fmt(last_field), printf_arg(last_field))), + 'print_args' : '\n\t'.join(printf_line(f, optype, i, s) + for i,f in enumerate(optype.fields) for s in range(0, n_setup(f))) }) # Emit the printlog entry point tfile.write(''' int __wt_txn_op_printlog( - WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out) + WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, + FILE *out, uint32_t flags) { \tuint32_t optype, opsize; @@ -308,7 +342,8 @@ for optype in log_data.optypes: tfile.write(''' \tcase %(macro)s: -\t\tWT_RET(%(print_func)s(session, pp, end, out)); +\t\tWT_RET(%(print_func)s(session, pp, end, out, +\t\t flags)); \t\tbreak; ''' % { 'macro' : optype.macro_name(), diff --git a/src/third_party/wiredtiger/dist/s_copyright b/src/third_party/wiredtiger/dist/s_copyright index 020be6ae33d..0816274a367 100755 --- 
a/src/third_party/wiredtiger/dist/s_copyright +++ b/src/third_party/wiredtiger/dist/s_copyright @@ -6,6 +6,7 @@ c1=__wt.copyright.1 c2=__wt.copyright.2 c3=__wt.copyright.3 c4=__wt.copyright.4 +c5=__wt.copyright.5 check() { @@ -34,6 +35,9 @@ check() if `sed -e 1,3p -e 4q -e d $1 | diff - dist/$c4 > /dev/null` ; then return; fi + if `sed -e 2,7p -e 8q -e d $1 | diff - dist/$c5 > /dev/null` ; then + return; + fi echo "$1: copyright information is incorrect" exit 1 @@ -81,6 +85,16 @@ cat > $c4 < $c5 < ../src/docs/changelog.md + sed -e 's, \([0-9a-f]\{7\}\) , [\1](https://github.com/wiredtiger/wiredtiger/commit/\1) ,g' \ + -e 's,\(\(WT\|SERVER\)-[0-9]*\),[\1](https://jira.mongodb.org/browse/\1),g' ../NEWS) > ../src/docs/changelog.md } wtperf_config() diff --git a/src/third_party/wiredtiger/dist/s_funcs b/src/third_party/wiredtiger/dist/s_funcs index 3769ccc4aa7..5fee03b5615 100755 --- a/src/third_party/wiredtiger/dist/s_funcs +++ b/src/third_party/wiredtiger/dist/s_funcs @@ -6,7 +6,7 @@ trap 'rm -f $t; exit 0' 0 1 2 3 13 15 # List of files to search. l=`sed -e 's,#.*,,' -e '/^$/d' -e 's,^,../,' filelist` -l="$l `echo ../src/*/*.i ../src/utilities/*.c`" +l="$l `echo ../src/*/*.i ../src/utilities/*.c ../bench/wtperf/*.c`" ( # Copy out the functions we don't use, but it's OK. diff --git a/src/third_party/wiredtiger/dist/s_longlines b/src/third_party/wiredtiger/dist/s_longlines index 15ca5603385..decedb58f44 100755 --- a/src/third_party/wiredtiger/dist/s_longlines +++ b/src/third_party/wiredtiger/dist/s_longlines @@ -8,10 +8,11 @@ l=`(cd .. 
&& find bench/wtperf examples ext src test -name '*.[chisy]' && find dist -name '*.py' && find src -name '*.in') | - sed -e '/include\/extern\.h/d'\ - -e '/support\/stat\.c/d'` + sed -e '/dist\/stat_data\.py/d' \ + -e '/support\/stat\.c/d' \ + -e '/include\/extern\.h/d'` for f in $l ; do expand -t8 < ../$f | awk -- \ - "{if(length(\$0) > 80) printf(\"%s:%d\\n\", \"$f\", NR)}" + "{if(length(\$0) > 80) printf(\"%s:%d\\n\", \"$f\", NR)}" done diff --git a/src/third_party/wiredtiger/dist/s_string b/src/third_party/wiredtiger/dist/s_string index 08d066f5929..3a4f9e190d3 100755 --- a/src/third_party/wiredtiger/dist/s_string +++ b/src/third_party/wiredtiger/dist/s_string @@ -30,7 +30,8 @@ replace() { # check: # Check the spelling of an individual file. check() { - aspell --lang=en $1 list < ../$2 | + # Strip out git hashes, which are seven character hex strings. + sed 's/ [0-9a-f]\{7\} / /g' ../$2 | aspell --lang=en $1 list | sort -u | comm -23 /dev/stdin s_string.ok > $t test -s $t && { diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok index 3669fa4d608..27583402259 100644 --- a/src/third_party/wiredtiger/dist/s_string.ok +++ b/src/third_party/wiredtiger/dist/s_string.ok @@ -102,6 +102,7 @@ Encryptor Encryptors Enqueue Eron +FALLOC FALLTHROUGH FH FLD @@ -143,6 +144,7 @@ INIT INITIALIZER INMEM INTL +ISA ITEMs Inline Intra @@ -179,6 +181,7 @@ LevelDB Levyx Llqr Llqrt +LoadLoad LockFile Lookaside Lookup @@ -186,6 +189,7 @@ MALLOC MEM MEMALIGN MERCHANTABILITY +MONGODB MSVC MULTIBLOCK MUTEX @@ -224,6 +228,7 @@ Obama Outfmt PARAM POSIX +PPC PREDEFINE PRIu PRNG @@ -281,10 +286,13 @@ Spinlock Spinlocks Split's Stoica +StoreLoad +StoreStore TAILQ TCMalloc TODO TORTIOUS +TSO TXN TXNC Timespec @@ -299,6 +307,7 @@ ULINE URI URIs UTF +UltraSparc Unbuffered UnixLib Unmap @@ -358,6 +367,7 @@ arg argc args argv +asm async asyncopp asyncops @@ -368,6 +378,7 @@ automake bInheritHandle basecfg basho +bcr bdb beginthreadex bigram @@ 
-409,6 +420,7 @@ bzip calloc cas catfmt +ccr cd centric cfg @@ -595,6 +607,7 @@ free'd fscanf fstat fsync +fsyncLock fsyncs ftruncate func @@ -708,6 +721,7 @@ lookaside lookup lookups lossy +lr lrt lru lseek @@ -716,6 +730,7 @@ lsn lsnappy lt lu +lwsync lz lzo madvise @@ -723,6 +738,8 @@ majorp malloc marshall marshalled +mbll +mbss mem memalign membar @@ -799,6 +816,7 @@ os ovfl ownp packv +pagesize parens pareto parserp @@ -877,6 +895,7 @@ runtime rwlock sH sHQ +scalability sched scr sd @@ -1018,6 +1037,7 @@ variable's vectorized versa vfprintf +vm vpack vprintf vrfy diff --git a/src/third_party/wiredtiger/dist/s_whitespace b/src/third_party/wiredtiger/dist/s_whitespace index 38eb5d6c2df..d13de4b5989 100755 --- a/src/third_party/wiredtiger/dist/s_whitespace +++ b/src/third_party/wiredtiger/dist/s_whitespace @@ -32,7 +32,7 @@ for f in `find dist -name '*.py' -name 's_*'`; do done # C-language sources. -for f in `find examples ext src test \ +for f in `find bench examples ext src test \ -name '*.[chi]' -o \ -name '*.dox' -o \ -name '*.in' -o \ diff --git a/src/third_party/wiredtiger/dist/s_win b/src/third_party/wiredtiger/dist/s_win index cdfc71a8a1e..1eb4702d517 100755 --- a/src/third_party/wiredtiger/dist/s_win +++ b/src/third_party/wiredtiger/dist/s_win @@ -62,6 +62,7 @@ win_filelist() -e 's;os_posix/os_mtx_cond.c;os_win/os_mtx_cond.c;' \ -e 's;os_posix/os_once.c;os_win/os_once.c;' \ -e 's;os_posix/os_open.c;os_win/os_open.c;' \ + -e 's;os_posix/os_pagesize.c;os_win/os_pagesize.c;' \ -e 's;os_posix/os_path.c;os_win/os_path.c;' \ -e 's;os_posix/os_priv.c;os_win/os_priv.c;' \ -e 's;os_posix/os_remove.c;os_win/os_remove.c;' \ diff --git a/src/third_party/wiredtiger/dist/stat.py b/src/third_party/wiredtiger/dist/stat.py index d62fda3fcb9..6dcfccfeab5 100644 --- a/src/third_party/wiredtiger/dist/stat.py +++ b/src/third_party/wiredtiger/dist/stat.py @@ -171,9 +171,7 @@ __wt_stat_''' + name + '''_aggregate_single( { ''') for l in sorted(list): - if 'no_aggregate' in 
l.flags: - o = '\tto->' + l.name + ' = from->' + l.name + ';\n' - elif 'max_aggregate' in l.flags: + if 'max_aggregate' in l.flags: o = '\tif (from->' + l.name + ' > to->' + l.name + ')\n' +\ '\t\tto->' + l.name + ' = from->' + l.name + ';\n' else: @@ -197,12 +195,12 @@ __wt_stat_''' + name + '''_aggregate( f.write('\tint64_t v;\n\n') break; for l in sorted(list): - if 'no_aggregate' in l.flags: - o = '\tto->' + l.name + ' = from[0]->' + l.name + ';\n' - elif 'max_aggregate' in l.flags: - o = '\tif ((v = WT_STAT_READ(from, ' + l.name + ')) >\n' +\ - '\t to->' + l.name + ')\n' +\ - '\t\tto->' + l.name + ' = v;\n' + if 'max_aggregate' in l.flags: + o = '\tif ((v = WT_STAT_READ(from, ' + l.name + ')) > ' +\ + 'to->' + l.name + ')\n' + if len(o) > 72: # Account for the leading tab. + o = o.replace(' > ', ' >\n\t ') + o +='\t\tto->' + l.name + ' = v;\n' else: o = '\tto->' + l.name + ' += WT_STAT_READ(from, ' + l.name + ');\n' if len(o) > 72: # Account for the leading tab. diff --git a/src/third_party/wiredtiger/dist/stat_data.py b/src/third_party/wiredtiger/dist/stat_data.py index 3a23071a3f2..41a93961079 100644 --- a/src/third_party/wiredtiger/dist/stat_data.py +++ b/src/third_party/wiredtiger/dist/stat_data.py @@ -8,20 +8,13 @@ # NOTE: All statistics descriptions must have a prefix string followed by ':'. # # Data-source statistics are normally aggregated across the set of underlying -# objects. Additional optionaly configuration flags are available: -# no_aggregate Ignore the value when aggregating statistics +# objects. 
Additional optional configuration flags are available: # max_aggregate Take the maximum value when aggregating statistics -# -# Optional configuration flags: # no_clear Value not cleared when statistics cleared # no_scale Don't scale value per second in the logging tool script # -# The no_clear flag is a little complicated: it means we don't clear the values -# when resetting statistics after each run (necessary when the WiredTiger engine -# is updating values that persist over multiple runs, for example the count of -# cursors), but it also causes the underlying display routines to not treat the -# change between displays as relative to the number of seconds, that is, it's an -# absolute value. The no_clear flag should be set in either case. +# The no_clear and no_scale flags are normally always set together (values that +# are maintained over time are normally not scaled per second). from operator import attrgetter import sys @@ -129,13 +122,11 @@ connection_stats = [ # Async API statistics ########################################## AsyncStat('async_alloc_race', 'number of allocation state races'), - AsyncStat('async_alloc_view', - 'number of operation slots viewed for allocation'), + AsyncStat('async_alloc_view', 'number of operation slots viewed for allocation'), AsyncStat('async_cur_queue', 'current work queue length'), AsyncStat('async_flush', 'number of flush calls'), AsyncStat('async_full', 'number of times operation allocation failed'), - AsyncStat('async_max_queue', - 'maximum work queue length', 'no_clear,no_scale'), + AsyncStat('async_max_queue', 'maximum work queue length', 'no_clear,no_scale'), AsyncStat('async_nowork', 'number of times worker found no work'), AsyncStat('async_op_alloc', 'total allocations'), AsyncStat('async_op_compact', 'total compact calls'), @@ -158,89 +149,59 @@ connection_stats = [ ########################################## # Cache and eviction statistics ########################################## - 
CacheStat('cache_bytes_dirty', - 'tracked dirty bytes in the cache', 'no_clear,no_scale'), - CacheStat('cache_bytes_internal', - 'tracked bytes belonging to internal pages in the cache', - 'no_clear,no_scale'), - CacheStat('cache_bytes_inuse', - 'bytes currently in the cache', 'no_clear,no_scale'), - CacheStat('cache_bytes_leaf', - 'tracked bytes belonging to leaf pages in the cache', - 'no_clear,no_scale'), - CacheStat('cache_bytes_max', - 'maximum bytes configured', 'no_clear,no_scale'), - CacheStat('cache_bytes_overflow', - 'tracked bytes belonging to overflow pages in the cache', - 'no_clear,no_scale'), + CacheStat('cache_bytes_dirty', 'tracked dirty bytes in the cache', 'no_clear,no_scale'), + CacheStat('cache_bytes_internal', 'tracked bytes belonging to internal pages in the cache', 'no_clear,no_scale'), + CacheStat('cache_bytes_inuse', 'bytes currently in the cache', 'no_clear,no_scale'), + CacheStat('cache_bytes_leaf', 'tracked bytes belonging to leaf pages in the cache', 'no_clear,no_scale'), + CacheStat('cache_bytes_max', 'maximum bytes configured', 'no_clear,no_scale'), + CacheStat('cache_bytes_overflow', 'tracked bytes belonging to overflow pages in the cache', 'no_clear,no_scale'), CacheStat('cache_bytes_read', 'bytes read into cache'), CacheStat('cache_bytes_write', 'bytes written from cache'), CacheStat('cache_eviction_app', 'pages evicted by application threads'), CacheStat('cache_eviction_checkpoint', 'checkpoint blocked page eviction'), CacheStat('cache_eviction_clean', 'unmodified pages evicted'), - CacheStat('cache_eviction_deepen', - 'page split during eviction deepened the tree'), + CacheStat('cache_eviction_deepen', 'page split during eviction deepened the tree'), CacheStat('cache_eviction_dirty', 'modified pages evicted'), - CacheStat('cache_eviction_fail', - 'pages selected for eviction unable to be evicted'), - CacheStat('cache_eviction_force', - 'pages evicted because they exceeded the in-memory maximum'), - 
CacheStat('cache_eviction_force_delete', - 'pages evicted because they had chains of deleted items'), - CacheStat('cache_eviction_force_fail', - 'failed eviction of pages that exceeded the in-memory maximum'), + CacheStat('cache_eviction_fail', 'pages selected for eviction unable to be evicted'), + CacheStat('cache_eviction_force', 'pages evicted because they exceeded the in-memory maximum'), + CacheStat('cache_eviction_force_delete', 'pages evicted because they had chains of deleted items'), + CacheStat('cache_eviction_force_fail', 'failed eviction of pages that exceeded the in-memory maximum'), CacheStat('cache_eviction_hazard', 'hazard pointer blocked page eviction'), CacheStat('cache_eviction_internal', 'internal pages evicted'), - CacheStat('cache_eviction_maximum_page_size', - 'maximum page size at eviction', 'no_clear,no_scale'), - CacheStat('cache_eviction_queue_empty', - 'eviction server candidate queue empty when topping up'), - CacheStat('cache_eviction_queue_not_empty', - 'eviction server candidate queue not empty when topping up'), - CacheStat('cache_eviction_server_evicting', - 'eviction server evicting pages'), - CacheStat('cache_eviction_server_not_evicting', - 'eviction server populating queue, but not evicting pages'), - CacheStat('cache_eviction_slow', - 'eviction server unable to reach eviction goal'), - CacheStat('cache_eviction_split_internal', - 'internal pages split during eviction'), + CacheStat('cache_eviction_maximum_page_size', 'maximum page size at eviction', 'no_clear,no_scale'), + CacheStat('cache_eviction_queue_empty', 'eviction server candidate queue empty when topping up'), + CacheStat('cache_eviction_queue_not_empty', 'eviction server candidate queue not empty when topping up'), + CacheStat('cache_eviction_server_evicting', 'eviction server evicting pages'), + CacheStat('cache_eviction_server_not_evicting', 'eviction server populating queue, but not evicting pages'), + CacheStat('cache_eviction_slow', 'eviction server unable to 
reach eviction goal'), + CacheStat('cache_eviction_split_internal', 'internal pages split during eviction'), CacheStat('cache_eviction_split_leaf', 'leaf pages split during eviction'), CacheStat('cache_eviction_walk', 'pages walked for eviction'), - CacheStat('cache_eviction_worker_evicting', - 'eviction worker thread evicting pages'), + CacheStat('cache_eviction_worker_evicting', 'eviction worker thread evicting pages'), CacheStat('cache_inmem_split', 'in-memory page splits'), - CacheStat('cache_inmem_splittable', - 'in-memory page passed criteria to be split'), + CacheStat('cache_inmem_splittable', 'in-memory page passed criteria to be split'), CacheStat('cache_lookaside_insert', 'lookaside table insert calls'), CacheStat('cache_lookaside_remove', 'lookaside table remove calls'), CacheStat('cache_overhead', 'percentage overhead', 'no_clear,no_scale'), - CacheStat('cache_pages_dirty', - 'tracked dirty pages in the cache', 'no_clear,no_scale'), - CacheStat('cache_pages_inuse', - 'pages currently held in the cache', 'no_clear,no_scale'), + CacheStat('cache_pages_dirty', 'tracked dirty pages in the cache', 'no_clear,no_scale'), + CacheStat('cache_pages_inuse', 'pages currently held in the cache', 'no_clear,no_scale'), CacheStat('cache_read', 'pages read into cache'), - CacheStat('cache_read_lookaside', - 'pages read into cache requiring lookaside entries'), + CacheStat('cache_read_lookaside', 'pages read into cache requiring lookaside entries'), CacheStat('cache_write', 'pages written from cache'), - CacheStat('cache_write_lookaside', - 'page written requiring lookaside records'), - CacheStat('cache_write_restore', - 'pages written requiring in-memory restoration'), + CacheStat('cache_write_lookaside', 'page written requiring lookaside records'), + CacheStat('cache_write_restore', 'pages written requiring in-memory restoration'), ########################################## # Dhandle statistics ########################################## - 
DhandleStat('dh_conn_handle_count', - 'connection data handles currently active', 'no_clear,no_scale'), + DhandleStat('dh_conn_handle_count', 'connection data handles currently active', 'no_clear,no_scale'), + DhandleStat('dh_session_handles', 'session dhandles swept'), + DhandleStat('dh_session_sweeps', 'session sweep attempts'), DhandleStat('dh_sweep_close', 'connection sweep dhandles closed'), - DhandleStat('dh_sweep_remove', - 'connection sweep dhandles removed from hash list'), DhandleStat('dh_sweep_ref', 'connection sweep candidate became referenced'), + DhandleStat('dh_sweep_remove', 'connection sweep dhandles removed from hash list'), DhandleStat('dh_sweep_tod', 'connection sweep time-of-death sets'), DhandleStat('dh_sweeps', 'connection sweeps'), - DhandleStat('dh_session_handles', 'session dhandles swept'), - DhandleStat('dh_session_sweeps', 'session sweep attempts'), ########################################## # Logging statistics @@ -257,10 +218,8 @@ connection_stats = [ LogStat('log_flush', 'log flush operations'), LogStat('log_max_filesize', 'maximum log file size', 'no_clear,no_scale'), LogStat('log_prealloc_files', 'pre-allocated log files prepared'), - LogStat('log_prealloc_max', - 'number of pre-allocated log files to create', 'no_clear,no_scale'), - LogStat('log_prealloc_missed', - 'pre-allocated log files not ready and missed'), + LogStat('log_prealloc_max', 'number of pre-allocated log files to create', 'no_clear,no_scale'), + LogStat('log_prealloc_missed', 'pre-allocated log files not ready and missed'), LogStat('log_prealloc_used', 'pre-allocated log files used'), LogStat('log_release_write_lsn', 'log release advances write LSN'), LogStat('log_scan_records', 'records processed by log scan'), @@ -283,46 +242,32 @@ connection_stats = [ ########################################## # Reconciliation statistics ########################################## - RecStat('rec_pages', 'page reconciliation calls'), RecStat('rec_page_delete', 'pages deleted'), 
RecStat('rec_page_delete_fast', 'fast-path pages deleted'), + RecStat('rec_pages', 'page reconciliation calls'), RecStat('rec_pages_eviction', 'page reconciliation calls for eviction'), - RecStat('rec_split_stashed_bytes', - 'split bytes currently awaiting free', 'no_clear,no_scale'), - RecStat('rec_split_stashed_objects', - 'split objects currently awaiting free', 'no_clear,no_scale'), + RecStat('rec_split_stashed_bytes', 'split bytes currently awaiting free', 'no_clear,no_scale'), + RecStat('rec_split_stashed_objects', 'split objects currently awaiting free', 'no_clear,no_scale'), ########################################## # Transaction statistics ########################################## TxnStat('txn_begin', 'transaction begins'), TxnStat('txn_checkpoint', 'transaction checkpoints'), - TxnStat('txn_checkpoint_generation', - 'transaction checkpoint generation', 'no_clear,no_scale'), - TxnStat('txn_checkpoint_running', - 'transaction checkpoint currently running', 'no_clear,no_scale'), - TxnStat('txn_checkpoint_time_max', - 'transaction checkpoint max time (msecs)', 'no_clear,no_scale'), - TxnStat('txn_checkpoint_time_min', - 'transaction checkpoint min time (msecs)', 'no_clear,no_scale'), - TxnStat('txn_checkpoint_time_recent', - 'transaction checkpoint most recent time (msecs)', 'no_clear,no_scale'), - TxnStat('txn_checkpoint_time_total', - 'transaction checkpoint total time (msecs)', 'no_clear,no_scale'), + TxnStat('txn_checkpoint_generation', 'transaction checkpoint generation', 'no_clear,no_scale'), + TxnStat('txn_checkpoint_running', 'transaction checkpoint currently running', 'no_clear,no_scale'), + TxnStat('txn_checkpoint_time_max', 'transaction checkpoint max time (msecs)', 'no_clear,no_scale'), + TxnStat('txn_checkpoint_time_min', 'transaction checkpoint min time (msecs)', 'no_clear,no_scale'), + TxnStat('txn_checkpoint_time_recent', 'transaction checkpoint most recent time (msecs)', 'no_clear,no_scale'), + TxnStat('txn_checkpoint_time_total', 
'transaction checkpoint total time (msecs)', 'no_clear,no_scale'), TxnStat('txn_commit', 'transactions committed'), - TxnStat('txn_fail_cache', - 'transaction failures due to cache overflow'), - TxnStat('txn_pinned_checkpoint_range', - 'transaction range of IDs currently pinned by a checkpoint', - 'no_clear,no_scale'), - TxnStat('txn_pinned_range', - 'transaction range of IDs currently pinned', 'no_clear,no_scale'), - TxnStat('txn_pinned_snapshot_range', - 'transaction range of IDs currently pinned by named snapshots', - 'no_clear,no_scale'), + TxnStat('txn_fail_cache', 'transaction failures due to cache overflow'), + TxnStat('txn_pinned_checkpoint_range', 'transaction range of IDs currently pinned by a checkpoint', 'no_clear,no_scale'), + TxnStat('txn_pinned_range', 'transaction range of IDs currently pinned', 'no_clear,no_scale'), + TxnStat('txn_pinned_snapshot_range', 'transaction range of IDs currently pinned by named snapshots', 'no_clear,no_scale'), + TxnStat('txn_rollback', 'transactions rolled back'), TxnStat('txn_snapshots_created', 'number of named snapshots created'), TxnStat('txn_snapshots_dropped', 'number of named snapshots dropped'), - TxnStat('txn_rollback', 'transactions rolled back'), TxnStat('txn_sync', 'transaction sync calls'), ########################################## @@ -331,23 +276,18 @@ connection_stats = [ LSMStat('lsm_checkpoint_throttle', 'sleep for LSM checkpoint throttle'), LSMStat('lsm_merge_throttle', 'sleep for LSM merge throttle'), LSMStat('lsm_rows_merged', 'rows merged in an LSM tree'), - LSMStat('lsm_work_queue_app', - 'application work units currently queued', 'no_clear,no_scale'), - LSMStat('lsm_work_queue_manager', - 'merge work units currently queued', 'no_clear,no_scale'), + LSMStat('lsm_work_queue_app', 'application work units currently queued', 'no_clear,no_scale'), + LSMStat('lsm_work_queue_manager', 'merge work units currently queued', 'no_clear,no_scale'), LSMStat('lsm_work_queue_max', 'tree queue hit maximum'), - 
LSMStat('lsm_work_queue_switch', - 'switch work units currently queued', 'no_clear,no_scale'), + LSMStat('lsm_work_queue_switch', 'switch work units currently queued', 'no_clear,no_scale'), LSMStat('lsm_work_units_created', 'tree maintenance operations scheduled'), - LSMStat('lsm_work_units_discarded', - 'tree maintenance operations discarded'), + LSMStat('lsm_work_units_discarded', 'tree maintenance operations discarded'), LSMStat('lsm_work_units_done', 'tree maintenance operations executed'), ########################################## # Session operations ########################################## - SessionStat('session_cursor_open', - 'open cursor count', 'no_clear,no_scale'), + SessionStat('session_cursor_open', 'open cursor count', 'no_clear,no_scale'), SessionStat('session_open', 'open session count', 'no_clear,no_scale'), ########################################## @@ -385,8 +325,7 @@ dsrc_stats = [ # Session operations ########################################## SessionStat('session_compact', 'object compaction'), - SessionStat('session_cursor_open', - 'open cursor count', 'no_clear,no_scale'), + SessionStat('session_cursor_open', 'open cursor count', 'no_clear,no_scale'), ########################################## # Cursor operations @@ -394,8 +333,7 @@ dsrc_stats = [ CursorStat('cursor_create', 'create calls'), CursorStat('cursor_insert', 'insert calls'), CursorStat('cursor_insert_bulk', 'bulk-loaded cursor-insert calls'), - CursorStat('cursor_insert_bytes', - 'cursor-insert key and value bytes inserted'), + CursorStat('cursor_insert_bytes', 'cursor-insert key and value bytes inserted'), CursorStat('cursor_next', 'next calls'), CursorStat('cursor_prev', 'prev calls'), CursorStat('cursor_remove', 'remove calls'), @@ -411,33 +349,21 @@ dsrc_stats = [ ########################################## # Btree statistics ########################################## - BtreeStat('btree_checkpoint_generation', - 'btree checkpoint generation', 'no_clear,no_scale'), - 
BtreeStat('btree_column_deleted', - 'column-store variable-size deleted values', 'no_scale'), - BtreeStat('btree_column_fix', - 'column-store fixed-size leaf pages', 'no_scale'), - BtreeStat('btree_column_internal', - 'column-store internal pages', 'no_scale'), - BtreeStat('btree_column_rle', - 'column-store variable-size RLE encoded values', 'no_scale'), - BtreeStat('btree_column_variable', - 'column-store variable-size leaf pages', 'no_scale'), + BtreeStat('btree_checkpoint_generation', 'btree checkpoint generation', 'no_clear,no_scale'), + BtreeStat('btree_column_deleted', 'column-store variable-size deleted values', 'no_scale'), + BtreeStat('btree_column_fix', 'column-store fixed-size leaf pages', 'no_scale'), + BtreeStat('btree_column_internal', 'column-store internal pages', 'no_scale'), + BtreeStat('btree_column_rle', 'column-store variable-size RLE encoded values', 'no_scale'), + BtreeStat('btree_column_variable', 'column-store variable-size leaf pages', 'no_scale'), BtreeStat('btree_compact_rewrite', 'pages rewritten by compaction'), BtreeStat('btree_entries', 'number of key/value pairs', 'no_scale'), - BtreeStat('btree_fixed_len', 'fixed-record size', 'no_aggregate,no_scale'), - BtreeStat('btree_maximum_depth', - 'maximum tree depth', 'max_aggregate,no_scale'), - BtreeStat('btree_maxintlkey', - 'maximum internal page key size', 'max_aggregate,no_scale'), - BtreeStat('btree_maxintlpage', - 'maximum internal page size', 'max_aggregate,no_scale'), - BtreeStat('btree_maxleafkey', - 'maximum leaf page key size', 'max_aggregate,no_scale'), - BtreeStat('btree_maxleafpage', - 'maximum leaf page size', 'max_aggregate,no_scale'), - BtreeStat('btree_maxleafvalue', - 'maximum leaf page value size', 'max_aggregate,no_scale'), + BtreeStat('btree_fixed_len', 'fixed-record size', 'max_aggregate,no_scale'), + BtreeStat('btree_maximum_depth', 'maximum tree depth', 'max_aggregate,no_scale'), + BtreeStat('btree_maxintlkey', 'maximum internal page key size', 
'max_aggregate,no_scale'), + BtreeStat('btree_maxintlpage', 'maximum internal page size', 'max_aggregate,no_scale'), + BtreeStat('btree_maxleafkey', 'maximum leaf page key size', 'max_aggregate,no_scale'), + BtreeStat('btree_maxleafpage', 'maximum leaf page size', 'max_aggregate,no_scale'), + BtreeStat('btree_maxleafvalue', 'maximum leaf page value size', 'max_aggregate,no_scale'), BtreeStat('btree_overflow', 'overflow pages', 'no_scale'), BtreeStat('btree_row_internal', 'row-store internal pages', 'no_scale'), BtreeStat('btree_row_leaf', 'row-store leaf pages', 'no_scale'), @@ -454,26 +380,21 @@ dsrc_stats = [ LSMStat('bloom_size', 'total size of bloom filters', 'no_scale'), LSMStat('lsm_checkpoint_throttle', 'sleep for LSM checkpoint throttle'), LSMStat('lsm_chunk_count', 'chunks in the LSM tree', 'no_scale'), - LSMStat('lsm_generation_max', - 'highest merge generation in the LSM tree', 'max_aggregate,no_scale'), - LSMStat('lsm_lookup_no_bloom', - 'queries that could have benefited ' + - 'from a Bloom filter that did not exist'), + LSMStat('lsm_generation_max', 'highest merge generation in the LSM tree', 'max_aggregate,no_scale'), + LSMStat('lsm_lookup_no_bloom', 'queries that could have benefited from a Bloom filter that did not exist'), LSMStat('lsm_merge_throttle', 'sleep for LSM merge throttle'), ########################################## # Block manager statistics ########################################## - BlockStat('allocation_size', - 'file allocation unit size', 'no_aggregate,no_scale'), + BlockStat('allocation_size', 'file allocation unit size', 'max_aggregate,no_scale'), BlockStat('block_alloc', 'blocks allocated'), BlockStat('block_checkpoint_size', 'checkpoint size', 'no_scale'), BlockStat('block_extension', 'allocations requiring file extension'), BlockStat('block_free', 'blocks freed'), - BlockStat('block_magic', 'file magic number', 'no_aggregate,no_scale'), - BlockStat('block_major', - 'file major version number', 'no_aggregate,no_scale'), - 
BlockStat('block_minor', 'minor version number', 'no_aggregate,no_scale'), + BlockStat('block_magic', 'file magic number', 'max_aggregate,no_scale'), + BlockStat('block_major', 'file major version number', 'max_aggregate,no_scale'), + BlockStat('block_minor', 'minor version number', 'max_aggregate,no_scale'), BlockStat('block_reuse_bytes', 'file bytes available for reuse'), BlockStat('block_size', 'file size in bytes', 'no_scale'), @@ -484,44 +405,33 @@ dsrc_stats = [ CacheStat('cache_bytes_write', 'bytes written from cache'), CacheStat('cache_eviction_checkpoint', 'checkpoint blocked page eviction'), CacheStat('cache_eviction_clean', 'unmodified pages evicted'), - CacheStat('cache_eviction_deepen', - 'page split during eviction deepened the tree'), + CacheStat('cache_eviction_deepen', 'page split during eviction deepened the tree'), CacheStat('cache_eviction_dirty', 'modified pages evicted'), - CacheStat('cache_eviction_fail', - 'data source pages selected for eviction unable to be evicted'), + CacheStat('cache_eviction_fail', 'data source pages selected for eviction unable to be evicted'), CacheStat('cache_eviction_hazard', 'hazard pointer blocked page eviction'), CacheStat('cache_eviction_internal', 'internal pages evicted'), - CacheStat('cache_eviction_split_internal', - 'internal pages split during eviction'), + CacheStat('cache_eviction_split_internal', 'internal pages split during eviction'), CacheStat('cache_eviction_split_leaf', 'leaf pages split during eviction'), CacheStat('cache_inmem_split', 'in-memory page splits'), - CacheStat('cache_inmem_splittable', - 'in-memory page passed criteria to be split'), - CacheStat('cache_overflow_value', - 'overflow values cached in memory', 'no_scale'), + CacheStat('cache_inmem_splittable', 'in-memory page passed criteria to be split'), + CacheStat('cache_overflow_value', 'overflow values cached in memory', 'no_scale'), CacheStat('cache_read', 'pages read into cache'), - CacheStat('cache_read_lookaside', - 'pages read 
into cache requiring lookaside entries'), + CacheStat('cache_read_lookaside', 'pages read into cache requiring lookaside entries'), CacheStat('cache_read_overflow', 'overflow pages read into cache'), CacheStat('cache_write', 'pages written from cache'), - CacheStat('cache_write_lookaside', - 'page written requiring lookaside records'), - CacheStat('cache_write_restore', - 'pages written requiring in-memory restoration'), + CacheStat('cache_write_lookaside', 'page written requiring lookaside records'), + CacheStat('cache_write_restore', 'pages written requiring in-memory restoration'), ########################################## # Compression statistics ########################################## - CompressStat('compress_raw_fail', - 'raw compression call failed, no additional data available'), - CompressStat('compress_raw_fail_temporary', - 'raw compression call failed, additional data available'), + CompressStat('compress_raw_fail', 'raw compression call failed, no additional data available'), + CompressStat('compress_raw_fail_temporary', 'raw compression call failed, additional data available'), CompressStat('compress_raw_ok', 'raw compression call succeeded'), CompressStat('compress_read', 'compressed pages read'), CompressStat('compress_write', 'compressed pages written'), CompressStat('compress_write_fail', 'page written failed to compress'), - CompressStat('compress_write_too_small', - 'page written was too small to compress'), + CompressStat('compress_write_too_small', 'page written was too small to compress'), ########################################## # Reconciliation statistics @@ -529,8 +439,7 @@ dsrc_stats = [ RecStat('rec_dictionary', 'dictionary matches'), RecStat('rec_multiblock_internal', 'internal page multi-block writes'), RecStat('rec_multiblock_leaf', 'leaf page multi-block writes'), - RecStat('rec_multiblock_max', - 'maximum blocks required for a page', 'max_aggregate,no_scale'), + RecStat('rec_multiblock_max', 'maximum blocks required for a 
page', 'max_aggregate,no_scale'), RecStat('rec_overflow_key_internal', 'internal-page overflow keys'), RecStat('rec_overflow_key_leaf', 'leaf-page overflow keys'), RecStat('rec_overflow_value', 'overflow values written'), @@ -539,10 +448,8 @@ dsrc_stats = [ RecStat('rec_page_match', 'page checksum matches'), RecStat('rec_pages', 'page reconciliation calls'), RecStat('rec_pages_eviction', 'page reconciliation calls for eviction'), - RecStat('rec_prefix_compression', - 'leaf page key bytes discarded using prefix compression'), - RecStat('rec_suffix_compression', - 'internal page key bytes discarded using suffix compression'), + RecStat('rec_prefix_compression', 'leaf page key bytes discarded using prefix compression'), + RecStat('rec_suffix_compression', 'internal page key bytes discarded using suffix compression'), ########################################## # Transaction statistics diff --git a/src/third_party/wiredtiger/src/block/block_addr.c b/src/third_party/wiredtiger/src/block/block_addr.c index 6d50e5f0f4e..9ba4ec4a8b2 100644 --- a/src/third_party/wiredtiger/src/block/block_addr.c +++ b/src/third_party/wiredtiger/src/block/block_addr.c @@ -14,7 +14,7 @@ * caller's buffer reference so it can be called repeatedly to load a buffer. 
*/ static int -__block_buffer_to_addr(WT_BLOCK *block, +__block_buffer_to_addr(uint32_t allocsize, const uint8_t **pp, wt_off_t *offsetp, uint32_t *sizep, uint32_t *cksump) { uint64_t o, s, c; @@ -39,8 +39,8 @@ __block_buffer_to_addr(WT_BLOCK *block, *offsetp = 0; *sizep = *cksump = 0; } else { - *offsetp = (wt_off_t)(o + 1) * block->allocsize; - *sizep = (uint32_t)s * block->allocsize; + *offsetp = (wt_off_t)(o + 1) * allocsize; + *sizep = (uint32_t)s * allocsize; *cksump = (uint32_t)c; } return (0); @@ -80,7 +80,8 @@ int __wt_block_buffer_to_addr(WT_BLOCK *block, const uint8_t *p, wt_off_t *offsetp, uint32_t *sizep, uint32_t *cksump) { - return (__block_buffer_to_addr(block, &p, offsetp, sizep, cksump)); + return (__block_buffer_to_addr( + block->allocsize, &p, offsetp, sizep, cksump)); } /* @@ -139,12 +140,12 @@ __wt_block_addr_string(WT_SESSION_IMPL *session, } /* - * __wt_block_buffer_to_ckpt -- + * __block_buffer_to_ckpt -- * Convert a checkpoint cookie into its components. */ -int -__wt_block_buffer_to_ckpt(WT_SESSION_IMPL *session, - WT_BLOCK *block, const uint8_t *p, WT_BLOCK_CKPT *ci) +static int +__block_buffer_to_ckpt(WT_SESSION_IMPL *session, + uint32_t allocsize, const uint8_t *p, WT_BLOCK_CKPT *ci) { uint64_t a; const uint8_t **pp; @@ -154,13 +155,13 @@ __wt_block_buffer_to_ckpt(WT_SESSION_IMPL *session, WT_RET_MSG(session, WT_ERROR, "unsupported checkpoint version"); pp = &p; - WT_RET(__block_buffer_to_addr(block, pp, + WT_RET(__block_buffer_to_addr(allocsize, pp, &ci->root_offset, &ci->root_size, &ci->root_cksum)); - WT_RET(__block_buffer_to_addr(block, pp, + WT_RET(__block_buffer_to_addr(allocsize, pp, &ci->alloc.offset, &ci->alloc.size, &ci->alloc.cksum)); - WT_RET(__block_buffer_to_addr(block, pp, + WT_RET(__block_buffer_to_addr(allocsize, pp, &ci->avail.offset, &ci->avail.size, &ci->avail.cksum)); - WT_RET(__block_buffer_to_addr(block, pp, + WT_RET(__block_buffer_to_addr(allocsize, pp, &ci->discard.offset, &ci->discard.size, 
&ci->discard.cksum)); WT_RET(__wt_vunpack_uint(pp, 0, &a)); ci->file_size = (wt_off_t)a; @@ -170,6 +171,32 @@ __wt_block_buffer_to_ckpt(WT_SESSION_IMPL *session, return (0); } +/* + * __wt_block_buffer_to_ckpt -- + * Convert a checkpoint cookie into its components, block manager version. + */ +int +__wt_block_buffer_to_ckpt(WT_SESSION_IMPL *session, + WT_BLOCK *block, const uint8_t *p, WT_BLOCK_CKPT *ci) +{ + return (__block_buffer_to_ckpt(session, block->allocsize, p, ci)); +} + +/* + * __wt_block_ckpt_decode -- + * Convert a checkpoint cookie into its components, external utility + * version. + */ +int +__wt_block_ckpt_decode(WT_SESSION *wt_session, + size_t allocsize, const uint8_t *p, WT_BLOCK_CKPT *ci) +{ + WT_SESSION_IMPL *session; + + session = (WT_SESSION_IMPL *)wt_session; + return (__block_buffer_to_ckpt(session, (uint32_t)allocsize, p, ci)); +} + /* * __wt_block_ckpt_to_buffer -- * Convert the components into its checkpoint cookie. diff --git a/src/third_party/wiredtiger/src/block/block_compact.c b/src/third_party/wiredtiger/src/block/block_compact.c index d45d0a96da7..cd304b848d4 100644 --- a/src/third_party/wiredtiger/src/block/block_compact.c +++ b/src/third_party/wiredtiger/src/block/block_compact.c @@ -8,7 +8,7 @@ #include "wt_internal.h" -static int __block_dump_avail(WT_SESSION_IMPL *, WT_BLOCK *); +static int __block_dump_avail(WT_SESSION_IMPL *, WT_BLOCK *, bool); /* * __wt_block_compact_start -- @@ -22,8 +22,6 @@ __wt_block_compact_start(WT_SESSION_IMPL *session, WT_BLOCK *block) /* Switch to first-fit allocation. */ __wt_block_configure_first_fit(block, true); - block->compact_pct_tenths = 0; - return (0); } @@ -34,14 +32,21 @@ __wt_block_compact_start(WT_SESSION_IMPL *session, WT_BLOCK *block) int __wt_block_compact_end(WT_SESSION_IMPL *session, WT_BLOCK *block) { + WT_DECL_RET; + WT_UNUSED(session); /* Restore the original allocation plan. 
*/ __wt_block_configure_first_fit(block, false); - block->compact_pct_tenths = 0; + /* Dump the results of the compaction pass. */ + if (WT_VERBOSE_ISSET(session, WT_VERB_COMPACT)) { + __wt_spin_lock(session, &block->live_lock); + ret = __block_dump_avail(session, block, false); + __wt_spin_unlock(session, &block->live_lock); + } - return (0); + return (ret); } /* @@ -70,12 +75,23 @@ __wt_block_compact_skip(WT_SESSION_IMPL *session, WT_BLOCK *block, bool *skipp) if (fh->size <= WT_MEGABYTE) return (0); + /* + * Reset the compaction state information. This is done here, not in the + * compaction "start" routine, because this function is called first to + * determine if compaction is useful. + */ + block->compact_pct_tenths = 0; + block->compact_pages_reviewed = 0; + block->compact_pages_skipped = 0; + block->compact_pages_written = 0; + __wt_spin_lock(session, &block->live_lock); + /* Dump the current state of the file. */ if (WT_VERBOSE_ISSET(session, WT_VERB_COMPACT)) - WT_ERR(__block_dump_avail(session, block)); + WT_ERR(__block_dump_avail(session, block, true)); - /* Sum the available bytes in the first 80% and 90% of the file. */ + /* Sum the available bytes in the initial 80% and 90% of the file. 
*/ avail_eighty = avail_ninety = 0; ninety = fh->size - fh->size / 10; eighty = fh->size - ((fh->size / 10) * 2); @@ -88,23 +104,6 @@ __wt_block_compact_skip(WT_SESSION_IMPL *session, WT_BLOCK *block, bool *skipp) avail_eighty += ext->size; } - WT_ERR(__wt_verbose(session, WT_VERB_COMPACT, - "%s: %" PRIuMAX "MB (%" PRIuMAX ") available space in the first " - "80%% of the file", - block->name, - (uintmax_t)avail_eighty / WT_MEGABYTE, (uintmax_t)avail_eighty)); - WT_ERR(__wt_verbose(session, WT_VERB_COMPACT, - "%s: %" PRIuMAX "MB (%" PRIuMAX ") available space in the first " - "90%% of the file", - block->name, - (uintmax_t)avail_ninety / WT_MEGABYTE, (uintmax_t)avail_ninety)); - WT_ERR(__wt_verbose(session, WT_VERB_COMPACT, - "%s: require 10%% or %" PRIuMAX "MB (%" PRIuMAX ") in the first " - "90%% of the file to perform compaction, compaction %s", - block->name, - (uintmax_t)(fh->size / 10) / WT_MEGABYTE, (uintmax_t)fh->size / 10, - *skipp ? "skipped" : "proceeding")); - /* * Skip files where we can't recover at least 1MB. * @@ -127,6 +126,23 @@ __wt_block_compact_skip(WT_SESSION_IMPL *session, WT_BLOCK *block, bool *skipp) block->compact_pct_tenths = 1; } + WT_ERR(__wt_verbose(session, WT_VERB_COMPACT, + "%s: %" PRIuMAX "MB (%" PRIuMAX ") available space in the first " + "80%% of the file", + block->name, + (uintmax_t)avail_eighty / WT_MEGABYTE, (uintmax_t)avail_eighty)); + WT_ERR(__wt_verbose(session, WT_VERB_COMPACT, + "%s: %" PRIuMAX "MB (%" PRIuMAX ") available space in the first " + "90%% of the file", + block->name, + (uintmax_t)avail_ninety / WT_MEGABYTE, (uintmax_t)avail_ninety)); + WT_ERR(__wt_verbose(session, WT_VERB_COMPACT, + "%s: require 10%% or %" PRIuMAX "MB (%" PRIuMAX ") in the first " + "90%% of the file to perform compaction, compaction %s", + block->name, + (uintmax_t)(fh->size / 10) / WT_MEGABYTE, (uintmax_t)fh->size / 10, + *skipp ? 
"skipped" : "proceeding")); + err: __wt_spin_unlock(session, &block->live_lock); return (ret); @@ -177,6 +193,14 @@ __wt_block_compact_page_skip(WT_SESSION_IMPL *session, } __wt_spin_unlock(session, &block->live_lock); + if (WT_VERBOSE_ISSET(session, WT_VERB_COMPACT)) { + ++block->compact_pages_reviewed; + if (*skipp) + ++block->compact_pages_skipped; + else + ++block->compact_pages_written; + } + return (ret); } @@ -185,7 +209,7 @@ __wt_block_compact_page_skip(WT_SESSION_IMPL *session, * Dump out the avail list so we can see what compaction will look like. */ static int -__block_dump_avail(WT_SESSION_IMPL *session, WT_BLOCK *block) +__block_dump_avail(WT_SESSION_IMPL *session, WT_BLOCK *block, bool start) { WT_EXTLIST *el; WT_EXT *ext; @@ -195,6 +219,20 @@ __block_dump_avail(WT_SESSION_IMPL *session, WT_BLOCK *block) el = &block->live.avail; size = block->fh->size; + WT_RET(__wt_verbose(session, WT_VERB_COMPACT, + "============ %s", + start ? "testing for compaction" : "ending compaction pass")); + + if (!start) { + WT_RET(__wt_verbose(session, WT_VERB_COMPACT, + "pages reviewed: %" PRIuMAX, + block->compact_pages_reviewed)); + WT_RET(__wt_verbose(session, WT_VERB_COMPACT, + "pages skipped: %" PRIuMAX, block->compact_pages_skipped)); + WT_RET(__wt_verbose(session, WT_VERB_COMPACT, + "pages written: %" PRIuMAX, block->compact_pages_written)); + } + WT_RET(__wt_verbose(session, WT_VERB_COMPACT, "file size %" PRIuMAX "MB (%" PRIuMAX ") with %" PRIuMAX "%% space available %" PRIuMAX "MB (%" PRIuMAX ")", @@ -219,6 +257,10 @@ __block_dump_avail(WT_SESSION_IMPL *session, WT_BLOCK *block) } #ifdef __VERBOSE_OUTPUT_PERCENTILE + /* + * The verbose output always displays 10% buckets, running this code + * as well also displays 1% buckets. 
+ */ for (i = 0; i < WT_ELEMENTS(percentile); ++i) { v = percentile[i] * 512; WT_RET(__wt_verbose(session, WT_VERB_COMPACT, diff --git a/src/third_party/wiredtiger/src/block/block_mgr.c b/src/third_party/wiredtiger/src/block/block_mgr.c index 7260cab75d9..f9f66e05d7f 100644 --- a/src/third_party/wiredtiger/src/block/block_mgr.c +++ b/src/third_party/wiredtiger/src/block/block_mgr.c @@ -220,6 +220,18 @@ __bm_free(WT_BM *bm, return (__wt_block_free(session, bm->block, addr, addr_size)); } +/* + * __bm_is_mapped -- + * Return if the file is mapped into memory. + */ +static bool +__bm_is_mapped(WT_BM *bm, WT_SESSION_IMPL *session) +{ + WT_UNUSED(session); + + return (bm->map == NULL ? false : true); +} + /* * __bm_stat -- * Block-manager statistics. @@ -357,6 +369,7 @@ __bm_method_set(WT_BM *bm, bool readonly) (int (*)(WT_BM *, WT_SESSION_IMPL *))__bm_readonly; bm->free = (int (*)(WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t))__bm_readonly; + bm->is_mapped = __bm_is_mapped; bm->preload = __wt_bm_preload; bm->read = __wt_bm_read; bm->salvage_end = (int (*) @@ -367,6 +380,7 @@ __bm_method_set(WT_BM *bm, bool readonly) (WT_BM *, WT_SESSION_IMPL *))__bm_readonly; bm->salvage_valid = (int (*)(WT_BM *, WT_SESSION_IMPL *, uint8_t *, size_t, bool))__bm_readonly; + bm->size = __wt_block_manager_size; bm->stat = __bm_stat; bm->sync = (int (*)(WT_BM *, WT_SESSION_IMPL *, bool))__bm_readonly; @@ -391,12 +405,14 @@ __bm_method_set(WT_BM *bm, bool readonly) bm->compact_skip = __bm_compact_skip; bm->compact_start = __bm_compact_start; bm->free = __bm_free; + bm->is_mapped = __bm_is_mapped; bm->preload = __wt_bm_preload; bm->read = __wt_bm_read; bm->salvage_end = __bm_salvage_end; bm->salvage_next = __bm_salvage_next; bm->salvage_start = __bm_salvage_start; bm->salvage_valid = __bm_salvage_valid; + bm->size = __wt_block_manager_size; bm->stat = __bm_stat; bm->sync = __bm_sync; bm->verify_addr = __bm_verify_addr; diff --git a/src/third_party/wiredtiger/src/block/block_open.c 
b/src/third_party/wiredtiger/src/block/block_open.c index 7cf12d36066..ff70b765d1f 100644 --- a/src/third_party/wiredtiger/src/block/block_open.c +++ b/src/third_party/wiredtiger/src/block/block_open.c @@ -405,27 +405,37 @@ __wt_block_stat(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_DSRC_STATS *stats) * Reading from the live system's structure normally requires locking, * but it's an 8B statistics read, there's no need. */ - stats->allocation_size = block->allocsize; - stats->block_checkpoint_size = (int64_t)block->live.ckpt_size; - stats->block_magic = WT_BLOCK_MAGIC; - stats->block_major = WT_BLOCK_MAJOR_VERSION; - stats->block_minor = WT_BLOCK_MINOR_VERSION; - stats->block_reuse_bytes = (int64_t)block->live.avail.bytes; - stats->block_size = block->fh->size; + WT_STAT_WRITE(stats, allocation_size, block->allocsize); + WT_STAT_WRITE( + stats, block_checkpoint_size, (int64_t)block->live.ckpt_size); + WT_STAT_WRITE(stats, block_magic, WT_BLOCK_MAGIC); + WT_STAT_WRITE(stats, block_major, WT_BLOCK_MAJOR_VERSION); + WT_STAT_WRITE(stats, block_minor, WT_BLOCK_MINOR_VERSION); + WT_STAT_WRITE( + stats, block_reuse_bytes, (int64_t)block->live.avail.bytes); + WT_STAT_WRITE(stats, block_size, block->fh->size); } /* * __wt_block_manager_size -- - * Set the size statistic for a file. + * Return the size of a live block handle. */ int -__wt_block_manager_size( - WT_SESSION_IMPL *session, const char *filename, WT_DSRC_STATS *stats) +__wt_block_manager_size(WT_BM *bm, WT_SESSION_IMPL *session, wt_off_t *sizep) { - wt_off_t filesize; - - WT_RET(__wt_filesize_name(session, filename, false, &filesize)); - stats->block_size = filesize; + WT_UNUSED(session); + *sizep = bm->block->fh == NULL ? 0 : bm->block->fh->size; return (0); } + +/* + * __wt_block_manager_named_size -- + * Return the size of a named file. 
+ */ +int +__wt_block_manager_named_size( + WT_SESSION_IMPL *session, const char *name, wt_off_t *sizep) +{ + return (__wt_filesize_name(session, name, false, sizep)); +} diff --git a/src/third_party/wiredtiger/src/btree/bt_compact.c b/src/third_party/wiredtiger/src/btree/bt_compact.c index 8044d4f852d..8935d39b696 100644 --- a/src/third_party/wiredtiger/src/btree/bt_compact.c +++ b/src/third_party/wiredtiger/src/btree/bt_compact.c @@ -17,9 +17,11 @@ __compact_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp) { WT_BM *bm; WT_DECL_RET; + WT_MULTI *multi; WT_PAGE *page; WT_PAGE_MODIFY *mod; size_t addr_size; + uint32_t i; const uint8_t *addr; *skipp = true; /* Default skip. */ @@ -41,29 +43,46 @@ __compact_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp) /* * If the page is clean, test the original addresses. - * If the page is a 1-to-1 replacement, test the replacement addresses. + * If the page is a replacement, test the replacement addresses. * Ignore empty pages, they get merged into the parent. */ if (mod == NULL || mod->rec_result == 0) { __wt_ref_info(ref, &addr, &addr_size, NULL); if (addr == NULL) return (0); - WT_RET( + return ( bm->compact_page_skip(bm, session, addr, addr_size, skipp)); - } else if (mod->rec_result == WT_PM_REC_REPLACE) { - /* - * The page's modification information can change underfoot if - * the page is being reconciled, serialize with reconciliation. - */ + } + + /* + * The page's modification information can change underfoot if the page + * is being reconciled, serialize with reconciliation. 
+ */ + if (mod->rec_result == WT_PM_REC_REPLACE || + mod->rec_result == WT_PM_REC_MULTIBLOCK) WT_RET(__wt_fair_lock(session, &page->page_lock)); + if (mod->rec_result == WT_PM_REC_REPLACE) ret = bm->compact_page_skip(bm, session, mod->mod_replace.addr, mod->mod_replace.size, skipp); + if (mod->rec_result == WT_PM_REC_MULTIBLOCK) + for (multi = mod->mod_multi, + i = 0; i < mod->mod_multi_entries; ++multi, ++i) { + if (multi->disk_image != NULL) + continue; + if ((ret = bm->compact_page_skip(bm, session, + multi->addr.addr, multi->addr.size, skipp)) != 0) + break; + if (!*skipp) + break; + } + + if (mod->rec_result == WT_PM_REC_REPLACE || + mod->rec_result == WT_PM_REC_MULTIBLOCK) WT_TRET(__wt_fair_unlock(session, &page->page_lock)); - WT_RET(ret); - } - return (0); + + return (ret); } /* @@ -139,7 +158,8 @@ __wt_compact(WT_SESSION_IMPL *session, const char *cfg[]) if (skip) continue; - session->compaction = true; + session->compact_state = WT_COMPACT_SUCCESS; + /* Rewrite the page: mark the page and tree dirty. */ WT_ERR(__wt_page_modify_init(session, ref->page)); __wt_page_modify_set(session, ref->page); diff --git a/src/third_party/wiredtiger/src/btree/bt_curnext.c b/src/third_party/wiredtiger/src/btree/bt_curnext.c index 55843d1cae5..6573bc60165 100644 --- a/src/third_party/wiredtiger/src/btree/bt_curnext.c +++ b/src/third_party/wiredtiger/src/btree/bt_curnext.c @@ -389,6 +389,14 @@ __wt_btcur_iterate_setup(WT_CURSOR_BTREE *cbt) */ cbt->page_deleted_count = 0; +#ifdef HAVE_DIAGNOSTIC + /* + * If starting a new iteration, clear the last-key returned, it doesn't + * apply. + */ + cbt->lastkey->size = 0; + cbt->lastrecno = WT_RECNO_OOB; +#endif /* * If we don't have a search page, then we're done, we're starting at * the beginning or end of the tree, not as a result of a search. 
@@ -430,6 +438,104 @@ __wt_btcur_iterate_setup(WT_CURSOR_BTREE *cbt) } } +#ifdef HAVE_DIAGNOSTIC +/* + * __cursor_key_order_check_col -- + * Check key ordering for column-store cursor movements. + */ +static int +__cursor_key_order_check_col( + WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, bool next) +{ + int cmp; + + cmp = 0; /* -Werror=maybe-uninitialized */ + + if (cbt->lastrecno != WT_RECNO_OOB) { + if (cbt->lastrecno < cbt->recno) + cmp = -1; + if (cbt->lastrecno > cbt->recno) + cmp = 1; + } + + if (cbt->lastrecno == WT_RECNO_OOB || + (next && cmp < 0) || (!next && cmp > 0)) { + cbt->lastrecno = cbt->recno; + return (0); + } + + WT_PANIC_RET(session, EINVAL, + "WT_CURSOR.%s out-of-order returns: returned key %" PRIu64 " then " + "key %" PRIu64, + next ? "next" : "prev", cbt->lastrecno, cbt->recno); +} + +/* + * __cursor_key_order_check_row -- + * Check key ordering for row-store cursor movements. + */ +static int +__cursor_key_order_check_row( + WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, bool next) +{ + WT_BTREE *btree; + WT_ITEM *key; + WT_DECL_RET; + WT_DECL_ITEM(a); + WT_DECL_ITEM(b); + int cmp; + + btree = S2BT(session); + key = &cbt->iface.key; + cmp = 0; /* -Werror=maybe-uninitialized */ + + if (cbt->lastkey->size != 0) + WT_RET(__wt_compare( + session, btree->collator, cbt->lastkey, key, &cmp)); + + if (cbt->lastkey->size == 0 || (next && cmp < 0) || (!next && cmp > 0)) + return (__wt_buf_set(session, cbt->lastkey, + cbt->iface.key.data, cbt->iface.key.size)); + + WT_ERR(__wt_scr_alloc(session, 512, &a)); + WT_ERR(__wt_buf_set_printable( + session, a, cbt->lastkey->data, cbt->lastkey->size)); + + WT_ERR(__wt_scr_alloc(session, 512, &b)); + WT_ERR(__wt_buf_set_printable(session, b, key->data, key->size)); + + WT_PANIC_ERR(session, EINVAL, + "WT_CURSOR.%s out-of-order returns: returned key %.*s then " + "key %.*s", + next ? 
"next" : "prev", + (int)a->size, (const char *)a->data, + (int)b->size, (const char *)b->data); + +err: __wt_scr_free(session, &a); + __wt_scr_free(session, &b); + + return (ret); +} + +/* + * __wt_cursor_key_order_check -- + * Check key ordering for cursor movements. + */ +int +__wt_cursor_key_order_check( + WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, bool next) +{ + switch (cbt->ref->page->type) { + case WT_PAGE_COL_FIX: + case WT_PAGE_COL_VAR: + return (__cursor_key_order_check_col(session, cbt, next)); + case WT_PAGE_ROW_LEAF: + return (__cursor_key_order_check_row(session, cbt, next)); + WT_ILLEGAL_VALUE(session); + } +} +#endif + /* * __wt_btcur_next -- * Move to the next record in the tree. @@ -531,6 +637,11 @@ __wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating) WT_ERR_TEST(cbt->ref == NULL, WT_NOTFOUND); } +#ifdef HAVE_DIAGNOSTIC + if (ret == 0) + WT_ERR(__wt_cursor_key_order_check(session, cbt, true)); +#endif + err: if (ret != 0) WT_TRET(__cursor_reset(cbt)); return (ret); diff --git a/src/third_party/wiredtiger/src/btree/bt_curprev.c b/src/third_party/wiredtiger/src/btree/bt_curprev.c index 1d23b976edd..1e4b1daa090 100644 --- a/src/third_party/wiredtiger/src/btree/bt_curprev.c +++ b/src/third_party/wiredtiger/src/btree/bt_curprev.c @@ -618,6 +618,10 @@ __wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating) WT_ERR(__wt_tree_walk(session, &cbt->ref, flags)); WT_ERR_TEST(cbt->ref == NULL, WT_NOTFOUND); } +#ifdef HAVE_DIAGNOSTIC + if (ret == 0) + WT_ERR(__wt_cursor_key_order_check(session, cbt, false)); +#endif err: if (ret != 0) WT_TRET(__cursor_reset(cbt)); diff --git a/src/third_party/wiredtiger/src/btree/bt_cursor.c b/src/third_party/wiredtiger/src/btree/bt_cursor.c index f2bf2978320..28b51fd2865 100644 --- a/src/third_party/wiredtiger/src/btree/bt_cursor.c +++ b/src/third_party/wiredtiger/src/btree/bt_cursor.c @@ -62,8 +62,18 @@ __cursor_size_chk(WT_SESSION_IMPL *session, WT_ITEM *kv) static inline int __cursor_fix_implicit(WT_BTREE *btree, 
WT_CURSOR_BTREE *cbt) { - return (btree->type == BTREE_COL_FIX && - !F_ISSET(cbt, WT_CBT_MAX_RECORD)); + /* + * When there's no exact match, column-store search returns the key + * nearest the searched-for key (continuing past keys smaller than the + * searched-for key to return the next-largest key). Therefore, if the + * returned comparison is -1, the searched-for key was larger than any + * row on the page's standard information or column-store insert list. + * + * If the returned comparison is NOT -1, there was a row equal to or + * larger than the searched-for key, and we implicitly create missing + * rows. + */ + return (btree->type == BTREE_COL_FIX && cbt->compare != -1); } /* @@ -502,19 +512,14 @@ retry: WT_RET(__cursor_func_init(cbt, true)); case BTREE_COL_VAR: /* * If WT_CURSTD_APPEND is set, insert a new record (ignoring - * the application's record number). First we search for the - * maximum possible record number so the search ends on the - * last page. The real record number is assigned by the - * serialized append operation. + * the application's record number). The real record number + * is assigned by the serialized append operation. */ if (F_ISSET(cursor, WT_CURSTD_APPEND)) - cbt->iface.recno = UINT64_MAX; + cbt->iface.recno = WT_RECNO_OOB; WT_ERR(__cursor_col_search(session, cbt, NULL)); - if (F_ISSET(cursor, WT_CURSTD_APPEND)) - cbt->iface.recno = WT_RECNO_OOB; - /* * If not overwriting, fail if the key exists. Creating a * record past the end of the tree in a fixed-length @@ -830,6 +835,7 @@ __wt_btcur_next_random(WT_CURSOR_BTREE *cbt) WT_DECL_RET; WT_SESSION_IMPL *session; WT_UPDATE *upd; + wt_off_t size; uint64_t skip; session = (WT_SESSION_IMPL *)cbt->iface.session; @@ -866,10 +872,12 @@ __wt_btcur_next_random(WT_CURSOR_BTREE *cbt) * !!! * Ideally, the number would be prime to avoid restart issues. 
*/ - if (cbt->next_random_sample_size != 0) + if (cbt->next_random_sample_size != 0) { + WT_ERR(btree->bm->size(btree->bm, session, &size)); cbt->next_random_leaf_skip = (uint64_t) - ((btree->bm->block->fh->size / btree->allocsize) / + ((size / btree->allocsize) / cbt->next_random_sample_size) + 1; + } /* * Choose a leaf page from the tree. @@ -1225,6 +1233,11 @@ __wt_btcur_open(WT_CURSOR_BTREE *cbt) { cbt->row_key = &cbt->_row_key; cbt->tmp = &cbt->_tmp; + +#ifdef HAVE_DIAGNOSTIC + cbt->lastkey = &cbt->_lastkey; + cbt->lastrecno = WT_RECNO_OOB; +#endif } /* @@ -1250,6 +1263,9 @@ __wt_btcur_close(WT_CURSOR_BTREE *cbt, bool lowlevel) __wt_buf_free(session, &cbt->_row_key); __wt_buf_free(session, &cbt->_tmp); +#ifdef HAVE_DIAGNOSTIC + __wt_buf_free(session, &cbt->_lastkey); +#endif return (ret); } diff --git a/src/third_party/wiredtiger/src/btree/bt_debug.c b/src/third_party/wiredtiger/src/btree/bt_debug.c index d52a94a6da2..393f869ece9 100644 --- a/src/third_party/wiredtiger/src/btree/bt_debug.c +++ b/src/third_party/wiredtiger/src/btree/bt_debug.c @@ -74,9 +74,7 @@ __wt_debug_set_verbose(WT_SESSION_IMPL *session, const char *v) static inline void __debug_hex_byte(WT_DBG *ds, uint8_t v) { - static const char hex[] = "0123456789abcdef"; - - __dmsg(ds, "#%c%c", hex[(v & 0xf0) >> 4], hex[v & 0x0f]); + __dmsg(ds, "#%c%c", __wt_hex[(v & 0xf0) >> 4], __wt_hex[v & 0x0f]); } /* diff --git a/src/third_party/wiredtiger/src/btree/bt_huffman.c b/src/third_party/wiredtiger/src/btree/bt_huffman.c index d9ff9616072..a34e57796a8 100644 --- a/src/third_party/wiredtiger/src/btree/bt_huffman.c +++ b/src/third_party/wiredtiger/src/btree/bt_huffman.c @@ -332,11 +332,17 @@ __wt_huffman_read(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *ip, for (tp = table, lineno = 1; (ret = fscanf(fp, "%" SCNi64 " %" SCNi64, &symbol, &frequency)) != EOF; ++tp, ++lineno) { - if (lineno > entries) + /* + * Entries is 0-based, that is, there are (entries +1) possible + * values that can be configured. 
The line number is 1-based, so + * adjust the test for too many entries, and report (entries +1) + * in the error as the maximum possible number of entries. + */ + if (lineno > entries + 1) WT_ERR_MSG(session, EINVAL, "Huffman table file %.*s is corrupted, " "more than %" PRIu32 " entries", - (int)ip->len, ip->str, entries); + (int)ip->len, ip->str, entries + 1); if (ret != 2) WT_ERR_MSG(session, EINVAL, "line %u of Huffman table file %.*s is corrupted: " diff --git a/src/third_party/wiredtiger/src/btree/bt_page.c b/src/third_party/wiredtiger/src/btree/bt_page.c index 8808f0b1a85..fdccf033828 100644 --- a/src/third_party/wiredtiger/src/btree/bt_page.c +++ b/src/third_party/wiredtiger/src/btree/bt_page.c @@ -272,7 +272,7 @@ __inmem_col_int(WT_SESSION_IMPL *session, WT_PAGE *page) const WT_PAGE_HEADER *dsk; WT_PAGE_INDEX *pindex; WT_REF **refp, *ref; - uint32_t i; + uint32_t hint, i; btree = S2BT(session); dsk = page->dsk; @@ -284,9 +284,11 @@ __inmem_col_int(WT_SESSION_IMPL *session, WT_PAGE *page) */ pindex = WT_INTL_INDEX_GET_SAFE(page); refp = pindex->index; + hint = 0; WT_CELL_FOREACH(btree, dsk, cell, unpack, i) { ref = *refp++; ref->home = page; + ref->pindex_hint = hint++; __wt_cell_unpack(cell, unpack); ref->addr = cell; @@ -404,7 +406,7 @@ __inmem_row_int(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *sizep) const WT_PAGE_HEADER *dsk; WT_PAGE_INDEX *pindex; WT_REF *ref, **refp; - uint32_t i; + uint32_t hint, i; bool overflow_keys; btree = S2BT(session); @@ -421,9 +423,11 @@ __inmem_row_int(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *sizep) pindex = WT_INTL_INDEX_GET_SAFE(page); refp = pindex->index; overflow_keys = false; + hint = 0; WT_CELL_FOREACH(btree, dsk, cell, unpack, i) { ref = *refp; ref->home = page; + ref->pindex_hint = hint++; __wt_cell_unpack(cell, unpack); switch (unpack->type) { diff --git a/src/third_party/wiredtiger/src/btree/bt_slvg.c b/src/third_party/wiredtiger/src/btree/bt_slvg.c index 756ffd98f3a..b5c299b9ea9 100644 --- 
a/src/third_party/wiredtiger/src/btree/bt_slvg.c +++ b/src/third_party/wiredtiger/src/btree/bt_slvg.c @@ -1807,7 +1807,7 @@ err: if (page != NULL) */ static int __slvg_row_build_internal( - WT_SESSION_IMPL *session, uint32_t leaf_cnt, WT_STUFF *ss) + WT_SESSION_IMPL *session, uint32_t leaf_cnt, WT_STUFF *ss) { WT_ADDR *addr; WT_DECL_RET; diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c index 12f4197e9e7..69c787c9385 100644 --- a/src/third_party/wiredtiger/src/btree/bt_split.c +++ b/src/third_party/wiredtiger/src/btree/bt_split.c @@ -14,6 +14,22 @@ to_incr += __len; \ } while (0) +/* + * A note on error handling: main split functions first allocate/initialize new + * structures; failures during that period are handled by discarding the memory + * and returning an error code, the caller knows the split didn't happen and + * proceeds accordingly. Second, split functions update the tree, and a failure + * in that period is catastrophic, any partial update to the tree requires a + * panic, we can't recover. Third, once the split is complete and the tree has + * been fully updated, we have to ignore most errors, the split is complete and + * correct, callers have to proceed accordingly. + */ +typedef enum { + WT_ERR_IGNORE, /* Ignore minor errors */ + WT_ERR_PANIC, /* Panic on all errors */ + WT_ERR_RETURN /* Clean up and return error */ +} WT_SPLIT_ERROR_PHASE; + /* * __split_oldest_gen -- * Calculate the oldest active split generation. 
@@ -512,25 +528,13 @@ __split_root(WT_SESSION_IMPL *session, WT_PAGE *root) WT_PAGE_INDEX *alloc_index, *child_pindex, *pindex; WT_REF **alloc_refp; WT_REF **child_refp, *ref, **root_refp; + WT_SPLIT_ERROR_PHASE complete; size_t child_incr, root_decr, root_incr, size; uint64_t split_gen; uint32_t children, chunk, i, j, remain; uint32_t slots; void *p; - /* - * A note on error handling: this function first allocates/initializes - * new structures; failures during that period are handled by discarding - * the memory and returning an error code, our caller knows the split - * didn't happen and proceeds accordingly. Second, this function updates - * the tree, and a failure in that period is catastrophic, any partial - * update to the tree requires a panic, we can't recover. Third, once - * the split is complete and the tree has been fully updated, we have to - * ignore most errors because the split is complete and correct, callers - * have to proceed accordingly. - */ - enum { ERR_RETURN, ERR_PANIC, ERR_IGNORE } complete; - WT_STAT_FAST_CONN_INCR(session, cache_eviction_deepen); WT_STAT_FAST_DATA_INCR(session, cache_eviction_deepen); WT_STAT_FAST_CONN_INCR(session, cache_eviction_split_internal); @@ -539,7 +543,7 @@ __split_root(WT_SESSION_IMPL *session, WT_PAGE *root) btree = S2BT(session); alloc_index = NULL; root_decr = root_incr = 0; - complete = ERR_RETURN; + complete = WT_ERR_RETURN; /* The root page will be marked dirty, make sure that will succeed. */ WT_RET(__wt_page_modify_init(session, root)); @@ -623,7 +627,7 @@ __split_root(WT_SESSION_IMPL *session, WT_PAGE *root) * threads may be underneath us right now changing the structure * state.) However, if the WT_REF structures reference on-page * information, we have to fix that, because the disk image for - * the page that has an page index entry for the WT_REF is about + * the page that has a page index entry for the WT_REF is about * to change. 
*/ child_pindex = WT_INTL_INDEX_GET_SAFE(child); @@ -641,7 +645,7 @@ __split_root(WT_SESSION_IMPL *session, WT_PAGE *root) root_refp - pindex->index == (ptrdiff_t)pindex->entries); /* Start making real changes to the tree, errors are fatal. */ - complete = ERR_PANIC; + complete = WT_ERR_PANIC; /* Prepare the WT_REFs for the move. */ __split_ref_step1(session, alloc_index, false); @@ -661,7 +665,7 @@ __split_root(WT_SESSION_IMPL *session, WT_PAGE *root) WT_ERR(__split_ref_step2(session, alloc_index, false)); /* The split is complete and correct, ignore benign errors. */ - complete = ERR_IGNORE; + complete = WT_ERR_IGNORE; /* We've installed the allocated page-index, ensure error handling. */ alloc_index = NULL; @@ -687,15 +691,15 @@ __split_root(WT_SESSION_IMPL *session, WT_PAGE *root) __wt_page_modify_set(session, root); err: switch (complete) { - case ERR_RETURN: + case WT_ERR_RETURN: __wt_free_ref_index(session, root, alloc_index, true); break; - case ERR_PANIC: + case WT_ERR_PANIC: __wt_err(session, ret, "fatal error during root page split to deepen the tree"); ret = WT_PANIC; break; - case ERR_IGNORE: + case WT_ERR_IGNORE: if (ret != 0 && ret != WT_PANIC) { __wt_err(session, ret, "ignoring not-fatal error during root page split " @@ -721,19 +725,21 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new, WT_PAGE *parent; WT_PAGE_INDEX *alloc_index, *pindex; WT_REF **alloc_refp, *next_ref; + WT_SPLIT_ERROR_PHASE complete; size_t parent_decr, size; uint64_t split_gen; - uint32_t i, j; + uint32_t hint, i, j; uint32_t deleted_entries, parent_entries, result_entries; uint32_t *deleted_refs; - bool complete, empty_parent; + bool empty_parent; parent = ref->home; alloc_index = pindex = NULL; parent_decr = 0; parent_entries = 0; - complete = empty_parent = false; + empty_parent = false; + complete = WT_ERR_RETURN; /* The parent page will be marked dirty, make sure that will succeed. 
*/ WT_RET(__wt_page_modify_init(session, parent)); @@ -751,7 +757,7 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new, * array anyway. Switch them to the special split state, so that any * reading thread will restart. */ - WT_RET(__wt_scr_alloc(session, 10 * sizeof(uint32_t), &scr)); + WT_ERR(__wt_scr_alloc(session, 10 * sizeof(uint32_t), &scr)); for (deleted_entries = 0, i = 0; i < parent_entries; ++i) { next_ref = pindex->index[i]; WT_ASSERT(session, next_ref->state != WT_REF_SPLIT); @@ -791,28 +797,40 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new, * Allocate and initialize a new page index array for the parent, then * copy references from the original index array, plus references from * the newly created split array, into place. + * + * Update the WT_REF's page-index hint as we go. This can race with a + * thread setting the hint based on an older page-index, and the change + * isn't backed out in the case of an error, so there ways for the hint + * to be wrong; OK because it's just a hint. */ size = sizeof(WT_PAGE_INDEX) + result_entries * sizeof(WT_REF *); WT_ERR(__wt_calloc(session, 1, size, &alloc_index)); parent_incr += size; alloc_index->index = (WT_REF **)(alloc_index + 1); alloc_index->entries = result_entries; - for (alloc_refp = alloc_index->index, i = 0; i < parent_entries; ++i) { + for (alloc_refp = alloc_index->index, + hint = i = 0; i < parent_entries; ++i) { next_ref = pindex->index[i]; if (next_ref == ref) for (j = 0; j < new_entries; ++j) { ref_new[j]->home = parent; + ref_new[j]->pindex_hint = hint++; *alloc_refp++ = ref_new[j]; } - else if (next_ref->state != WT_REF_SPLIT) + else if (next_ref->state != WT_REF_SPLIT) { /* Skip refs we have marked for deletion. */ + next_ref->pindex_hint = hint++; *alloc_refp++ = next_ref; + } } /* Check that we filled in all the entries. 
*/ WT_ASSERT(session, alloc_refp - alloc_index->index == (ptrdiff_t)result_entries); + /* Start making real changes to the tree, errors are fatal. */ + complete = WT_ERR_PANIC; + /* * Confirm the parent page's index hasn't moved then update it, which * makes the split visible to threads descending the tree. @@ -853,16 +871,8 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new, */ WT_FULL_BARRIER(); - /* - * A note on error handling: failures before we swapped the new page - * index into the parent can be resolved by freeing allocated memory - * because the original page is unchanged, we can continue to use it - * and we have not yet modified the parent. Failures after we swap - * the new page index into the parent are also relatively benign, the - * split is OK and complete. For those reasons, we ignore errors past - * this point unless there's a panic. - */ - complete = true; + /* The split is complete and correct, ignore benign errors. */ + complete = WT_ERR_IGNORE; WT_ERR(__wt_verbose(session, WT_VERB_SPLIT, "%p: %s %s" "split into parent %p, %" PRIu32 " -> %" PRIu32 @@ -946,7 +956,8 @@ err: __wt_scr_free(session, &scr); * nothing really bad can have happened, and our caller has to proceed * with the split. */ - if (!complete) { + switch (complete) { + case WT_ERR_RETURN: for (i = 0; i < parent_entries; ++i) { next_ref = pindex->index[i]; if (next_ref->state == WT_REF_SPLIT) @@ -954,20 +965,28 @@ err: __wt_scr_free(session, &scr); } __wt_free_ref_index(session, NULL, alloc_index, false); - /* * The split couldn't proceed because the parent would be empty, * return EBUSY so our caller knows to unlock the WT_REF that's * being deleted, but don't be noisy, there's nothing wrong. 
*/ if (empty_parent) - return (EBUSY); + ret = EBUSY; + break; + case WT_ERR_PANIC: + __wt_err(session, ret, "fatal error during parent page split"); + ret = WT_PANIC; + break; + case WT_ERR_IGNORE: + if (ret != 0 && ret != WT_PANIC) { + __wt_err(session, ret, + "ignoring not-fatal error during parent page " + "split"); + ret = 0; + } + break; } - - if (ret != 0 && ret != WT_PANIC) - __wt_err(session, ret, - "ignoring not-fatal error during parent page split"); - return (ret == WT_PANIC || !complete ? ret : 0); + return (ret); } /* @@ -983,25 +1002,13 @@ __split_internal(WT_SESSION_IMPL *session, WT_PAGE *parent, WT_PAGE *page) WT_PAGE_INDEX *alloc_index, *child_pindex, *pindex, *replace_index; WT_REF **alloc_refp; WT_REF **child_refp, *page_ref, **page_refp, *ref; + WT_SPLIT_ERROR_PHASE complete; size_t child_incr, page_decr, page_incr, parent_incr, size; uint64_t split_gen; uint32_t children, chunk, i, j, remain; uint32_t slots; void *p; - /* - * A note on error handling: this function first allocates/initializes - * new structures; failures during that period are handled by discarding - * the memory and returning an error code, our caller knows the split - * didn't happen and proceeds accordingly. Second, this function updates - * the tree, and a failure in that period is catastrophic, any partial - * update to the tree requires a panic, we can't recover. Third, once - * the split is complete and the tree has been fully updated, we have to - * ignore most errors because the split is complete and correct, callers - * have to proceed accordingly. 
- */ - enum { ERR_RETURN, ERR_PANIC, ERR_IGNORE } complete; - WT_STAT_FAST_CONN_INCR(session, cache_eviction_split_internal); WT_STAT_FAST_DATA_INCR(session, cache_eviction_split_internal); @@ -1012,7 +1019,7 @@ __split_internal(WT_SESSION_IMPL *session, WT_PAGE *parent, WT_PAGE *page) alloc_index = replace_index = NULL; page_ref = page->pg_intl_parent_ref; page_decr = page_incr = parent_incr = 0; - complete = ERR_RETURN; + complete = WT_ERR_RETURN; /* * Our caller is holding the page locked to single-thread splits, which @@ -1133,7 +1140,7 @@ __split_internal(WT_SESSION_IMPL *session, WT_PAGE *parent, WT_PAGE *page) page_refp - pindex->index == (ptrdiff_t)pindex->entries); /* Start making real changes to the tree, errors are fatal. */ - complete = ERR_PANIC; + complete = WT_ERR_PANIC; /* Prepare the WT_REFs for the move. */ __split_ref_step1(session, alloc_index, true); @@ -1157,7 +1164,7 @@ __split_internal(WT_SESSION_IMPL *session, WT_PAGE *parent, WT_PAGE *page) WT_ERR(__split_ref_step2(session, alloc_index, true)); /* The split is complete and correct, ignore benign errors. 
*/ - complete = ERR_IGNORE; + complete = WT_ERR_IGNORE; /* * Push out the changes: not required for correctness, but no reason @@ -1193,16 +1200,16 @@ __split_internal(WT_SESSION_IMPL *session, WT_PAGE *parent, WT_PAGE *page) __wt_page_modify_set(session, page); err: switch (complete) { - case ERR_RETURN: + case WT_ERR_RETURN: __wt_free_ref_index(session, page, alloc_index, true); __wt_free_ref_index(session, page, replace_index, false); break; - case ERR_PANIC: + case WT_ERR_PANIC: __wt_err(session, ret, "fatal error during internal page split"); ret = WT_PANIC; break; - case ERR_IGNORE: + case WT_ERR_IGNORE: if (ret != 0 && ret != WT_PANIC) { __wt_err(session, ret, "ignoring not-fatal error during internal page " diff --git a/src/third_party/wiredtiger/src/btree/bt_stat.c b/src/third_party/wiredtiger/src/btree/bt_stat.c index 5dd75835b0b..ef70160aa72 100644 --- a/src/third_party/wiredtiger/src/btree/bt_stat.c +++ b/src/third_party/wiredtiger/src/btree/bt_stat.c @@ -35,10 +35,10 @@ __wt_btree_stat_init(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst) WT_STAT_SET(session, stats, btree_fixed_len, btree->bitcnt); WT_STAT_SET(session, stats, btree_maximum_depth, btree->maximum_depth); - WT_STAT_SET(session, stats, btree_maxintlpage, btree->maxintlpage); WT_STAT_SET(session, stats, btree_maxintlkey, btree->maxintlkey); - WT_STAT_SET(session, stats, btree_maxleafpage, btree->maxleafpage); + WT_STAT_SET(session, stats, btree_maxintlpage, btree->maxintlpage); WT_STAT_SET(session, stats, btree_maxleafkey, btree->maxleafkey); + WT_STAT_SET(session, stats, btree_maxleafpage, btree->maxleafpage); WT_STAT_SET(session, stats, btree_maxleafvalue, btree->maxleafvalue); /* Everything else is really, really expensive. 
*/ diff --git a/src/third_party/wiredtiger/src/btree/col_srch.c b/src/third_party/wiredtiger/src/btree/col_srch.c index e9fa570f97b..c5e2abbe440 100644 --- a/src/third_party/wiredtiger/src/btree/col_srch.c +++ b/src/third_party/wiredtiger/src/btree/col_srch.c @@ -8,13 +8,61 @@ #include "wt_internal.h" +/* + * __check_leaf_key_range -- + * Check the search key is in the leaf page's key range. + */ +static inline int +__check_leaf_key_range(WT_SESSION_IMPL *session, + uint64_t recno, WT_REF *leaf, WT_CURSOR_BTREE *cbt) +{ + WT_PAGE_INDEX *pindex; + uint32_t indx; + + /* + * There are reasons we can't do the fast checks, and we continue with + * the leaf page search in those cases, only skipping the complete leaf + * page search if we know it's not going to work. + */ + cbt->compare = 0; + + /* + * Check if the search key is smaller than the parent's starting key for + * this page. + */ + if (recno < leaf->key.recno) { + cbt->compare = 1; /* page keys > search key */ + return (0); + } + + /* + * Check if the search key is greater than or equal to the starting key + * for the parent's next page. + * + * !!! + * Check that "indx + 1" is a valid page-index entry first, because it + * also checks that "indx" is a valid page-index entry, and we have to + * do that latter check before looking at the indx slot of the array + * for a match to leaf (in other words, our page hint might be wrong). + */ + WT_INTL_INDEX_GET(session, leaf->home, pindex); + indx = leaf->pindex_hint; + if (indx + 1 < pindex->entries && pindex->index[indx] == leaf) + if (recno >= pindex->index[indx + 1]->key.recno) { + cbt->compare = -1; /* page keys < search key */ + return (0); + } + + return (0); +} + /* * __wt_col_search -- * Search a column-store tree for a specific record-based key. 
*/ int __wt_col_search(WT_SESSION_IMPL *session, - uint64_t recno, WT_REF *leaf, WT_CURSOR_BTREE *cbt) + uint64_t search_recno, WT_REF *leaf, WT_CURSOR_BTREE *cbt) { WT_BTREE *btree; WT_COL *cip; @@ -24,6 +72,7 @@ __wt_col_search(WT_SESSION_IMPL *session, WT_PAGE *page; WT_PAGE_INDEX *pindex, *parent_pindex; WT_REF *current, *descent; + uint64_t recno; uint32_t base, indx, limit; int depth; @@ -31,8 +80,38 @@ __wt_col_search(WT_SESSION_IMPL *session, __cursor_pos_clear(cbt); - /* We may only be searching a single leaf page, not the full tree. */ + /* + * When appending a new record, the search record number will be an + * out-of-band value, search for the largest key in the table instead. + */ + if ((recno = search_recno) == WT_RECNO_OOB) + recno = UINT64_MAX; + + /* + * We may be searching only a single leaf page, not the full tree. In + * the normal case where the page links to a parent, check the page's + * parent keys before doing the full search, it's faster when the + * cursor is being re-positioned. (One case where the page doesn't + * have a parent is if it is being re-instantiated in memory as part + * of a split). + */ if (leaf != NULL) { + WT_ASSERT(session, search_recno != WT_RECNO_OOB); + + if (leaf->home != NULL) { + WT_RET(__check_leaf_key_range( + session, recno, leaf, cbt)); + if (cbt->compare != 0) { + /* + * !!! + * WT_CURSOR.search_near uses the slot value to + * decide if there was an on-page match. + */ + cbt->slot = 0; + return (0); + } + } + current = leaf; goto leaf_only; } @@ -120,7 +199,17 @@ leaf_only: page = current->page; cbt->ref = current; cbt->recno = recno; - cbt->compare = 0; + + /* + * Don't bother searching if the caller is appending a new record where + * we'll allocate the record number; we're not going to find a match by + * definition, and we figure out the record number and position when we + * do the work. 
+ */ + if (search_recno == WT_RECNO_OOB) { + cbt->compare = -1; + return (0); + } /* * Set the on-page slot to an impossible value larger than any possible @@ -142,6 +231,7 @@ leaf_only: * that's impossibly large for the page. We do have additional setup to * do in that case, the record may be appended to the page. */ + cbt->compare = 0; if (page->type == WT_PAGE_COL_FIX) { if (recno < page->pg_fix_recno) { cbt->compare = 1; @@ -190,18 +280,10 @@ past_end: * This is a rarely used path: we normally find exact matches, because * column-store files are dense, but in this case the caller searched * past the end of the table. - * - * Don't bother searching if the caller is appending a new record where - * we'll allocate the record number; we're not going to find a match by - * definition, and we figure out the position when we do the work. */ cbt->ins_head = WT_COL_APPEND(page); - if (recno == UINT64_MAX) - cbt->ins = NULL; - else - cbt->ins = __col_insert_search( - cbt->ins_head, cbt->ins_stack, cbt->next_stack, recno); - if (cbt->ins == NULL) + if ((cbt->ins = __col_insert_search( + cbt->ins_head, cbt->ins_stack, cbt->next_stack, recno)) == NULL) cbt->compare = -1; else { cbt->recno = WT_INSERT_RECNO(cbt->ins); @@ -212,14 +294,5 @@ past_end: else cbt->compare = -1; } - - /* - * Note if the record is past the maximum record in the tree, the cursor - * search functions need to know for fixed-length column-stores because - * appended records implicitly create any skipped records, and cursor - * search functions have to handle that case. 
- */ - if (cbt->compare == -1) - F_SET(cbt, WT_CBT_MAX_RECORD); return (0); } diff --git a/src/third_party/wiredtiger/src/btree/row_srch.c b/src/third_party/wiredtiger/src/btree/row_srch.c index 079f9d3bad1..e98d30152ab 100644 --- a/src/third_party/wiredtiger/src/btree/row_srch.c +++ b/src/third_party/wiredtiger/src/btree/row_srch.c @@ -131,6 +131,76 @@ __wt_search_insert( return (0); } +/* + * __check_leaf_key_range -- + * Check the search key is in the leaf page's key range. + */ +static inline int +__check_leaf_key_range(WT_SESSION_IMPL *session, + WT_ITEM *srch_key, WT_REF *leaf, WT_CURSOR_BTREE *cbt) +{ + WT_BTREE *btree; + WT_COLLATOR *collator; + WT_ITEM *item; + WT_PAGE_INDEX *pindex; + uint32_t indx; + int cmp; + + btree = S2BT(session); + collator = btree->collator; + item = cbt->tmp; + + /* + * There are reasons we can't do the fast checks, and we continue with + * the leaf page search in those cases, only skipping the complete leaf + * page search if we know it's not going to work. + */ + cbt->compare = 0; + + /* + * First, confirm we have the right parent page-index slot, and quit if + * we don't. We don't search for the correct slot, that would make this + * cheap test expensive. + */ + WT_INTL_INDEX_GET(session, leaf->home, pindex); + indx = leaf->pindex_hint; + if (indx >= pindex->entries || pindex->index[indx] != leaf) + return (0); + + /* + * Check if the search key is smaller than the parent's starting key for + * this page. + * + * We can't compare against slot 0 on a row-store internal page because + * reconciliation doesn't build it, it may not be a valid key. + */ + if (indx != 0) { + __wt_ref_key(leaf->home, leaf, &item->data, &item->size); + WT_RET(__wt_compare(session, collator, srch_key, item, &cmp)); + if (cmp < 0) { + cbt->compare = 1; /* page keys > search key */ + return (0); + } + } + + /* + * Check if the search key is greater than or equal to the starting key + * for the parent's next page. 
+ */ + ++indx; + if (indx < pindex->entries) { + __wt_ref_key( + leaf->home, pindex->index[indx], &item->data, &item->size); + WT_RET(__wt_compare(session, collator, srch_key, item, &cmp)); + if (cmp >= 0) { + cbt->compare = -1; /* page keys < search key */ + return (0); + } + } + + return (0); +} + /* * __wt_row_search -- * Search a row-store tree for a specific key. @@ -179,8 +249,29 @@ __wt_row_search(WT_SESSION_IMPL *session, append_check = insert && cbt->append_tree; descend_right = true; - /* We may only be searching a single leaf page, not the full tree. */ + /* + * We may be searching only a single leaf page, not the full tree. In + * the normal case where the page links to a parent, check the page's + * parent keys before doing the full search, it's faster when the + * cursor is being re-positioned. (One case where the page doesn't + * have a parent is if it is being re-instantiated in memory as part + * of a split). + */ if (leaf != NULL) { + if (leaf->home != NULL) { + WT_RET(__check_leaf_key_range( + session, srch_key, leaf, cbt)); + if (cbt->compare != 0) { + /* + * !!! + * WT_CURSOR.search_near uses the slot value to + * decide if there was an on-page match. + */ + cbt->slot = 0; + return (0); + } + } + current = leaf; goto leaf_only; } @@ -196,15 +287,6 @@ restart_page: page = current->page; WT_INTL_INDEX_GET(session, page, pindex); - /* - * Fast-path internal pages with one child, a common case for - * the root page in new trees. - */ - if (pindex->entries == 1) { - descent = pindex->index[0]; - goto descend; - } - /* Fast-path appends. 
*/ if (append_check) { descent = pindex->index[pindex->entries - 1]; @@ -542,12 +624,18 @@ err: /* int __wt_row_random_leaf(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) { - WT_INSERT *p, *t; + WT_INSERT *ins, **start, **stop; + WT_INSERT_HEAD *ins_head; WT_PAGE *page; - uint32_t cnt; + uint32_t choice, entries, i; + int level; page = cbt->ref->page; + start = stop = NULL; /* [-Wconditional-uninitialized] */ + entries = 0; /* [-Wconditional-uninitialized] */ + + /* If the page has disk-based entries, select from them. */ if (page->pg_row_entries != 0) { cbt->compare = 0; cbt->slot = __wt_random(&session->rnd) % page->pg_row_entries; @@ -562,24 +650,115 @@ __wt_row_random_leaf(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) /* * If the tree is new (and not empty), it might have a large insert - * list. Count how many records are in the list. + * list. */ F_SET(cbt, WT_CBT_SEARCH_SMALLEST); if ((cbt->ins_head = WT_ROW_INSERT_SMALLEST(page)) == NULL) return (WT_NOTFOUND); - for (cnt = 1, p = WT_SKIP_FIRST(cbt->ins_head);; ++cnt) - if ((p = WT_SKIP_NEXT(p)) == NULL) - break; /* - * Select a random number from 0 to (N - 1), return that record. + * Walk down the list until we find a level with at least 50 entries, + * that's where we'll start rolling random numbers. The value 50 is + * used to ignore levels with only a few entries, that is, levels which + * are potentially badly skewed. */ - cnt = __wt_random(&session->rnd) % cnt; - for (p = t = WT_SKIP_FIRST(cbt->ins_head);; t = p) - if (cnt-- == 0 || (p = WT_SKIP_NEXT(p)) == NULL) + for (ins_head = cbt->ins_head, + level = WT_SKIP_MAXDEPTH - 1; level >= 0; --level) { + start = &ins_head->head[level]; + for (entries = 0, stop = start; + *stop != NULL; stop = &(*stop)->next[level]) + ++entries; + + if (entries > 50) break; + } + + /* + * If it's a tiny list and we went all the way to level 0, correct the + * level; entries is correctly set. 
+ */ + if (level < 0) + level = 0; + + /* + * Step down the skip list levels, selecting a random chunk of the name + * space at each level. + */ + while (level > 0) { + /* + * There are (entries) or (entries + 1) chunks of the name space + * considered at each level. They are: between start and the 1st + * element, between the 1st and 2nd elements, and so on to the + * last chunk which is the name space after the stop element on + * the current level. This last chunk of name space may or may + * not be there: as we descend the levels of the skip list, this + * chunk may appear, depending if the next level down has + * entries logically after the stop point in the current level. + * We can't ignore those entries: because of the algorithm used + * to determine the depth of a skiplist, there may be a large + * number of entries "revealed" by descending a level. + * + * If the next level down has more items after the current stop + * point, there are (entries + 1) chunks to consider, else there + * are (entries) chunks. + */ + if (*(stop - 1) == NULL) + choice = __wt_random(&session->rnd) % entries; + else + choice = __wt_random(&session->rnd) % (entries + 1); + + if (choice == entries) { + /* + * We selected the name space after the stop element on + * this level. Set the start point to the current stop + * point, descend a level and move the stop element to + * the end of the list, that is, the end of the newly + * discovered name space, counting entries as we go. + */ + start = stop; + --start; + --level; + for (entries = 0, stop = start; + *stop != NULL; stop = &(*stop)->next[level]) + ++entries; + } else { + /* + * We selected another name space on the level. Move the + * start pointer the selected number of entries forward + * to the start of the selected chunk (if the selected + * number is 0, start won't move). Set the stop pointer + * to the next element in the list and drop both start + * and stop down a level. 
+ */ + for (i = 0; i < choice; ++i) + start = &(*start)->next[level]; + stop = &(*start)->next[level]; + + --start; + --stop; + --level; + + /* Count the entries in the selected name space. */ + for (entries = 0, + ins = *start; ins != *stop; ins = ins->next[level]) + ++entries; + } + } + + /* + * When we reach the bottom level, entries will already be set. Select + * a random entry from the name space and return it. + * + * It should be impossible for the entries count to be 0 at this point, + * but check for it out of paranoia and to quiet static testing tools. + */ + if (entries > 0) + entries = __wt_random(&session->rnd) % entries; + for (ins = *start; entries > 0; --entries) + ins = ins->next[0]; + + cbt->ins = ins; cbt->compare = 0; - cbt->ins = t; return (0); } diff --git a/src/third_party/wiredtiger/src/cache/cache_las.c b/src/third_party/wiredtiger/src/cache/cache_las.c index d3a0265c13a..e943f01236e 100644 --- a/src/third_party/wiredtiger/src/cache/cache_las.c +++ b/src/third_party/wiredtiger/src/cache/cache_las.c @@ -18,6 +18,7 @@ __wt_las_stats_update(WT_SESSION_IMPL *session) WT_CONNECTION_IMPL *conn; WT_CONNECTION_STATS **cstats; WT_DSRC_STATS **dstats; + int64_t v; conn = S2C(session); @@ -37,10 +38,10 @@ __wt_las_stats_update(WT_SESSION_IMPL *session) dstats = ((WT_CURSOR_BTREE *) conn->las_session->las_cursor)->btree->dhandle->stats; - WT_STAT_SET(session, cstats, - cache_lookaside_insert, WT_STAT_READ(dstats, cursor_insert)); - WT_STAT_SET(session, cstats, - cache_lookaside_remove, WT_STAT_READ(dstats, cursor_remove)); + v = WT_STAT_READ(dstats, cursor_insert); + WT_STAT_SET(session, cstats, cache_lookaside_insert, v); + v = WT_STAT_READ(dstats, cursor_remove); + WT_STAT_SET(session, cstats, cache_lookaside_remove, v); } /* diff --git a/src/third_party/wiredtiger/src/conn/conn_api.c b/src/third_party/wiredtiger/src/conn/conn_api.c index bd14e1bf4fd..ee9935828e2 100644 --- a/src/third_party/wiredtiger/src/conn/conn_api.c +++ 
b/src/third_party/wiredtiger/src/conn/conn_api.c @@ -2003,6 +2003,9 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, WT_ERR(__wt_sweep_config(session, cfg)); WT_ERR(__wt_verbose_config(session, cfg)); + /* Initialize the OS page size for mmap */ + conn->page_size = __wt_get_vm_pagesize(); + /* Now that we know if verbose is configured, output the version. */ WT_ERR(__wt_verbose( session, WT_VERB_VERSION, "%s", WIREDTIGER_VERSION_STRING)); diff --git a/src/third_party/wiredtiger/src/conn/conn_dhandle.c b/src/third_party/wiredtiger/src/conn/conn_dhandle.c index c6d5b535b86..0821238fbd7 100644 --- a/src/third_party/wiredtiger/src/conn/conn_dhandle.c +++ b/src/third_party/wiredtiger/src/conn/conn_dhandle.c @@ -147,12 +147,14 @@ __conn_dhandle_mark_dead(WT_SESSION_IMPL *session) int __wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, bool final, bool force) { + WT_BM *bm; WT_BTREE *btree; WT_DATA_HANDLE *dhandle; WT_DECL_RET; bool marked_dead, no_schema_lock; btree = S2BT(session); + bm = btree->bm; dhandle = session->dhandle; marked_dead = false; @@ -191,7 +193,7 @@ __wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, bool final, bool force) */ if (!F_ISSET(btree, WT_BTREE_SALVAGE | WT_BTREE_UPGRADE | WT_BTREE_VERIFY)) { - if (force && (btree->bm == NULL || btree->bm->map == NULL)) { + if (force && (bm == NULL || !bm->is_mapped(bm, session))) { WT_ERR(__conn_dhandle_mark_dead(session)); marked_dead = true; } diff --git a/src/third_party/wiredtiger/src/cursor/cur_bulk.c b/src/third_party/wiredtiger/src/cursor/cur_bulk.c index b996b934464..db64d2ad498 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_bulk.c +++ b/src/third_party/wiredtiger/src/cursor/cur_bulk.c @@ -8,6 +8,25 @@ #include "wt_internal.h" +/* + * __bulk_col_keycmp_err -- + * Error routine when column-store keys inserted out-of-order. 
+ */ +static int +__bulk_col_keycmp_err(WT_CURSOR_BULK *cbulk) +{ + WT_CURSOR *cursor; + WT_SESSION_IMPL *session; + + session = (WT_SESSION_IMPL *)cbulk->cbt.iface.session; + cursor = &cbulk->cbt.iface; + + WT_RET_MSG(session, EINVAL, + "bulk-load presented with out-of-order keys: %" PRIu64 " is less " + "than previously inserted key %" PRIu64, + cursor->recno, cbulk->recno); +} + /* * __curbulk_insert_fix -- * Fixed-length column-store bulk cursor insert. @@ -19,6 +38,7 @@ __curbulk_insert_fix(WT_CURSOR *cursor) WT_CURSOR_BULK *cbulk; WT_DECL_RET; WT_SESSION_IMPL *session; + uint64_t recno; cbulk = (WT_CURSOR_BULK *)cursor; btree = cbulk->cbt.btree; @@ -29,13 +49,63 @@ __curbulk_insert_fix(WT_CURSOR *cursor) * until the bulk cursor is closed. */ CURSOR_API_CALL(cursor, session, insert, btree); + WT_STAT_FAST_DATA_INCR(session, cursor_insert_bulk); - WT_CURSOR_NEEDVALUE(cursor); + /* + * If the "append" flag was configured, the application doesn't have to + * supply a key, else require a key. + */ + if (F_ISSET(cursor, WT_CURSTD_APPEND)) + recno = cbulk->recno + 1; + else { + WT_CURSOR_CHECKKEY(cursor); + if ((recno = cursor->recno) <= cbulk->recno) + WT_ERR(__bulk_col_keycmp_err(cbulk)); + } + WT_CURSOR_CHECKVALUE(cursor); - WT_ERR(__wt_bulk_insert_fix(session, cbulk)); + /* + * Insert any skipped records as deleted records, update the current + * record count. + */ + for (; recno != cbulk->recno + 1; ++cbulk->recno) + WT_ERR(__wt_bulk_insert_fix(session, cbulk, true)); + cbulk->recno = recno; + + /* Insert the current record. */ + ret = __wt_bulk_insert_fix(session, cbulk, false); + +err: API_END_RET(session, ret); +} + +/* + * __curbulk_insert_fix_bitmap -- + * Fixed-length column-store bulk cursor insert for bitmaps. 
+ */ +static int +__curbulk_insert_fix_bitmap(WT_CURSOR *cursor) +{ + WT_BTREE *btree; + WT_CURSOR_BULK *cbulk; + WT_DECL_RET; + WT_SESSION_IMPL *session; + cbulk = (WT_CURSOR_BULK *)cursor; + btree = cbulk->cbt.btree; + + /* + * Bulk cursor inserts are updates, but don't need auto-commit + * transactions because they are single-threaded and not visible + * until the bulk cursor is closed. + */ + CURSOR_API_CALL(cursor, session, insert, btree); WT_STAT_FAST_DATA_INCR(session, cursor_insert_bulk); + WT_CURSOR_CHECKVALUE(cursor); + + /* Insert the current record. */ + ret = __wt_bulk_insert_fix_bitmap(session, cbulk); + err: API_END_RET(session, ret); } @@ -50,7 +120,7 @@ __curbulk_insert_var(WT_CURSOR *cursor) WT_CURSOR_BULK *cbulk; WT_DECL_RET; WT_SESSION_IMPL *session; - bool duplicate; + uint64_t recno; cbulk = (WT_CURSOR_BULK *)cursor; btree = cbulk->cbt.btree; @@ -61,45 +131,63 @@ __curbulk_insert_var(WT_CURSOR *cursor) * until the bulk cursor is closed. */ CURSOR_API_CALL(cursor, session, insert, btree); - - WT_CURSOR_NEEDVALUE(cursor); + WT_STAT_FAST_DATA_INCR(session, cursor_insert_bulk); /* - * If this isn't the first value inserted, compare it against the last - * value and increment the RLE count. - * - * Instead of a "first time" variable, I'm using the RLE count, because - * it is only zero before the first row is inserted. + * If the "append" flag was configured, the application doesn't have to + * supply a key, else require a key. 
*/ - duplicate = false; - if (cbulk->rle != 0) { - if (cbulk->last.size == cursor->value.size && - memcmp(cbulk->last.data, cursor->value.data, - cursor->value.size) == 0) { - ++cbulk->rle; - duplicate = true; - } else - WT_ERR(__wt_bulk_insert_var(session, cbulk)); + if (F_ISSET(cursor, WT_CURSTD_APPEND)) + recno = cbulk->recno + 1; + else { + WT_CURSOR_CHECKKEY(cursor); + if ((recno = cursor->recno) <= cbulk->recno) + WT_ERR(__bulk_col_keycmp_err(cbulk)); } + WT_CURSOR_CHECKVALUE(cursor); + + if (!cbulk->first_insert) { + /* + * If not the first insert and the key space is sequential, + * compare the current value against the last value; if the + * same, just increment the RLE count. + */ + if (recno == cbulk->recno + 1 && + cbulk->last.size == cursor->value.size && + memcmp(cbulk->last.data, + cursor->value.data, cursor->value.size) == 0) { + ++cbulk->rle; + ++cbulk->recno; + goto duplicate; + } + + /* Insert the previous key/value pair. */ + WT_ERR(__wt_bulk_insert_var(session, cbulk, false)); + } else + cbulk->first_insert = false; /* - * Save a copy of the value for the next comparison and reset the RLE - * counter. + * Insert any skipped records as deleted records, update the current + * record count and RLE counter. */ - if (!duplicate) { - WT_ERR(__wt_buf_set(session, - &cbulk->last, cursor->value.data, cursor->value.size)); - cbulk->rle = 1; + if (recno != cbulk->recno + 1) { + cbulk->rle = (recno - cbulk->recno) - 1; + WT_ERR(__wt_bulk_insert_var(session, cbulk, true)); } + cbulk->rle = 1; + cbulk->recno = recno; - WT_STAT_FAST_DATA_INCR(session, cursor_insert_bulk); + /* Save a copy of the value for the next comparison. */ + ret = __wt_buf_set(session, + &cbulk->last, cursor->value.data, cursor->value.size); +duplicate: err: API_END_RET(session, ret); } /* * __bulk_row_keycmp_err -- - * Error routine when keys inserted out-of-order. + * Error routine when row-store keys inserted out-of-order. 
*/ static int __bulk_row_keycmp_err(WT_CURSOR_BULK *cbulk) @@ -154,6 +242,7 @@ __curbulk_insert_row(WT_CURSOR *cursor) * until the bulk cursor is closed. */ CURSOR_API_CALL(cursor, session, insert, btree); + WT_STAT_FAST_DATA_INCR(session, cursor_insert_bulk); WT_CURSOR_CHECKKEY(cursor); WT_CURSOR_CHECKVALUE(cursor); @@ -161,28 +250,20 @@ __curbulk_insert_row(WT_CURSOR *cursor) /* * If this isn't the first key inserted, compare it against the last key * to ensure the application doesn't accidentally corrupt the table. - * - * Instead of a "first time" variable, I'm using the RLE count, because - * it is only zero before the first row is inserted. */ - if (cbulk->rle != 0) { + if (!cbulk->first_insert) { WT_ERR(__wt_compare(session, btree->collator, &cursor->key, &cbulk->last, &cmp)); if (cmp <= 0) WT_ERR(__bulk_row_keycmp_err(cbulk)); - } + } else + cbulk->first_insert = false; - /* - * Save a copy of the key for the next comparison and set the RLE - * counter. - */ + /* Save a copy of the key for the next comparison. */ WT_ERR(__wt_buf_set(session, &cbulk->last, cursor->key.data, cursor->key.size)); - cbulk->rle = 1; - - WT_ERR(__wt_bulk_insert_row(session, cbulk)); - WT_STAT_FAST_DATA_INCR(session, cursor_insert_bulk); + ret = __wt_bulk_insert_row(session, cbulk); err: API_END_RET(session, ret); } @@ -208,13 +289,12 @@ __curbulk_insert_row_skip_check(WT_CURSOR *cursor) * until the bulk cursor is closed. 
*/ CURSOR_API_CALL(cursor, session, insert, btree); + WT_STAT_FAST_DATA_INCR(session, cursor_insert_bulk); - WT_CURSOR_NEEDKEY(cursor); - WT_CURSOR_NEEDVALUE(cursor); - - WT_ERR(__wt_bulk_insert_row(session, cbulk)); + WT_CURSOR_CHECKKEY(cursor); + WT_CURSOR_CHECKVALUE(cursor); - WT_STAT_FAST_DATA_INCR(session, cursor_insert_bulk); + ret = __wt_bulk_insert_row(session, cbulk); err: API_END_RET(session, ret); } @@ -237,18 +317,25 @@ __wt_curbulk_init(WT_SESSION_IMPL *session, __wt_cursor_set_notsup(c); switch (cbt->btree->type) { case BTREE_COL_FIX: - c->insert = __curbulk_insert_fix; + c->insert = bitmap ? + __curbulk_insert_fix_bitmap : __curbulk_insert_fix; break; case BTREE_COL_VAR: c->insert = __curbulk_insert_var; break; case BTREE_ROW: + /* + * Row-store order comparisons are expensive, so we optionally + * skip them when we know the input is correct. + */ c->insert = skip_sort_check ? __curbulk_insert_row_skip_check : __curbulk_insert_row; break; WT_ILLEGAL_VALUE(session); } + cbulk->first_insert = true; + cbulk->recno = 0; cbulk->bitmap = bitmap; if (bitmap) F_SET(c, WT_CURSTD_RAW); diff --git a/src/third_party/wiredtiger/src/cursor/cur_json.c b/src/third_party/wiredtiger/src/cursor/cur_json.c index 8f858a5012f..3270be07de4 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_json.c +++ b/src/third_party/wiredtiger/src/cursor/cur_json.c @@ -313,7 +313,6 @@ size_t __wt_json_unpack_char(char ch, u_char *buf, size_t bufsz, bool force_unicode) { char abbrev; - u_char h; if (!force_unicode) { if (isprint(ch) && ch != '\\' && ch != '"') { @@ -354,16 +353,8 @@ __wt_json_unpack_char(char ch, u_char *buf, size_t bufsz, bool force_unicode) *buf++ = 'u'; *buf++ = '0'; *buf++ = '0'; - h = (((u_char)ch) >> 4) & 0xF; - if (h >= 10) - *buf++ = 'A' + (h - 10); - else - *buf++ = '0' + h; - h = ((u_char)ch) & 0xF; - if (h >= 10) - *buf++ = 'A' + (h - 10); - else - *buf++ = '0' + h; + *buf++ = __wt_hex[(ch & 0xf0) >> 4]; + *buf++ = __wt_hex[ch & 0x0f]; } return (6); } diff 
--git a/src/third_party/wiredtiger/src/cursor/cur_stat.c b/src/third_party/wiredtiger/src/cursor/cur_stat.c index e1d5b8eb91a..652dec364fb 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_stat.c +++ b/src/third_party/wiredtiger/src/cursor/cur_stat.c @@ -384,6 +384,7 @@ __curstat_file_init(WT_SESSION_IMPL *session, { WT_DATA_HANDLE *dhandle; WT_DECL_RET; + wt_off_t size; const char *filename; /* @@ -395,8 +396,8 @@ __curstat_file_init(WT_SESSION_IMPL *session, if (!WT_PREFIX_SKIP(filename, "file:")) return (EINVAL); __wt_stat_dsrc_init_single(&cst->u.dsrc_stats); - WT_RET(__wt_block_manager_size( - session, filename, &cst->u.dsrc_stats)); + WT_RET(__wt_block_manager_named_size(session, filename, &size)); + cst->u.dsrc_stats.block_size = size; __wt_curstat_dsrc_final(cst); return (0); } @@ -662,7 +663,7 @@ __wt_curstat_open(WT_SESSION_IMPL *session, /* * We return the statistics field's offset as the key, and a string - * description, a string value, and a uint64_t value as the value + * description, a string value, and a uint64_t value as the value * columns. 
*/ cursor->key_format = "i"; diff --git a/src/third_party/wiredtiger/src/cursor/cur_table.c b/src/third_party/wiredtiger/src/cursor/cur_table.c index dca72a16ee5..e746ccd5871 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_table.c +++ b/src/third_party/wiredtiger/src/cursor/cur_table.c @@ -968,8 +968,11 @@ __wt_curtable_open(WT_SESSION_IMPL *session, WT_ERR(__wt_strdup(session, tmp->data, &ctable->cfg[1])); if (0) { -err: WT_TRET(__curtable_close(cursor)); - *cursorp = NULL; +err: if (*cursorp != NULL) { + WT_TRET(__wt_cursor_close(*cursorp)); + *cursorp = NULL; + } + WT_TRET(__curtable_close(cursor)); } __wt_scr_free(session, &tmp); diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c index ac481581c23..0e2b33c35ec 100644 --- a/src/third_party/wiredtiger/src/evict/evict_lru.c +++ b/src/third_party/wiredtiger/src/evict/evict_lru.c @@ -727,6 +727,10 @@ __evict_request_walk_clear(WT_SESSION_IMPL *session) F_CLR(session, WT_SESSION_CLEAR_EVICT_WALK); + /* An error is unexpected - flag the failure. */ + if (ret != 0) + __wt_err(session, ret, "Failed to clear eviction walk point"); + return (ret); } @@ -760,20 +764,18 @@ __wt_evict_file_exclusive_on(WT_SESSION_IMPL *session, bool *evict_resetp) { WT_BTREE *btree; WT_CACHE *cache; + WT_DECL_RET; WT_EVICT_ENTRY *evict; u_int i, elem; + *evict_resetp = false; + btree = S2BT(session); cache = S2C(session)->cache; - /* - * If the file isn't evictable, there's no work to do. - */ - if (F_ISSET(btree, WT_BTREE_NO_EVICTION)) { - *evict_resetp = false; + /* If the file wasn't evictable, there's no work to do. */ + if (F_ISSET(btree, WT_BTREE_NO_EVICTION)) return (0); - } - *evict_resetp = true; /* * Hold the walk lock to set the "no eviction" flag: no new pages from @@ -784,7 +786,7 @@ __wt_evict_file_exclusive_on(WT_SESSION_IMPL *session, bool *evict_resetp) __wt_spin_unlock(session, &cache->evict_walk_lock); /* Clear any existing LRU eviction walk for the file. 
*/ - WT_RET(__evict_request_walk_clear(session)); + WT_ERR(__evict_request_walk_clear(session)); /* Hold the evict lock to remove any queued pages from this file. */ __wt_spin_lock(session, &cache->evict_lock); @@ -806,7 +808,11 @@ __wt_evict_file_exclusive_on(WT_SESSION_IMPL *session, bool *evict_resetp) while (btree->evict_busy > 0) __wt_yield(); + *evict_resetp = true; return (0); + +err: F_CLR(btree, WT_BTREE_NO_EVICTION); + return (ret); } /* diff --git a/src/third_party/wiredtiger/src/include/block.h b/src/third_party/wiredtiger/src/include/block.h index 4bff6c82783..804eec24874 100644 --- a/src/third_party/wiredtiger/src/include/block.h +++ b/src/third_party/wiredtiger/src/include/block.h @@ -173,6 +173,7 @@ struct __wt_bm { int (*compact_skip)(WT_BM *, WT_SESSION_IMPL *, bool *); int (*compact_start)(WT_BM *, WT_SESSION_IMPL *); int (*free)(WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t); + bool (*is_mapped)(WT_BM *, WT_SESSION_IMPL *); int (*preload)(WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t); int (*read) (WT_BM *, WT_SESSION_IMPL *, WT_ITEM *, const uint8_t *, size_t); @@ -182,6 +183,7 @@ struct __wt_bm { int (*salvage_start)(WT_BM *, WT_SESSION_IMPL *); int (*salvage_valid) (WT_BM *, WT_SESSION_IMPL *, uint8_t *, size_t, bool); + int (*size)(WT_BM *, WT_SESSION_IMPL *, wt_off_t *); int (*stat)(WT_BM *, WT_SESSION_IMPL *, WT_DSRC_STATS *stats); int (*sync)(WT_BM *, WT_SESSION_IMPL *, bool); int (*verify_addr)(WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t); @@ -244,7 +246,10 @@ struct __wt_block { bool ckpt_inprogress;/* Live checkpoint in progress */ /* Compaction support */ - int compact_pct_tenths; /* Percent to compact */ + int compact_pct_tenths; /* Percent to compact */ + uint64_t compact_pages_reviewed;/* Pages reviewed */ + uint64_t compact_pages_skipped; /* Pages skipped */ + uint64_t compact_pages_written; /* Pages rewritten */ /* Salvage support */ wt_off_t slvg_off; /* Salvage file offset */ diff --git 
a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h index 6ee74c61a38..12a736c56a2 100644 --- a/src/third_party/wiredtiger/src/include/btmem.h +++ b/src/third_party/wiredtiger/src/include/btmem.h @@ -478,7 +478,7 @@ struct __wt_page { #define pg_row_ins u.row.ins #undef pg_row_upd #define pg_row_upd u.row.upd -#define pg_row_entries u.row.entries +#undef pg_row_entries #define pg_row_entries u.row.entries /* Fixed-length column-store leaf page. */ @@ -1049,7 +1049,7 @@ struct __wt_insert_head { uint64_t __prev_split_gen = (session)->split_gen; \ if (__prev_split_gen == 0) \ do { \ - WT_PUBLISH((session)->split_gen, \ + WT_PUBLISH((session)->split_gen, \ S2C(session)->split_gen); \ } while ((session)->split_gen != S2C(session)->split_gen) diff --git a/src/third_party/wiredtiger/src/include/column.i b/src/third_party/wiredtiger/src/include/column.i index fc1f372b2a9..9388e07d0d8 100644 --- a/src/third_party/wiredtiger/src/include/column.i +++ b/src/third_party/wiredtiger/src/include/column.i @@ -176,6 +176,16 @@ __col_insert_search(WT_INSERT_HEAD *inshead, continue; } + /* + * When no exact match is found, the search returns the smallest + * key larger than the searched-for key, or the largest key + * smaller than the searched-for key, if there is no larger key. + * Our callers depend on that: specifically, the fixed-length + * column store cursor code interprets returning a key smaller + * than the searched-for key to mean the searched-for key is + * larger than any key on the page. Don't change that behavior, + * things will break. + */ ins_recno = WT_INSERT_RECNO(ret_ins); cmp = (recno == ins_recno) ? 0 : (recno < ins_recno) ? -1 : 1; @@ -282,7 +292,17 @@ __col_var_search(WT_PAGE *page, uint64_t recno, uint64_t *start_recnop) start_recno = repeat->recno + repeat->rle; } - if (recno >= start_recno + (page->pg_var_entries - start_indx)) + /* + * !!! 
+ * The test could be written more simply as: + * + * (recno >= start_recno + (page->pg_var_entries - start_indx)) + * + * It's split into two parts because the simpler test will overflow if + * searching for large record numbers. + */ + if (recno >= start_recno && + recno - start_recno >= page->pg_var_entries - start_indx) return (NULL); return (page->pg_var_d + start_indx + (uint32_t)(recno - start_recno)); diff --git a/src/third_party/wiredtiger/src/include/connection.h b/src/third_party/wiredtiger/src/include/connection.h index 2367f5a0035..1c1cb9b8987 100644 --- a/src/third_party/wiredtiger/src/include/connection.h +++ b/src/third_party/wiredtiger/src/include/connection.h @@ -415,6 +415,7 @@ struct __wt_connection_impl { uint32_t direct_io; uint32_t write_through; /* FILE_FLAG_WRITE_THROUGH type flags */ bool mmap; /* mmap configuration */ + int page_size; /* OS page size for mmap alignment */ uint32_t verbose; uint32_t flags; diff --git a/src/third_party/wiredtiger/src/include/cursor.h b/src/third_party/wiredtiger/src/include/cursor.h index 275e2f2db46..4f232ce4fd0 100644 --- a/src/third_party/wiredtiger/src/include/cursor.h +++ b/src/third_party/wiredtiger/src/include/cursor.h @@ -200,18 +200,23 @@ struct __wt_cursor_btree { uint8_t append_tree; /* Cursor appended to the tree */ +#ifdef HAVE_DIAGNOSTIC + /* Check that cursor next/prev never returns keys out-of-order. */ + WT_ITEM *lastkey, _lastkey; + uint64_t lastrecno; +#endif + #define WT_CBT_ACTIVE 0x01 /* Active in the tree */ #define WT_CBT_ITERATE_APPEND 0x02 /* Col-store: iterating append list */ #define WT_CBT_ITERATE_NEXT 0x04 /* Next iteration configuration */ #define WT_CBT_ITERATE_PREV 0x08 /* Prev iteration configuration */ -#define WT_CBT_MAX_RECORD 0x10 /* Col-store: past end-of-table */ -#define WT_CBT_NO_TXN 0x20 /* Non-transactional cursor +#define WT_CBT_NO_TXN 0x10 /* Non-transactional cursor (e.g. 
on a checkpoint) */ -#define WT_CBT_SEARCH_SMALLEST 0x40 /* Row-store: small-key insert list */ +#define WT_CBT_SEARCH_SMALLEST 0x20 /* Row-store: small-key insert list */ #define WT_CBT_POSITION_MASK /* Flags associated with position */ \ (WT_CBT_ITERATE_APPEND | WT_CBT_ITERATE_NEXT | WT_CBT_ITERATE_PREV | \ - WT_CBT_MAX_RECORD | WT_CBT_SEARCH_SMALLEST) + WT_CBT_SEARCH_SMALLEST) uint8_t flags; }; @@ -219,33 +224,32 @@ struct __wt_cursor_btree { struct __wt_cursor_bulk { WT_CURSOR_BTREE cbt; - WT_REF *ref; /* The leaf page */ - WT_PAGE *leaf; - /* * Variable-length column store compares values during bulk load as * part of RLE compression, row-store compares keys during bulk load * to avoid corruption. */ - WT_ITEM last; /* Last key/value seen */ + bool first_insert; /* First insert */ + WT_ITEM last; /* Last key/value inserted */ /* - * Variable-length column-store RLE counter (also overloaded to mean - * the first time through the bulk-load insert routine, when set to 0). + * Additional column-store bulk load support. */ - uint64_t rle; + uint64_t recno; /* Record number */ + uint64_t rle; /* Variable-length RLE counter */ /* - * Fixed-length column-store current entry in memory chunk count, and - * the maximum number of records per chunk. + * Additional fixed-length column store bitmap bulk load support: + * current entry in memory chunk count, and the maximum number of + * records per chunk. */ + bool bitmap; /* Bitmap bulk load */ uint32_t entry; /* Entry count */ uint32_t nrecs; /* Max records per chunk */ - /* Special bitmap bulk load for fixed-length column stores. 
*/ - bool bitmap; - - void *reconcile; /* Reconciliation information */ + void *reconcile; /* Reconciliation support */ + WT_REF *ref; /* The leaf page */ + WT_PAGE *leaf; }; struct __wt_cursor_config { diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h index d84403cc16d..7338f8dae3b 100644 --- a/src/third_party/wiredtiger/src/include/extern.h +++ b/src/third_party/wiredtiger/src/include/extern.h @@ -14,6 +14,7 @@ extern int __wt_block_buffer_to_addr(WT_BLOCK *block, const uint8_t *p, wt_off_t extern int __wt_block_addr_invalid(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr, size_t addr_size, bool live); extern int __wt_block_addr_string(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, const uint8_t *addr, size_t addr_size); extern int __wt_block_buffer_to_ckpt(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *p, WT_BLOCK_CKPT *ci); +extern int __wt_block_ckpt_decode(WT_SESSION *wt_session, size_t allocsize, const uint8_t *p, WT_BLOCK_CKPT *ci); extern int __wt_block_ckpt_to_buffer(WT_SESSION_IMPL *session, WT_BLOCK *block, uint8_t **pp, WT_BLOCK_CKPT *ci); extern int __wt_block_ckpt_init( WT_SESSION_IMPL *session, WT_BLOCK_CKPT *ci, const char *name); extern int __wt_block_checkpoint_load(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr, size_t addr_size, uint8_t *root_addr, size_t *root_addr_sizep, bool checkpoint); @@ -50,7 +51,8 @@ extern int __wt_block_open(WT_SESSION_IMPL *session, const char *filename, const extern int __wt_block_close(WT_SESSION_IMPL *session, WT_BLOCK *block); extern int __wt_desc_init(WT_SESSION_IMPL *session, WT_FH *fh, uint32_t allocsize); extern void __wt_block_stat(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_DSRC_STATS *stats); -extern int __wt_block_manager_size( WT_SESSION_IMPL *session, const char *filename, WT_DSRC_STATS *stats); +extern int __wt_block_manager_size(WT_BM *bm, WT_SESSION_IMPL *session, wt_off_t *sizep); 
+extern int __wt_block_manager_named_size( WT_SESSION_IMPL *session, const char *name, wt_off_t *sizep); extern int __wt_bm_preload( WT_BM *bm, WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size); extern int __wt_bm_read(WT_BM *bm, WT_SESSION_IMPL *session, WT_ITEM *buf, const uint8_t *addr, size_t addr_size); extern int __wt_block_read_off_blind( WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, wt_off_t offset); @@ -90,6 +92,7 @@ extern int __wt_bloom_drop(WT_BLOOM *bloom, const char *config); extern int __wt_compact(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_compact_page_skip(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp); extern void __wt_btcur_iterate_setup(WT_CURSOR_BTREE *cbt); +extern int __wt_cursor_key_order_check( WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, bool next); extern int __wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating); extern int __wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating); extern int __wt_btcur_reset(WT_CURSOR_BTREE *cbt); @@ -170,7 +173,7 @@ extern int __wt_tree_walk(WT_SESSION_IMPL *session, WT_REF **refp, uint32_t flag extern int __wt_tree_walk_count(WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *walkcntp, uint32_t flags); extern int __wt_tree_walk_skip(WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *skipleafcntp, uint32_t flags); extern int __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, uint64_t recno, WT_ITEM *value, WT_UPDATE *upd_arg, bool is_remove); -extern int __wt_col_search(WT_SESSION_IMPL *session, uint64_t recno, WT_REF *leaf, WT_CURSOR_BTREE *cbt); +extern int __wt_col_search(WT_SESSION_IMPL *session, uint64_t search_recno, WT_REF *leaf, WT_CURSOR_BTREE *cbt); extern int __wt_row_leaf_keys(WT_SESSION_IMPL *session, WT_PAGE *page); extern int __wt_row_leaf_key_copy( WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip, WT_ITEM *key); extern int __wt_row_leaf_key_work(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip_arg, WT_ITEM *keyb, 
bool instantiate); @@ -362,23 +365,23 @@ extern int __wt_logrec_read(WT_SESSION_IMPL *session, const uint8_t **pp, const extern int __wt_logop_read(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *optypep, uint32_t *opsizep); extern int __wt_logop_col_put_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, uint64_t recno, WT_ITEM *value); extern int __wt_logop_col_put_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, uint64_t *recnop, WT_ITEM *valuep); -extern int __wt_logop_col_put_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out); +extern int __wt_logop_col_put_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out, uint32_t flags); extern int __wt_logop_col_remove_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, uint64_t recno); extern int __wt_logop_col_remove_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, uint64_t *recnop); -extern int __wt_logop_col_remove_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out); +extern int __wt_logop_col_remove_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out, uint32_t flags); extern int __wt_logop_col_truncate_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, uint64_t start, uint64_t stop); extern int __wt_logop_col_truncate_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, uint64_t *startp, uint64_t *stopp); -extern int __wt_logop_col_truncate_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out); +extern int __wt_logop_col_truncate_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out, uint32_t flags); extern int __wt_logop_row_put_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, WT_ITEM *key, WT_ITEM *value); extern int 
__wt_logop_row_put_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, WT_ITEM *keyp, WT_ITEM *valuep); -extern int __wt_logop_row_put_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out); +extern int __wt_logop_row_put_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out, uint32_t flags); extern int __wt_logop_row_remove_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, WT_ITEM *key); extern int __wt_logop_row_remove_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, WT_ITEM *keyp); -extern int __wt_logop_row_remove_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out); +extern int __wt_logop_row_remove_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out, uint32_t flags); extern int __wt_logop_row_truncate_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, WT_ITEM *start, WT_ITEM *stop, uint32_t mode); extern int __wt_logop_row_truncate_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, WT_ITEM *startp, WT_ITEM *stopp, uint32_t *modep); -extern int __wt_logop_row_truncate_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out); -extern int __wt_txn_op_printlog( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out); +extern int __wt_logop_row_truncate_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out, uint32_t flags); +extern int __wt_txn_op_printlog( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out, uint32_t flags); extern void __wt_log_slot_activate(WT_SESSION_IMPL *session, WT_LOGSLOT *slot); extern int __wt_log_slot_switch( WT_SESSION_IMPL *session, WT_MYSLOT *myslot, bool retry, bool forced); extern int __wt_log_slot_new(WT_SESSION_IMPL *session); @@ -468,7 +471,7 @@ extern int 
__wt_meta_track_init(WT_SESSION_IMPL *session); extern int __wt_meta_track_destroy(WT_SESSION_IMPL *session); extern int __wt_turtle_init(WT_SESSION_IMPL *session); extern int __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **valuep); -extern int __wt_turtle_update( WT_SESSION_IMPL *session, const char *key, const char *value); +extern int __wt_turtle_update(WT_SESSION_IMPL *session, const char *key, const char *value); extern void __wt_abort(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn)); extern int __wt_calloc(WT_SESSION_IMPL *session, size_t number, size_t size, void *retp); extern int __wt_realloc(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp); @@ -514,6 +517,7 @@ extern int __wt_rwlock_destroy(WT_SESSION_IMPL *session, WT_RWLOCK **rwlockp); extern int __wt_once(void (*init_routine)(void)); extern int __wt_open(WT_SESSION_IMPL *session, const char *name, bool ok_create, bool exclusive, int dio_type, WT_FH **fhp); extern int __wt_close(WT_SESSION_IMPL *session, WT_FH **fhp); +extern int __wt_get_vm_pagesize(void); extern bool __wt_absolute_path(const char *path); extern const char *__wt_path_separator(void); extern bool __wt_has_priv(void); @@ -558,8 +562,9 @@ extern uint32_t __wt_split_page_size(WT_BTREE *btree, uint32_t maxpagesize); extern int __wt_bulk_init(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk); extern int __wt_bulk_wrapup(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk); extern int __wt_bulk_insert_row(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk); -extern int __wt_bulk_insert_fix(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk); -extern int __wt_bulk_insert_var(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk); +extern int __wt_bulk_insert_fix( WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bool deleted); +extern int __wt_bulk_insert_fix_bitmap(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk); +extern int __wt_bulk_insert_var( WT_SESSION_IMPL *session, 
WT_CURSOR_BULK *cbulk, bool deleted); extern int __wt_schema_create_strip(WT_SESSION_IMPL *session, const char *v1, const char *v2, char **value_ret); extern int __wt_direct_io_size_check(WT_SESSION_IMPL *session, const char **cfg, const char *config_name, uint32_t *allocsizep); extern int __wt_schema_colgroup_source(WT_SESSION_IMPL *session, WT_TABLE *table, const char *cgname, const char *config, WT_ITEM *buf); @@ -654,6 +659,7 @@ __wt_hazard_set(WT_SESSION_IMPL *session, WT_REF *ref, bool *busyp ); extern int __wt_hazard_clear(WT_SESSION_IMPL *session, WT_PAGE *page); extern void __wt_hazard_close(WT_SESSION_IMPL *session); +extern void __wt_fill_hex(const uint8_t *src, size_t src_max, uint8_t *dest, size_t dest_max, size_t *lenp); extern int __wt_raw_to_hex( WT_SESSION_IMPL *session, const uint8_t *from, size_t size, WT_ITEM *to); extern int __wt_raw_to_esc_hex( WT_SESSION_IMPL *session, const uint8_t *from, size_t size, WT_ITEM *to); extern int __wt_hex2byte(const u_char *from, u_char *to); @@ -671,6 +677,7 @@ extern uint32_t __wt_log2_int(uint32_t n); extern bool __wt_ispo2(uint32_t v); extern uint32_t __wt_rduppo2(uint32_t n, uint32_t po2); extern void __wt_random_init(WT_RAND_STATE volatile *rnd_state); +extern int __wt_random_init_seed( WT_SESSION_IMPL *session, WT_RAND_STATE volatile *rnd_state); extern uint32_t __wt_random(WT_RAND_STATE volatile *rnd_state); extern int __wt_buf_grow_worker(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size); extern int __wt_buf_fmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...) 
WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4))); @@ -732,7 +739,7 @@ extern int __wt_txn_checkpoint_logread( WT_SESSION_IMPL *session, const uint8_t extern int __wt_txn_checkpoint_log( WT_SESSION_IMPL *session, bool full, uint32_t flags, WT_LSN *lsnp); extern int __wt_txn_truncate_log( WT_SESSION_IMPL *session, WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop); extern int __wt_txn_truncate_end(WT_SESSION_IMPL *session); -extern int __wt_txn_printlog(WT_SESSION *wt_session, FILE *out); +extern int __wt_txn_printlog(WT_SESSION *wt_session, FILE *out, uint32_t flags); extern int __wt_txn_named_snapshot_begin(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_txn_named_snapshot_drop(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_txn_named_snapshot_get(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *nameval); diff --git a/src/third_party/wiredtiger/src/include/gcc.h b/src/third_party/wiredtiger/src/include/gcc.h index 01e33792d73..bb80f8b738b 100644 --- a/src/third_party/wiredtiger/src/include/gcc.h +++ b/src/third_party/wiredtiger/src/include/gcc.h @@ -156,8 +156,7 @@ __wt_atomic_cas_ptr(void *vp, void *old, void *new) #if defined(x86_64) || defined(__x86_64__) /* Pause instruction to prevent excess processor bus usage */ -#define WT_PAUSE() __asm__ volatile("pause\n" ::: "memory") - +#define WT_PAUSE() __asm__ volatile("pause\n" ::: "memory") #define WT_FULL_BARRIER() do { \ __asm__ volatile ("mfence" ::: "memory"); \ } while (0) @@ -169,7 +168,7 @@ __wt_atomic_cas_ptr(void *vp, void *old, void *new) } while (0) #elif defined(i386) || defined(__i386__) -#define WT_PAUSE() __asm__ volatile("pause\n" ::: "memory") +#define WT_PAUSE() __asm__ volatile("pause\n" ::: "memory") #define WT_FULL_BARRIER() do { \ __asm__ volatile ("lock; addl $0, 0(%%esp)" ::: "memory"); \ } while (0) @@ -177,23 +176,58 @@ __wt_atomic_cas_ptr(void *vp, void *old, void *new) #define WT_WRITE_BARRIER() WT_FULL_BARRIER() #elif defined(__PPC64__) || defined(PPC64) +/* ori 
0,0,0 is the PPC64 noop instruction */ #define WT_PAUSE() __asm__ volatile("ori 0,0,0" ::: "memory") -#define WT_FULL_BARRIER() do { +#define WT_FULL_BARRIER() do { \ __asm__ volatile ("sync" ::: "memory"); \ } while (0) -#define WT_READ_BARRIER() WT_FULL_BARRIER() -#define WT_WRITE_BARRIER() WT_FULL_BARRIER() + +/* TODO: ISA 2.07 Elemental Memory Barriers would be better, + specifically mbll, and mbss, but they are not supported by POWER 8 */ +#define WT_READ_BARRIER() do { \ + __asm__ volatile ("lwsync" ::: "memory"); \ +} while (0) +#define WT_WRITE_BARRIER() do { \ + __asm__ volatile ("lwsync" ::: "memory"); \ +} while (0) #elif defined(__aarch64__) #define WT_PAUSE() __asm__ volatile("yield" ::: "memory") #define WT_FULL_BARRIER() do { \ - __asm__ volatile ("dsb sy" ::: "memory"); \ + __asm__ volatile ("dsb sy" ::: "memory"); \ +} while (0) +#define WT_READ_BARRIER() do { \ + __asm__ volatile ("dsb ld" ::: "memory"); \ +} while (0) +#define WT_WRITE_BARRIER() do { \ + __asm__ volatile ("dsb st" ::: "memory"); \ +} while (0) + +#elif defined(__s390x__) +#define WT_PAUSE() __asm__ volatile("lr 0,0" ::: "memory") +#define WT_FULL_BARRIER() do { \ + __asm__ volatile ("bcr 15,0\n" ::: "memory"); \ } while (0) +#define WT_READ_BARRIER() WT_FULL_BARRIER() +#define WT_WRITE_BARRIER() WT_FULL_BARRIER() + +#elif defined(__sparc__) +#define WT_PAUSE() __asm__ volatile("rd %%ccr, %%g0" ::: "memory") + +#define WT_FULL_BARRIER() do { \ + __asm__ volatile ("membar #StoreLoad" ::: "memory"); \ +} while (0) + +/* + * On UltraSparc machines, TSO is used, and so there is no need for membar. + * READ_BARRIER = #LoadLoad, and WRITE_BARRIER = #StoreStore are noop. 
+ */ #define WT_READ_BARRIER() do { \ - __asm__ volatile ("dsb ld" ::: "memory"); \ + __asm__ volatile ("" ::: "memory"); \ } while (0) + #define WT_WRITE_BARRIER() do { \ - __asm__ volatile ("dsb st" ::: "memory"); \ + __asm__ volatile ("" ::: "memory"); \ } while (0) #else diff --git a/src/third_party/wiredtiger/src/include/log.h b/src/third_party/wiredtiger/src/include/log.h index 521de567fc0..e7737e12663 100644 --- a/src/third_party/wiredtiger/src/include/log.h +++ b/src/third_party/wiredtiger/src/include/log.h @@ -266,6 +266,11 @@ struct __wt_log_desc { uint64_t log_size; /* 08-15: Log file size */ }; +/* + * Flags for __wt_txn_op_printlog. + */ +#define WT_TXN_PRINTLOG_HEX 0x0001 /* Add hex output */ + /* * WT_LOG_REC_DESC -- * A descriptor for a log record type. diff --git a/src/third_party/wiredtiger/src/include/misc.h b/src/third_party/wiredtiger/src/include/misc.h index e542baec642..898e44eb8e0 100644 --- a/src/third_party/wiredtiger/src/include/misc.h +++ b/src/third_party/wiredtiger/src/include/misc.h @@ -268,3 +268,6 @@ union __wt_rand_state { uint32_t w, z; } x; }; + +/* Shared array for converting to hex */ +extern const u_char __wt_hex[]; diff --git a/src/third_party/wiredtiger/src/include/session.h b/src/third_party/wiredtiger/src/include/session.h index 5c3bcfb8ed0..1eca49f2c40 100644 --- a/src/third_party/wiredtiger/src/include/session.h +++ b/src/third_party/wiredtiger/src/include/session.h @@ -74,7 +74,10 @@ struct WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) __wt_session_impl { TAILQ_HEAD(__cursors, __wt_cursor) cursors; WT_CURSOR_BACKUP *bkp_cursor; /* Hot backup cursor */ - WT_COMPACT *compact; /* Compact state */ + + WT_COMPACT *compact; /* Compaction information */ + enum { WT_COMPACT_NONE=0, + WT_COMPACT_RUNNING, WT_COMPACT_SUCCESS } compact_state; /* * Lookaside table cursor, sweep and eviction worker threads only. 
@@ -134,8 +137,6 @@ struct WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) __wt_session_impl { void *reconcile; /* Reconciliation support */ int (*reconcile_cleanup)(WT_SESSION_IMPL *); - bool compaction; /* Compaction did some work */ - uint32_t flags; /* diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h index dfe7ee5c6cd..a554607b7d5 100644 --- a/src/third_party/wiredtiger/src/include/stat.h +++ b/src/third_party/wiredtiger/src/include/stat.h @@ -139,8 +139,8 @@ __wt_stats_clear(void *stats_arg, int slot) */ #define WT_STAT_READ(stats, fld) \ __wt_stats_aggregate(stats, WT_STATS_FIELD_TO_SLOT(stats, fld)) -#define WT_STAT_WRITE(session, stats, fld) \ - ((stats)[WT_STATS_SLOT_ID(session)]->fld); +#define WT_STAT_WRITE(stats, fld, v) \ + (stats)->fld = (int64_t)(v) #define WT_STAT_DECRV(session, stats, fld, value) \ (stats)[WT_STATS_SLOT_ID(session)]->fld -= (int64_t)(value) diff --git a/src/third_party/wiredtiger/src/log/log_auto.c b/src/third_party/wiredtiger/src/log/log_auto.c index 5a1d03b1976..54df01d01ab 100644 --- a/src/third_party/wiredtiger/src/log/log_auto.c +++ b/src/third_party/wiredtiger/src/log/log_auto.c @@ -69,7 +69,7 @@ __logrec_json_unpack_str(char *dest, size_t destlen, const char *src, } static int -__logrec_jsonify_str(WT_SESSION_IMPL *session, char **destp, WT_ITEM *item) +__logrec_make_json_str(WT_SESSION_IMPL *session, char **destp, WT_ITEM *item) { size_t needed; @@ -79,6 +79,17 @@ __logrec_jsonify_str(WT_SESSION_IMPL *session, char **destp, WT_ITEM *item) return (0); } +static int +__logrec_make_hex_str(WT_SESSION_IMPL *session, char **destp, WT_ITEM *item) +{ + size_t needed; + + needed = item->size * 2 + 1; + WT_RET(__wt_realloc(session, NULL, needed, destp)); + __wt_fill_hex(item->data, item->size, (uint8_t *)*destp, needed, NULL); + return (0); +} + int __wt_logop_col_put_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, @@ -121,7 +132,8 @@ __wt_logop_col_put_unpack( int 
__wt_logop_col_put_print( - WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out) + WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, + FILE *out, uint32_t flags) { WT_DECL_RET; uint32_t fileid; @@ -138,9 +150,14 @@ __wt_logop_col_put_print( " \"fileid\": \"%" PRIu32 "\",\n", fileid)); WT_ERR(__wt_fprintf(out, " \"recno\": \"%" PRIu64 "\",\n", recno)); - WT_ERR(__logrec_jsonify_str(session, &escaped, &value)); + WT_ERR(__logrec_make_json_str(session, &escaped, &value)); WT_ERR(__wt_fprintf(out, " \"value\": \"%s\"", escaped)); + if (LF_ISSET(WT_TXN_PRINTLOG_HEX)) { + WT_ERR(__logrec_make_hex_str(session, &escaped, &value)); + WT_ERR(__wt_fprintf(out, + ",\n \"value-hex\": \"%s\"", escaped)); + } err: __wt_free(session, escaped); return (ret); @@ -188,11 +205,13 @@ __wt_logop_col_remove_unpack( int __wt_logop_col_remove_print( - WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out) + WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, + FILE *out, uint32_t flags) { uint32_t fileid; uint64_t recno; + WT_UNUSED(flags); WT_RET(__wt_logop_col_remove_unpack( session, pp, end, &fileid, &recno)); @@ -246,12 +265,14 @@ __wt_logop_col_truncate_unpack( int __wt_logop_col_truncate_print( - WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out) + WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, + FILE *out, uint32_t flags) { uint32_t fileid; uint64_t start; uint64_t stop; + WT_UNUSED(flags); WT_RET(__wt_logop_col_truncate_unpack( session, pp, end, &fileid, &start, &stop)); @@ -307,7 +328,8 @@ __wt_logop_row_put_unpack( int __wt_logop_row_put_print( - WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out) + WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, + FILE *out, uint32_t flags) { WT_DECL_RET; uint32_t fileid; @@ -322,12 +344,22 @@ __wt_logop_row_put_print( WT_RET(__wt_fprintf(out, " \"optype\": \"row_put\",\n")); 
WT_ERR(__wt_fprintf(out, " \"fileid\": \"%" PRIu32 "\",\n", fileid)); - WT_ERR(__logrec_jsonify_str(session, &escaped, &key)); + WT_ERR(__logrec_make_json_str(session, &escaped, &key)); WT_ERR(__wt_fprintf(out, " \"key\": \"%s\",\n", escaped)); - WT_ERR(__logrec_jsonify_str(session, &escaped, &value)); + if (LF_ISSET(WT_TXN_PRINTLOG_HEX)) { + WT_ERR(__logrec_make_hex_str(session, &escaped, &key)); + WT_ERR(__wt_fprintf(out, + " \"key-hex\": \"%s\",\n", escaped)); + } + WT_ERR(__logrec_make_json_str(session, &escaped, &value)); WT_ERR(__wt_fprintf(out, " \"value\": \"%s\"", escaped)); + if (LF_ISSET(WT_TXN_PRINTLOG_HEX)) { + WT_ERR(__logrec_make_hex_str(session, &escaped, &value)); + WT_ERR(__wt_fprintf(out, + ",\n \"value-hex\": \"%s\"", escaped)); + } err: __wt_free(session, escaped); return (ret); @@ -375,7 +407,8 @@ __wt_logop_row_remove_unpack( int __wt_logop_row_remove_print( - WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out) + WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, + FILE *out, uint32_t flags) { WT_DECL_RET; uint32_t fileid; @@ -389,9 +422,14 @@ __wt_logop_row_remove_print( WT_RET(__wt_fprintf(out, " \"optype\": \"row_remove\",\n")); WT_ERR(__wt_fprintf(out, " \"fileid\": \"%" PRIu32 "\",\n", fileid)); - WT_ERR(__logrec_jsonify_str(session, &escaped, &key)); + WT_ERR(__logrec_make_json_str(session, &escaped, &key)); WT_ERR(__wt_fprintf(out, " \"key\": \"%s\"", escaped)); + if (LF_ISSET(WT_TXN_PRINTLOG_HEX)) { + WT_ERR(__logrec_make_hex_str(session, &escaped, &key)); + WT_ERR(__wt_fprintf(out, + ",\n \"key-hex\": \"%s\"", escaped)); + } err: __wt_free(session, escaped); return (ret); @@ -439,7 +477,8 @@ __wt_logop_row_truncate_unpack( int __wt_logop_row_truncate_print( - WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out) + WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, + FILE *out, uint32_t flags) { WT_DECL_RET; uint32_t fileid; @@ -455,12 +494,22 @@ 
__wt_logop_row_truncate_print( WT_RET(__wt_fprintf(out, " \"optype\": \"row_truncate\",\n")); WT_ERR(__wt_fprintf(out, " \"fileid\": \"%" PRIu32 "\",\n", fileid)); - WT_ERR(__logrec_jsonify_str(session, &escaped, &start)); + WT_ERR(__logrec_make_json_str(session, &escaped, &start)); WT_ERR(__wt_fprintf(out, " \"start\": \"%s\",\n", escaped)); - WT_ERR(__logrec_jsonify_str(session, &escaped, &stop)); + if (LF_ISSET(WT_TXN_PRINTLOG_HEX)) { + WT_ERR(__logrec_make_hex_str(session, &escaped, &start)); + WT_ERR(__wt_fprintf(out, + " \"start-hex\": \"%s\",\n", escaped)); + } + WT_ERR(__logrec_make_json_str(session, &escaped, &stop)); WT_ERR(__wt_fprintf(out, " \"stop\": \"%s\",\n", escaped)); + if (LF_ISSET(WT_TXN_PRINTLOG_HEX)) { + WT_ERR(__logrec_make_hex_str(session, &escaped, &stop)); + WT_ERR(__wt_fprintf(out, + " \"stop-hex\": \"%s\",\n", escaped)); + } WT_ERR(__wt_fprintf(out, " \"mode\": \"%" PRIu32 "\"", mode)); @@ -470,7 +519,8 @@ err: __wt_free(session, escaped); int __wt_txn_op_printlog( - WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out) + WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, + FILE *out, uint32_t flags) { uint32_t optype, opsize; @@ -480,27 +530,33 @@ __wt_txn_op_printlog( switch (optype) { case WT_LOGOP_COL_PUT: - WT_RET(__wt_logop_col_put_print(session, pp, end, out)); + WT_RET(__wt_logop_col_put_print(session, pp, end, out, + flags)); break; case WT_LOGOP_COL_REMOVE: - WT_RET(__wt_logop_col_remove_print(session, pp, end, out)); + WT_RET(__wt_logop_col_remove_print(session, pp, end, out, + flags)); break; case WT_LOGOP_COL_TRUNCATE: - WT_RET(__wt_logop_col_truncate_print(session, pp, end, out)); + WT_RET(__wt_logop_col_truncate_print(session, pp, end, out, + flags)); break; case WT_LOGOP_ROW_PUT: - WT_RET(__wt_logop_row_put_print(session, pp, end, out)); + WT_RET(__wt_logop_row_put_print(session, pp, end, out, + flags)); break; case WT_LOGOP_ROW_REMOVE: - WT_RET(__wt_logop_row_remove_print(session, 
pp, end, out)); + WT_RET(__wt_logop_row_remove_print(session, pp, end, out, + flags)); break; case WT_LOGOP_ROW_TRUNCATE: - WT_RET(__wt_logop_row_truncate_print(session, pp, end, out)); + WT_RET(__wt_logop_row_truncate_print(session, pp, end, out, + flags)); break; WT_ILLEGAL_VALUE(session); diff --git a/src/third_party/wiredtiger/src/lsm/lsm_stat.c b/src/third_party/wiredtiger/src/lsm/lsm_stat.c index c1eb7a2a389..7c53990a2a2 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_stat.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_stat.c @@ -91,7 +91,7 @@ __curstat_lsm_init( * top-level. */ new = (WT_DSRC_STATS *)WT_CURSOR_STATS(stat_cursor); - new->lsm_generation_max = chunk->generation; + WT_STAT_WRITE(new, lsm_generation_max, chunk->generation); /* Aggregate statistics from each new chunk. */ __wt_stat_dsrc_aggregate_single(new, stats); @@ -115,37 +115,40 @@ __curstat_lsm_init( * into the top-level. */ new = (WT_DSRC_STATS *)WT_CURSOR_STATS(stat_cursor); - new->bloom_size = - (int64_t)((chunk->count * lsm_tree->bloom_bit_count) / 8); - new->bloom_page_evict = - new->cache_eviction_clean + new->cache_eviction_dirty; - new->bloom_page_read = new->cache_read; + WT_STAT_WRITE(new, bloom_size, + (int64_t)((chunk->count * lsm_tree->bloom_bit_count) / 8)); + WT_STAT_WRITE(new, bloom_page_evict, + new->cache_eviction_clean + new->cache_eviction_dirty); + WT_STAT_WRITE(new, bloom_page_read, new->cache_read); __wt_stat_dsrc_aggregate_single(new, stats); WT_ERR(stat_cursor->close(stat_cursor)); } /* Set statistics that aren't aggregated directly into the cursor */ - stats->bloom_count = bloom_count; - stats->lsm_chunk_count = lsm_tree->nchunks; + WT_STAT_WRITE(stats, bloom_count, bloom_count); + WT_STAT_WRITE(stats, lsm_chunk_count, lsm_tree->nchunks); /* Include, and optionally clear, LSM-level specific information. 
*/ - stats->bloom_miss = lsm_tree->bloom_miss; + WT_STAT_WRITE(stats, bloom_miss, lsm_tree->bloom_miss); if (F_ISSET(cst, WT_CONN_STAT_CLEAR)) lsm_tree->bloom_miss = 0; - stats->bloom_hit = lsm_tree->bloom_hit; + WT_STAT_WRITE(stats, bloom_hit, lsm_tree->bloom_hit); if (F_ISSET(cst, WT_CONN_STAT_CLEAR)) lsm_tree->bloom_hit = 0; - stats->bloom_false_positive = lsm_tree->bloom_false_positive; + WT_STAT_WRITE( + stats, bloom_false_positive, lsm_tree->bloom_false_positive); if (F_ISSET(cst, WT_CONN_STAT_CLEAR)) lsm_tree->bloom_false_positive = 0; - stats->lsm_lookup_no_bloom = lsm_tree->lsm_lookup_no_bloom; + WT_STAT_WRITE( + stats, lsm_lookup_no_bloom, lsm_tree->lsm_lookup_no_bloom); if (F_ISSET(cst, WT_CONN_STAT_CLEAR)) lsm_tree->lsm_lookup_no_bloom = 0; - stats->lsm_checkpoint_throttle = lsm_tree->lsm_checkpoint_throttle; + WT_STAT_WRITE( + stats, lsm_checkpoint_throttle, lsm_tree->lsm_checkpoint_throttle); if (F_ISSET(cst, WT_CONN_STAT_CLEAR)) lsm_tree->lsm_checkpoint_throttle = 0; - stats->lsm_merge_throttle = lsm_tree->lsm_merge_throttle; + WT_STAT_WRITE(stats, lsm_merge_throttle, lsm_tree->lsm_merge_throttle); if (F_ISSET(cst, WT_CONN_STAT_CLEAR)) lsm_tree->lsm_merge_throttle = 0; diff --git a/src/third_party/wiredtiger/src/meta/meta_turtle.c b/src/third_party/wiredtiger/src/meta/meta_turtle.c index 13e8b31916f..3bd57846862 100644 --- a/src/third_party/wiredtiger/src/meta/meta_turtle.c +++ b/src/third_party/wiredtiger/src/meta/meta_turtle.c @@ -271,8 +271,7 @@ err: WT_TRET(__wt_fclose(&fp, WT_FHANDLE_READ)); * Update the turtle file. 
*/ int -__wt_turtle_update( - WT_SESSION_IMPL *session, const char *key, const char *value) +__wt_turtle_update(WT_SESSION_IMPL *session, const char *key, const char *value) { WT_FH *fh; WT_DECL_ITEM(buf); diff --git a/src/third_party/wiredtiger/src/os_posix/os_map.c b/src/third_party/wiredtiger/src/os_posix/os_map.c index e95ccb0ade2..4276c89dbcf 100644 --- a/src/third_party/wiredtiger/src/os_posix/os_map.c +++ b/src/third_party/wiredtiger/src/os_posix/os_map.c @@ -48,8 +48,6 @@ __wt_mmap(WT_SESSION_IMPL *session, return (0); } -#define WT_VM_PAGESIZE 4096 - /* * __wt_mmap_preload -- * Cause a section of a memory map to be faulted in. @@ -59,9 +57,10 @@ __wt_mmap_preload(WT_SESSION_IMPL *session, const void *p, size_t size) { #ifdef HAVE_POSIX_MADVISE /* Linux requires the address be aligned to a 4KB boundary. */ + WT_CONNECTION_IMPL *conn = S2C(session); WT_BM *bm = S2BT(session)->bm; WT_DECL_RET; - void *blk = (void *)((uintptr_t)p & ~(uintptr_t)(WT_VM_PAGESIZE - 1)); + void *blk = (void *)((uintptr_t)p & ~(uintptr_t)(conn->page_size - 1)); size += WT_PTRDIFF(p, blk); /* XXX proxy for "am I doing a scan?" -- manual read-ahead */ @@ -78,9 +77,9 @@ __wt_mmap_preload(WT_SESSION_IMPL *session, const void *p, size_t size) * Manual pages aren't clear on whether alignment is required for the * size, so we will be conservative. */ - size &= ~(size_t)(WT_VM_PAGESIZE - 1); + size &= ~(size_t)(conn->page_size - 1); - if (size > WT_VM_PAGESIZE && + if (size > (size_t)conn->page_size && (ret = posix_madvise(blk, size, POSIX_MADV_WILLNEED)) != 0) WT_RET_MSG(session, ret, "posix_madvise will need"); #else @@ -101,8 +100,9 @@ __wt_mmap_discard(WT_SESSION_IMPL *session, void *p, size_t size) { #ifdef HAVE_POSIX_MADVISE /* Linux requires the address be aligned to a 4KB boundary. 
*/ + WT_CONNECTION_IMPL *conn = S2C(session); WT_DECL_RET; - void *blk = (void *)((uintptr_t)p & ~(uintptr_t)(WT_VM_PAGESIZE - 1)); + void *blk = (void *)((uintptr_t)p & ~(uintptr_t)(conn->page_size - 1)); size += WT_PTRDIFF(p, blk); if ((ret = posix_madvise(blk, size, POSIX_MADV_DONTNEED)) != 0) diff --git a/src/third_party/wiredtiger/src/os_posix/os_pagesize.c b/src/third_party/wiredtiger/src/os_posix/os_pagesize.c new file mode 100644 index 00000000000..e7c7b4fdf15 --- /dev/null +++ b/src/third_party/wiredtiger/src/os_posix/os_pagesize.c @@ -0,0 +1,19 @@ +/*- + * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +/* + * __wt_get_vm_pagesize -- + * Return the default page size of a virtual memory page. + */ +int +__wt_get_vm_pagesize(void) +{ + return (getpagesize()); +} diff --git a/src/third_party/wiredtiger/src/os_win/os_pagesize.c b/src/third_party/wiredtiger/src/os_win/os_pagesize.c new file mode 100644 index 00000000000..55cd6a694ec --- /dev/null +++ b/src/third_party/wiredtiger/src/os_win/os_pagesize.c @@ -0,0 +1,23 @@ +/*- + * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +/* + * __wt_get_vm_pagesize -- + * Return the default page size of a virtual memory page. 
+ */ +int +__wt_get_vm_pagesize(void) +{ + SYSTEM_INFO system_info; + + GetSystemInfo(&system_info); + + return (system_info.dwPageSize); +} diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c index 21cc68ed119..2b07117f9d5 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_write.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c @@ -1276,6 +1276,8 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r, for (upd = upd_list; upd->next != NULL; upd = upd->next) ; upd->next = append; + __wt_cache_page_inmem_incr( + session, page, WT_UPDATE_MEMSIZE(append)); } /* @@ -1756,7 +1758,7 @@ __rec_key_state_update(WT_RECONCILE *r, bool ovfl_key) * Figure out the maximum leaf page size for the reconciliation. */ static inline uint32_t -__rec_leaf_page_max(WT_SESSION_IMPL *session, WT_RECONCILE *r) +__rec_leaf_page_max(WT_SESSION_IMPL *session, WT_RECONCILE *r) { WT_BTREE *btree; WT_PAGE *page; @@ -3263,7 +3265,14 @@ supd_check_complete: memset(WT_BLOCK_HEADER_REF(dsk), 0, btree->block_header); bnd->cksum = __wt_cksum(buf->data, buf->size); - if (mod->rec_result == WT_PM_REC_MULTIBLOCK && + /* + * One last check: don't reuse blocks if compacting, the reason + * for compaction is to move blocks to different locations. We + * do this check after calculating the checksums, hopefully the + * next write can be skipped. 
+ */ + if (session->compact_state == WT_COMPACT_NONE && + mod->rec_result == WT_PM_REC_MULTIBLOCK && mod->mod_multi_entries > bnd_slot) { multi = &mod->mod_multi[bnd_slot]; if (multi->size == bnd->size && @@ -3502,7 +3511,7 @@ __wt_bulk_wrapup(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) break; case BTREE_COL_VAR: if (cbulk->rle != 0) - WT_RET(__wt_bulk_insert_var(session, cbulk)); + WT_RET(__wt_bulk_insert_var(session, cbulk, false)); break; case BTREE_ROW: break; @@ -3625,55 +3634,69 @@ __rec_col_fix_bulk_insert_split_check(WT_CURSOR_BULK *cbulk) * Fixed-length column-store bulk insert. */ int -__wt_bulk_insert_fix(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) +__wt_bulk_insert_fix( + WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bool deleted) { WT_BTREE *btree; WT_CURSOR *cursor; WT_RECONCILE *r; - uint32_t entries, offset, page_entries, page_size; - const uint8_t *data; r = cbulk->reconcile; btree = S2BT(session); cursor = &cbulk->cbt.iface; - if (cbulk->bitmap) { - if (((r->recno - 1) * btree->bitcnt) & 0x7) - WT_RET_MSG(session, EINVAL, - "Bulk bitmap load not aligned on a byte boundary"); - for (data = cursor->value.data, - entries = (uint32_t)cursor->value.size; - entries > 0; - entries -= page_entries, data += page_size) { - WT_RET(__rec_col_fix_bulk_insert_split_check(cbulk)); - - page_entries = - WT_MIN(entries, cbulk->nrecs - cbulk->entry); - page_size = __bitstr_size(page_entries * btree->bitcnt); - offset = __bitstr_size(cbulk->entry * btree->bitcnt); - memcpy(r->first_free + offset, data, page_size); - cbulk->entry += page_entries; - r->recno += page_entries; - } - return (0); - } - WT_RET(__rec_col_fix_bulk_insert_split_check(cbulk)); - - __bit_setv(r->first_free, - cbulk->entry, btree->bitcnt, ((uint8_t *)cursor->value.data)[0]); + __bit_setv(r->first_free, cbulk->entry, + btree->bitcnt, deleted ? 
0 : ((uint8_t *)cursor->value.data)[0]); ++cbulk->entry; ++r->recno; return (0); } +/* + * __wt_bulk_insert_fix_bitmap -- + * Fixed-length column-store bulk insert. + */ +int +__wt_bulk_insert_fix_bitmap(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) +{ + WT_BTREE *btree; + WT_CURSOR *cursor; + WT_RECONCILE *r; + uint32_t entries, offset, page_entries, page_size; + const uint8_t *data; + + r = cbulk->reconcile; + btree = S2BT(session); + cursor = &cbulk->cbt.iface; + + if (((r->recno - 1) * btree->bitcnt) & 0x7) + WT_RET_MSG(session, EINVAL, + "Bulk bitmap load not aligned on a byte boundary"); + for (data = cursor->value.data, + entries = (uint32_t)cursor->value.size; + entries > 0; + entries -= page_entries, data += page_size) { + WT_RET(__rec_col_fix_bulk_insert_split_check(cbulk)); + + page_entries = WT_MIN(entries, cbulk->nrecs - cbulk->entry); + page_size = __bitstr_size(page_entries * btree->bitcnt); + offset = __bitstr_size(cbulk->entry * btree->bitcnt); + memcpy(r->first_free + offset, data, page_size); + cbulk->entry += page_entries; + r->recno += page_entries; + } + return (0); +} + /* * __wt_bulk_insert_var -- * Variable-length column-store bulk insert. */ int -__wt_bulk_insert_var(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) +__wt_bulk_insert_var( + WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bool deleted) { WT_BTREE *btree; WT_KV *val; @@ -3682,14 +3705,20 @@ __wt_bulk_insert_var(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) r = cbulk->reconcile; btree = S2BT(session); - /* - * Store the bulk cursor's last buffer, not the current value, we're - * creating a duplicate count, which means we want the previous value - * seen, not the current value. 
- */ val = &r->v; - WT_RET(__rec_cell_build_val( - session, r, cbulk->last.data, cbulk->last.size, cbulk->rle)); + if (deleted) { + val->cell_len = __wt_cell_pack_del(&val->cell, cbulk->rle); + val->buf.data = NULL; + val->buf.size = 0; + val->len = val->cell_len; + } else + /* + * Store the bulk cursor's last buffer, not the current value, + * we're tracking duplicates, which means we want the previous + * value seen, not the current value. + */ + WT_RET(__rec_cell_build_val(session, + r, cbulk->last.data, cbulk->last.size, cbulk->rle)); /* Boundary: split or write the page. */ if (val->len > r->space_avail) @@ -4445,7 +4474,7 @@ compare: /* WT_ERR(__rec_txn_read(session, r, ins, NULL, NULL, &upd)); if (upd == NULL) continue; - for (n = WT_INSERT_RECNO(ins); src_recno <= n; ++src_recno) { + for (n = WT_INSERT_RECNO(ins); src_recno <= n;) { /* * The application may have inserted records which left * gaps in the name space, and these gaps can be huge. @@ -4485,7 +4514,7 @@ compare: /* last->size == size && memcmp(last->data, data, size) == 0)) { ++rle; - continue; + goto next; } WT_ERR(__rec_col_var_helper(session, r, salvage, last, last_deleted, 0, rle)); @@ -4504,6 +4533,15 @@ compare: /* } last_deleted = deleted; rle = 1; + + /* + * Move to the next record. It's not a simple increment + * because if it's the maximum record, incrementing it + * wraps to 0 and this turns into an infinite loop. + */ +next: if (src_recno == UINT64_MAX) + break; + ++src_recno; } } diff --git a/src/third_party/wiredtiger/src/session/session_api.c b/src/third_party/wiredtiger/src/session/session_api.c index 053f69ee7f8..f0d0f26db54 100644 --- a/src/third_party/wiredtiger/src/session/session_api.c +++ b/src/third_party/wiredtiger/src/session/session_api.c @@ -148,7 +148,7 @@ __session_close(WT_SESSION *wt_session, const char *config) * via the registered close callback. 
*/ if (session->event_handler->handle_close != NULL && - !WT_STREQ(cursor->uri, WT_LAS_URI)) + !WT_STREQ(cursor->internal_uri, WT_LAS_URI)) WT_TRET(session->event_handler->handle_close( session->event_handler, wt_session, cursor)); WT_TRET(cursor->close(cursor)); diff --git a/src/third_party/wiredtiger/src/session/session_compact.c b/src/third_party/wiredtiger/src/session/session_compact.c index 456fcd3ce03..8a5b741c0c5 100644 --- a/src/third_party/wiredtiger/src/session/session_compact.c +++ b/src/third_party/wiredtiger/src/session/session_compact.c @@ -172,12 +172,12 @@ __compact_file(WT_SESSION_IMPL *session, const char *uri, const char *cfg[]) for (i = 0; i < 100; ++i) { WT_ERR(__wt_txn_checkpoint(session, checkpoint_cfg)); - session->compaction = false; + session->compact_state = WT_COMPACT_RUNNING; WT_WITH_SCHEMA_LOCK(session, ret = __wt_schema_worker( session, uri, __wt_compact, NULL, cfg, 0)); WT_ERR(ret); - if (!session->compaction) + if (session->compact_state != WT_COMPACT_SUCCESS) break; WT_ERR(__wt_txn_checkpoint(session, checkpoint_cfg)); @@ -185,7 +185,9 @@ __compact_file(WT_SESSION_IMPL *session, const char *uri, const char *cfg[]) WT_ERR(__session_compact_check_timeout(session, start_time)); } -err: __wt_scr_free(session, &t); +err: session->compact_state = WT_COMPACT_NONE; + + __wt_scr_free(session, &t); return (ret); } diff --git a/src/third_party/wiredtiger/src/support/global.c b/src/third_party/wiredtiger/src/support/global.c index 1e32f5b4453..2330a65a707 100644 --- a/src/third_party/wiredtiger/src/support/global.c +++ b/src/third_party/wiredtiger/src/support/global.c @@ -11,28 +11,6 @@ WT_PROCESS __wt_process; /* Per-process structure */ static int __wt_pthread_once_failed; /* If initialization failed */ -/* - * __system_is_little_endian -- - * Check if the system is little endian. 
- */ -static int -__system_is_little_endian(void) -{ - uint64_t v; - bool little; - - v = 1; - little = *((uint8_t *)&v) != 0; - - if (little) - return (0); - - fprintf(stderr, - "This release of the WiredTiger data engine does not support " - "big-endian systems; contact WiredTiger for more information.\n"); - return (EINVAL); -} - /* * __wt_global_once -- * Global initialization, run once. @@ -42,11 +20,6 @@ __wt_global_once(void) { WT_DECL_RET; - if ((ret = __system_is_little_endian()) != 0) { - __wt_pthread_once_failed = ret; - return; - } - if ((ret = __wt_spin_init(NULL, &__wt_process.spinlock, "global")) != 0) { __wt_pthread_once_failed = ret; @@ -115,7 +88,7 @@ __wt_attach(WT_SESSION_IMPL *session) /* Sleep forever, the debugger will interrupt us when it attaches. */ for (;;) - __wt_sleep(100, 0); + __wt_sleep(10, 0); #else WT_UNUSED(session); #endif diff --git a/src/third_party/wiredtiger/src/support/hash_city.c b/src/third_party/wiredtiger/src/support/hash_city.c index 9a4a6464f40..33f4113c004 100644 --- a/src/third_party/wiredtiger/src/support/hash_city.c +++ b/src/third_party/wiredtiger/src/support/hash_city.c @@ -99,6 +99,12 @@ static uint32_t UNALIGNED_LOAD32(const char *p) { #define bswap_32(x) OSSwapInt32(x) #define bswap_64(x) OSSwapInt64(x) +#elif defined(__sun) + +#include +#define bswap_32 BSWAP_32 +#define bswap_64 BSWAP_64 + #else #include #endif diff --git a/src/third_party/wiredtiger/src/support/hex.c b/src/third_party/wiredtiger/src/support/hex.c index eb9f420911a..5fb8d4bc190 100644 --- a/src/third_party/wiredtiger/src/support/hex.c +++ b/src/third_party/wiredtiger/src/support/hex.c @@ -8,7 +8,7 @@ #include "wt_internal.h" -static const u_char hex[] = "0123456789abcdef"; +const u_char __wt_hex[] = "0123456789abcdef"; /* * __fill_hex -- @@ -25,14 +25,25 @@ __fill_hex(const uint8_t *src, size_t src_max, --dest_max; for (; src_max > 0 && dest_max > 1; src_max -= 1, dest_max -= 2, ++src) { - *dest++ = hex[(*src & 0xf0) >> 4]; - *dest++ = 
hex[*src & 0x0f]; + *dest++ = __wt_hex[(*src & 0xf0) >> 4]; + *dest++ = __wt_hex[*src & 0x0f]; } *dest++ = '\0'; if (lenp != NULL) *lenp = WT_PTRDIFF(dest, dest_orig); } +/* + * __wt_fill_hex -- + * In-memory conversion of raw bytes to a hexadecimal representation. + */ +void +__wt_fill_hex(const uint8_t *src, size_t src_max, + uint8_t *dest, size_t dest_max, size_t *lenp) +{ + __fill_hex(src, src_max, dest, dest_max, lenp); +} + /* * __wt_raw_to_hex -- * Convert a chunk of data to a nul-terminated printable hex string. @@ -83,8 +94,8 @@ __wt_raw_to_esc_hex( *t++ = *p; } else { *t++ = '\\'; - *t++ = hex[(*p & 0xf0) >> 4]; - *t++ = hex[*p & 0x0f]; + *t++ = __wt_hex[(*p & 0xf0) >> 4]; + *t++ = __wt_hex[*p & 0x0f]; } *t++ = '\0'; to->size = WT_PTRDIFF(t, to->mem); diff --git a/src/third_party/wiredtiger/src/support/huffman.c b/src/third_party/wiredtiger/src/support/huffman.c index 4bda365cb10..9488dbf14fe 100644 --- a/src/third_party/wiredtiger/src/support/huffman.c +++ b/src/third_party/wiredtiger/src/support/huffman.c @@ -1,9 +1,31 @@ -/*- +/* * Copyright (c) 2014-2015 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * - * See the file LICENSE for redistribution information. + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name MongoDB or the name WiredTiger + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY MONGODB INC. 
``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ #include "wt_internal.h" diff --git a/src/third_party/wiredtiger/src/support/rand.c b/src/third_party/wiredtiger/src/support/rand.c index f5ecb12633e..3adcb801f03 100644 --- a/src/third_party/wiredtiger/src/support/rand.c +++ b/src/third_party/wiredtiger/src/support/rand.c @@ -59,6 +59,29 @@ __wt_random_init(WT_RAND_STATE volatile * rnd_state) *rnd_state = rnd; } +/* + * __wt_random_init_seed -- + * Initialize the state of a 32-bit pseudo-random number. + * Use this, instead of __wt_random_init if we are running with multiple + * threads and we want each thread to initialize its own random state based + * on a different random seed. + */ +int +__wt_random_init_seed( + WT_SESSION_IMPL *session, WT_RAND_STATE volatile * rnd_state) +{ + struct timespec ts; + WT_RAND_STATE rnd; + + WT_RET(__wt_epoch(session, &ts)); + M_W(rnd) = (uint32_t)(ts.tv_nsec + 521288629); + M_Z(rnd) = (uint32_t)(ts.tv_nsec + 362436069); + + *rnd_state = rnd; + + return (0); +} + /* * __wt_random -- * Return a 32-bit pseudo-random number. 
diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c index 4d7cd65fd18..7a615131628 100644 --- a/src/third_party/wiredtiger/src/support/stat.c +++ b/src/third_party/wiredtiger/src/support/stat.c @@ -250,19 +250,24 @@ __wt_stat_dsrc_aggregate_single( to->block_alloc += from->block_alloc; to->block_free += from->block_free; to->block_checkpoint_size += from->block_checkpoint_size; - to->allocation_size = from->allocation_size; + if (from->allocation_size > to->allocation_size) + to->allocation_size = from->allocation_size; to->block_reuse_bytes += from->block_reuse_bytes; - to->block_magic = from->block_magic; - to->block_major = from->block_major; + if (from->block_magic > to->block_magic) + to->block_magic = from->block_magic; + if (from->block_major > to->block_major) + to->block_major = from->block_major; to->block_size += from->block_size; - to->block_minor = from->block_minor; + if (from->block_minor > to->block_minor) + to->block_minor = from->block_minor; to->btree_checkpoint_generation += from->btree_checkpoint_generation; to->btree_column_fix += from->btree_column_fix; to->btree_column_internal += from->btree_column_internal; to->btree_column_deleted += from->btree_column_deleted; to->btree_column_variable += from->btree_column_variable; to->btree_column_rle += from->btree_column_rle; - to->btree_fixed_len = from->btree_fixed_len; + if (from->btree_fixed_len > to->btree_fixed_len) + to->btree_fixed_len = from->btree_fixed_len; if (from->btree_maxintlkey > to->btree_maxintlkey) to->btree_maxintlkey = from->btree_maxintlkey; if (from->btree_maxintlpage > to->btree_maxintlpage) @@ -367,12 +372,16 @@ __wt_stat_dsrc_aggregate( to->block_free += WT_STAT_READ(from, block_free); to->block_checkpoint_size += WT_STAT_READ(from, block_checkpoint_size); - to->allocation_size = from[0]->allocation_size; + if ((v = WT_STAT_READ(from, allocation_size)) > to->allocation_size) + to->allocation_size = v; 
to->block_reuse_bytes += WT_STAT_READ(from, block_reuse_bytes); - to->block_magic = from[0]->block_magic; - to->block_major = from[0]->block_major; + if ((v = WT_STAT_READ(from, block_magic)) > to->block_magic) + to->block_magic = v; + if ((v = WT_STAT_READ(from, block_major)) > to->block_major) + to->block_major = v; to->block_size += WT_STAT_READ(from, block_size); - to->block_minor = from[0]->block_minor; + if ((v = WT_STAT_READ(from, block_minor)) > to->block_minor) + to->block_minor = v; to->btree_checkpoint_generation += WT_STAT_READ(from, btree_checkpoint_generation); to->btree_column_fix += WT_STAT_READ(from, btree_column_fix); @@ -382,15 +391,14 @@ __wt_stat_dsrc_aggregate( to->btree_column_variable += WT_STAT_READ(from, btree_column_variable); to->btree_column_rle += WT_STAT_READ(from, btree_column_rle); - to->btree_fixed_len = from[0]->btree_fixed_len; - if ((v = WT_STAT_READ(from, btree_maxintlkey)) > - to->btree_maxintlkey) + if ((v = WT_STAT_READ(from, btree_fixed_len)) > to->btree_fixed_len) + to->btree_fixed_len = v; + if ((v = WT_STAT_READ(from, btree_maxintlkey)) > to->btree_maxintlkey) to->btree_maxintlkey = v; if ((v = WT_STAT_READ(from, btree_maxintlpage)) > to->btree_maxintlpage) to->btree_maxintlpage = v; - if ((v = WT_STAT_READ(from, btree_maxleafkey)) > - to->btree_maxleafkey) + if ((v = WT_STAT_READ(from, btree_maxleafkey)) > to->btree_maxleafkey) to->btree_maxleafkey = v; if ((v = WT_STAT_READ(from, btree_maxleafpage)) > to->btree_maxleafpage) diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c index f835fea8f67..0a3e4a7a7db 100644 --- a/src/third_party/wiredtiger/src/txn/txn.c +++ b/src/third_party/wiredtiger/src/txn/txn.c @@ -216,6 +216,7 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, bool force) conn = S2C(session); txn_global = &conn->txn_global; +retry: current_id = last_running = txn_global->current; oldest_session = NULL; prev_oldest_id = txn_global->oldest_id; @@ -287,43 +288,60 @@ 
__wt_txn_update_oldest(WT_SESSION_IMPL *session, bool force) WT_TXNID_LT(txn_global->last_running, last_running); /* Update the oldest ID. */ - if ((WT_TXNID_LT(prev_oldest_id, oldest_id) || last_running_moved) && - __wt_atomic_casiv32(&txn_global->scan_count, 1, -1)) { - WT_ORDERED_READ(session_cnt, conn->session_cnt); - for (i = 0, s = txn_global->states; i < session_cnt; i++, s++) { - if ((id = s->id) != WT_TXN_NONE && - WT_TXNID_LT(id, last_running)) - last_running = id; - if ((id = s->snap_min) != WT_TXN_NONE && - WT_TXNID_LT(id, oldest_id)) - oldest_id = id; - } - - if (WT_TXNID_LT(last_running, oldest_id)) - oldest_id = last_running; - -#ifdef HAVE_DIAGNOSTIC + if (WT_TXNID_LT(prev_oldest_id, oldest_id) || last_running_moved) { /* - * Make sure the ID doesn't move past any named snapshots. - * - * Don't include the read/assignment in the assert statement. - * Coverity complains if there are assignments only done in - * diagnostic builds, and when the read is from a volatile. + * We know we want to update. Check if we're racing. */ - id = txn_global->nsnap_oldest_id; - WT_ASSERT(session, - id == WT_TXN_NONE || !WT_TXNID_LT(id, oldest_id)); + if (__wt_atomic_casiv32(&txn_global->scan_count, 1, -1)) { + WT_ORDERED_READ(session_cnt, conn->session_cnt); + for (i = 0, s = txn_global->states; + i < session_cnt; i++, s++) { + if ((id = s->id) != WT_TXN_NONE && + WT_TXNID_LT(id, last_running)) + last_running = id; + if ((id = s->snap_min) != WT_TXN_NONE && + WT_TXNID_LT(id, oldest_id)) + oldest_id = id; + } + + if (WT_TXNID_LT(last_running, oldest_id)) + oldest_id = last_running; + +#ifdef HAVE_DIAGNOSTIC + /* + * Make sure the ID doesn't move past any named + * snapshots. + * + * Don't include the read/assignment in the assert + * statement. Coverity complains if there are + * assignments only done in diagnostic builds, and + * when the read is from a volatile. 
+ */ + id = txn_global->nsnap_oldest_id; + WT_ASSERT(session, + id == WT_TXN_NONE || !WT_TXNID_LT(id, oldest_id)); #endif - if (WT_TXNID_LT(txn_global->last_running, last_running)) - txn_global->last_running = last_running; - if (WT_TXNID_LT(txn_global->oldest_id, oldest_id)) - txn_global->oldest_id = oldest_id; - WT_ASSERT(session, txn_global->scan_count == -1); - txn_global->scan_count = 0; + if (WT_TXNID_LT(txn_global->last_running, last_running)) + txn_global->last_running = last_running; + if (WT_TXNID_LT(txn_global->oldest_id, oldest_id)) + txn_global->oldest_id = oldest_id; + WT_ASSERT(session, txn_global->scan_count == -1); + txn_global->scan_count = 0; + } else { + /* + * We wanted to update the oldest ID but we're racing + * another thread. Retry if this is a forced update. + */ + WT_ASSERT(session, txn_global->scan_count > 0); + (void)__wt_atomic_subiv32(&txn_global->scan_count, 1); + if (force) { + __wt_yield(); + goto retry; + } + } } else { if (WT_VERBOSE_ISSET(session, WT_VERB_TRANSACTION) && - current_id - oldest_id > 10000 && last_running_moved && - oldest_session != NULL) { + current_id - oldest_id > 10000 && oldest_session != NULL) { (void)__wt_verbose(session, WT_VERB_TRANSACTION, "old snapshot %" PRIu64 " pinned in session %d [%s]" diff --git a/src/third_party/wiredtiger/src/txn/txn_log.c b/src/third_party/wiredtiger/src/txn/txn_log.c index c5fa52dea6a..148ed868792 100644 --- a/src/third_party/wiredtiger/src/txn/txn_log.c +++ b/src/third_party/wiredtiger/src/txn/txn_log.c @@ -8,6 +8,12 @@ #include "wt_internal.h" +/* Cookie passed to __txn_printlog. */ +typedef struct { + FILE *out; + uint32_t flags; +} WT_TXN_PRINTLOG_ARGS; + /* * __txn_op_log -- * Log an operation for the current transaction. 
@@ -64,7 +70,8 @@ err: __wt_buf_free(session, &key); */ static int __txn_commit_printlog( - WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out) + WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out, + uint32_t flags) { bool firstrecord; @@ -79,7 +86,7 @@ __txn_commit_printlog( firstrecord = false; - WT_RET(__wt_txn_op_printlog(session, pp, end, out)); + WT_RET(__wt_txn_op_printlog(session, pp, end, out, flags)); WT_RET(__wt_fprintf(out, "\n }")); } @@ -459,6 +466,7 @@ __txn_printlog(WT_SESSION_IMPL *session, FILE *out; WT_LOG_RECORD *logrec; WT_LSN ckpt_lsn; + WT_TXN_PRINTLOG_ARGS *args; const uint8_t *end, *p; const char *msg; uint64_t txnid; @@ -467,7 +475,8 @@ __txn_printlog(WT_SESSION_IMPL *session, bool compressed; WT_UNUSED(next_lsnp); - out = cookie; + args = cookie; + out = args->out; p = WT_LOG_SKIP_HEADER(rawrec->data); end = (const uint8_t *)rawrec->data + rawrec->size; @@ -506,7 +515,8 @@ __txn_printlog(WT_SESSION_IMPL *session, WT_RET(__wt_fprintf(out, " \"type\" : \"commit\",\n")); WT_RET(__wt_fprintf(out, " \"txnid\" : %" PRIu64 ",\n", txnid)); - WT_RET(__txn_commit_printlog(session, &p, end, out)); + WT_RET(__txn_commit_printlog(session, &p, end, out, + args->flags)); break; case WT_LOGREC_FILE_SYNC: @@ -537,15 +547,18 @@ __txn_printlog(WT_SESSION_IMPL *session, * Print the log in a human-readable format. 
*/ int -__wt_txn_printlog(WT_SESSION *wt_session, FILE *out) +__wt_txn_printlog(WT_SESSION *wt_session, FILE *out, uint32_t flags) { WT_SESSION_IMPL *session; + WT_TXN_PRINTLOG_ARGS args; session = (WT_SESSION_IMPL *)wt_session; + args.out = out; + args.flags = flags; WT_RET(__wt_fprintf(out, "[\n")); WT_RET(__wt_log_scan( - session, NULL, WT_LOGSCAN_FIRST, __txn_printlog, out)); + session, NULL, WT_LOGSCAN_FIRST, __txn_printlog, &args)); WT_RET(__wt_fprintf(out, "\n]\n")); return (0); diff --git a/src/third_party/wiredtiger/src/utilities/util_list.c b/src/third_party/wiredtiger/src/utilities/util_list.c index 99a1455a74e..135a8bab225 100644 --- a/src/third_party/wiredtiger/src/utilities/util_list.c +++ b/src/third_party/wiredtiger/src/utilities/util_list.c @@ -8,6 +8,7 @@ #include "util.h" +static int list_get_allocsize(WT_SESSION *, const char *, size_t *); static int list_print(WT_SESSION *, const char *, bool, bool); static int list_print_checkpoint(WT_SESSION *, const char *); static int usage(void); @@ -55,6 +56,48 @@ util_list(WT_SESSION *session, int argc, char *argv[]) return (ret); } +/* + * list_get_allocsize -- + * Get the allocation size for this file from the metadata. 
+ */ +static int +list_get_allocsize(WT_SESSION *session, const char *key, size_t *allocsize) +{ + WT_CONFIG_ITEM szvalue; + WT_CONFIG_PARSER *parser; + WT_DECL_RET; + WT_EXTENSION_API *wt_api; + char *config; + + wt_api = session->connection->get_extension_api(session->connection); + if ((ret = + wt_api->metadata_search(wt_api, session, key, &config)) != 0) { + fprintf(stderr, "%s: %s: extension_api.metadata_search: %s\n", + progname, key, session->strerror(session, ret)); + return (ret); + } + if ((ret = wt_api->config_parser_open(wt_api, session, config, + strlen(config), &parser)) != 0) { + fprintf(stderr, "%s: extension_api.config_parser_open: %s\n", + progname, session->strerror(session, ret)); + return (ret); + } + if ((ret = parser->get(parser, "allocation_size", &szvalue)) != 0) { + if (ret != WT_NOTFOUND) + fprintf(stderr, "%s: config_parser.get: %s\n", + progname, session->strerror(session, ret)); + (void)parser->close(parser); + return (ret); + } + if ((ret = parser->close(parser)) != 0) { + fprintf(stderr, "%s: config_parser.close: %s\n", + progname, session->strerror(session, ret)); + return (ret); + } + *allocsize = (size_t)szvalue.val; + return (0); +} + /* * list_print -- * List the high-level objects in the database. @@ -137,9 +180,10 @@ list_print(WT_SESSION *session, const char *name, bool cflag, bool vflag) static int list_print_checkpoint(WT_SESSION *session, const char *key) { + WT_BLOCK_CKPT ci; WT_DECL_RET; WT_CKPT *ckpt, *ckptbase; - size_t len; + size_t allocsize, len; time_t t; uint64_t v; @@ -151,6 +195,14 @@ list_print_checkpoint(WT_SESSION *session, const char *key) if ((ret = __wt_metadata_get_ckptlist(session, key, &ckptbase)) != 0) return (ret == WT_NOTFOUND ? 0 : ret); + /* We need the allocation size for decoding the checkpoint addr */ + if ((ret = list_get_allocsize(session, key, &allocsize)) != 0) { + if (ret == WT_NOTFOUND) + allocsize = 0; + else + return (ret); + } + /* Find the longest name, so we can pretty-print. 
*/ len = 0; WT_CKPT_FOREACH(ckptbase, ckpt) @@ -158,7 +210,15 @@ list_print_checkpoint(WT_SESSION *session, const char *key) len = strlen(ckpt->name); ++len; + memset(&ci, 0, sizeof(ci)); WT_CKPT_FOREACH(ckptbase, ckpt) { + if (allocsize != 0 && (ret = __wt_block_ckpt_decode( + session, allocsize, ckpt->raw.data, &ci)) != 0) { + fprintf(stderr, "%s: __wt_block_buffer_to_ckpt: %s\n", + progname, session->strerror(session, ret)); + /* continue if damaged */ + ci.root_size = 0; + } /* * Call ctime, not ctime_r; ctime_r has portability problems, * the Solaris version is different from the POSIX standard. @@ -179,6 +239,17 @@ list_print_checkpoint(WT_SESSION *session, const char *key) printf(" (%" PRIu64 " KB)\n", v / WT_KILOBYTE); else printf(" (%" PRIu64 " B)\n", v); + if (ci.root_size != 0) { + printf("\t\t" "root offset: %" PRIuMAX + " (0x%" PRIxMAX ")\n", + (intmax_t)ci.root_offset, (intmax_t)ci.root_offset); + printf("\t\t" "root size: %" PRIu32 + " (0x%" PRIx32 ")\n", + ci.root_size, ci.root_size); + printf("\t\t" "root checksum: %" PRIu32 + " (0x%" PRIx32 ")\n", + ci.root_cksum, ci.root_cksum); + } } __wt_metadata_free_ckptlist(session, ckptbase); diff --git a/src/third_party/wiredtiger/src/utilities/util_main.c b/src/third_party/wiredtiger/src/utilities/util_main.c index 9cbda08690e..3b7187bd0de 100644 --- a/src/third_party/wiredtiger/src/utilities/util_main.c +++ b/src/third_party/wiredtiger/src/utilities/util_main.c @@ -226,7 +226,6 @@ main(int argc, char *argv[]) ret = func(session, argc, argv); /* Close the database. 
*/ - err: if (conn != NULL && (tret = conn->close(conn, NULL)) != 0 && ret == 0) ret = tret; diff --git a/src/third_party/wiredtiger/src/utilities/util_printlog.c b/src/third_party/wiredtiger/src/utilities/util_printlog.c index d202b09b228..3a665c1c657 100644 --- a/src/third_party/wiredtiger/src/utilities/util_printlog.c +++ b/src/third_party/wiredtiger/src/utilities/util_printlog.c @@ -15,10 +15,10 @@ util_printlog(WT_SESSION *session, int argc, char *argv[]) { WT_DECL_RET; int ch; - bool printable; + uint32_t flags; - printable = false; - while ((ch = __wt_getopt(progname, argc, argv, "f:p")) != EOF) + flags = 0; + while ((ch = __wt_getopt(progname, argc, argv, "f:x")) != EOF) switch (ch) { case 'f': /* output file */ if (freopen(__wt_optarg, "w", stdout) == NULL) { @@ -27,8 +27,8 @@ util_printlog(WT_SESSION *session, int argc, char *argv[]) return (1); } break; - case 'p': - printable = true; + case 'x': /* hex output */ + LF_SET(WT_TXN_PRINTLOG_HEX); break; case '?': default: @@ -41,8 +41,7 @@ util_printlog(WT_SESSION *session, int argc, char *argv[]) if (argc != 0) return (usage()); - WT_UNUSED(printable); - ret = __wt_txn_printlog(session, stdout); + ret = __wt_txn_printlog(session, stdout, flags); if (ret != 0) { fprintf(stderr, "%s: printlog failed: %s\n", @@ -61,7 +60,7 @@ usage(void) { (void)fprintf(stderr, "usage: %s %s " - "printlog [-p] [-f output-file]\n", + "printlog [-x] [-f output-file]\n", progname, usage_prefix); return (1); } diff --git a/src/third_party/wiredtiger/tools/wt_ckpt_decode.py b/src/third_party/wiredtiger/tools/wt_ckpt_decode.py new file mode 100644 index 00000000000..b27a0ae5297 --- /dev/null +++ b/src/third_party/wiredtiger/tools/wt_ckpt_decode.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2015 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. 
+# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. 
+ +# Decode a checkpoint 'addr' + +import os, sys, getopt + +def usage(): + print 'Usage:\n\ + $ python .../tools/wt_ckpt_decode.py [ -a allocsize ] addr...\n\ +\n\ +addr is a hex string\n\ +' + +def err_usage(msg): + print 'wt_ckpt_decode.py: ERROR: ' + msg + usage() + sys.exit(False) + +# Set paths +wt_disttop = sys.path[0] +while not os.path.isdir(wt_disttop + '/build_posix'): + if wt_disttop == '/': + err_usage('current dir not in wiredtiger development directory') + wt_disttop = os.path.dirname(wt_disttop) +sys.path.insert(1, os.path.join(wt_disttop, 'lang', 'python', 'wiredtiger')) + +from packing import pack, unpack + +def show_one(label, value): + l = 16 - len(label) + l = l if l > 1 else 1 + print ' {0}{1}{2:10d} (0x{2:x})'.format(label, (' ' * l), value, value) + +def show_triple(triple, name, allocsize): + off = triple[0] + size = triple[1] + csum = triple[2] + if size == 0: + off = -1 + csum = 0 + show_one(name + ' offset', (off + 1) * allocsize) + show_one(name + ' size', (size) * allocsize) + show_one(name + ' cksum', csum) + print '' + +def decode_arg(arg, allocsize): + addr = arg.decode("hex") + version = ord(addr[0]) + print arg + ': ' + if version != 1: + print '**** ERROR: unknown version ' + str(version) + addr = addr[1:] + result = unpack('iiiiiiiiiiiiii',addr) + if len(result) != 14: + print '**** ERROR: result len unexpected: ' + str(len(result)) + show_triple(result[0:3], 'root', allocsize) + show_triple(result[3:6], 'alloc', allocsize) + show_triple(result[6:9], 'avail', allocsize) + show_triple(result[9:12], 'discard', allocsize) + file_size = result[12] + ckpt_size = result[13] + show_one('file size', file_size) + show_one('checkpoint size', ckpt_size) + +#decode_arg('018281e420f2fa4a8381e40c5855ca808080808080e22fc0e20fc0', 4096) + +allocsize = 4096 +try: + opts, args = getopt.getopt(sys.argv[1:], "a:", ["allocsize"]) +except getopt.GetoptError as err: + err_usage(str(err)) +for o, a in opts: + if o == '-a': + allocsize = int(a) + +for 
arg in args: + decode_arg(arg, allocsize) diff --git a/src/third_party/wiredtiger/tools/wtstats/wtstats.html.template b/src/third_party/wiredtiger/tools/wtstats/wtstats.html.template index 6d4376c1d82..ef0f60bb4f2 100644 --- a/src/third_party/wiredtiger/tools/wtstats/wtstats.html.template +++ b/src/third_party/wiredtiger/tools/wtstats/wtstats.html.template @@ -9834,7 +9834,7 @@ button.close { #wrapper { padding-left: 250px; - transition: all .4s ease 0; + transition: all .4s ease 0s; } #sidebar-wrapper { @@ -9846,7 +9846,7 @@ button.close { height: 100%; overflow-y: auto; z-index: 1000; - transition: all .4s ease 0; + transition: all .4s ease 0s; border-right: 1px solid #ddd; } @@ -9875,7 +9875,7 @@ button.close { #wrapper.active #sidebar-wrapper { left: 250px; width: 250px; - transition: all .4s ease 0; + transition: all .4s ease 0s; } } @@ -10011,17 +10011,19 @@ svg.multiline .line { }
\ No newline at end of file -- cgit v1.2.1